def addAuthorToDB(authorname=None, refresh=False):
    """Add an author to the database (or refresh an existing one) and load their books.

    The author row is flagged "Loading" while the details are fetched through
    GoodReads. The book list is then loaded from the provider named in
    lazylibrarian.BOOK_API and update_totals() is called for the author.

    authorname -- name of the author to add or refresh
    refresh    -- forwarded to the lookup and book-loading calls

    Returns None. Nothing is loaded when no name is given, or when the lookup
    finds no author (in which case the author row is deleted).
    """
    if not authorname:
        # The name is both the search term and the database key.
        logger.warn(u"No author name given, unable to add author")
        return

    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.match(query)
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary id, replaced below once the lookup succeeds
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # Bind the name as a parameter so quotes in it cannot break the statement
        myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    # authorimg may be empty, so test it before searching it
    if authorimg and 'nophoto' in authorimg:
        authorimg = getAuthorImage(authorid)
    if authorimg and authorimg.startswith('http'):
        newimg = cache_cover(authorid, authorimg)
        if newimg:
            authorimg = newimg

    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    update_totals(authorid)
    logger.debug("[%s] Author update complete" % authorname)
def addAuthorToDB(authorname=None, refresh=False):
    """Add an author to the database (or refresh an existing one) and load their books.

    The author row is flagged "Loading" while the details are fetched through
    GoodReads. The book list is then loaded from the provider named in
    lazylibrarian.BOOK_API and update_totals() is called for the author.

    authorname -- name of the author to add or refresh
    refresh    -- forwarded to the lookup and book-loading calls

    Returns None. Nothing is loaded when no name is given, or when the lookup
    finds no author (in which case the author row is deleted).
    """
    if not authorname:
        # The name is both the search term and the database key.
        logger.warn(u"No author name given, unable to add author")
        return

    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    # Bound parameters keep names containing quotes from breaking the SQL
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", (authorname,)).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary id, replaced below once the lookup succeeds
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    # authorimg may be empty, so test it before searching it
    if authorimg and 'nophoto' in authorimg:
        authorimg = getAuthorImage(authorid)
    if authorimg and authorimg.startswith('http'):
        newimg = cache_cover(authorid, authorimg)
        if newimg:
            authorimg = newimg

    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    update_totals(authorid)
    logger.debug("[%s] Author update complete" % authorname)
def addAuthorToDB(authorname=None):
    """Add an author to the database, load their books and store summary totals.

    The author row is flagged "Loading", the details are fetched through
    GoodReads, the books are loaded with GR.get_author_books(), and the author
    row is then set to "Active" with the book count and the most recent book.

    authorname -- name of the author to add or refresh

    Returns None. When no name is given or the lookup finds nothing, the
    function stops before any books are loaded.
    """
    threading.currentThread().name = "DBIMPORT"
    if not authorname:
        # The name is both the search term and the database key.
        logger.warn("No author name given, unable to add author")
        return

    myDB = database.DBConnection()
    GR = GoodReads(authorname, 'author')

    # Bound parameters keep names containing quotes from breaking the SQL
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary id, replaced below once the lookup succeeds
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # Without an author id there is nothing to load; stop here instead of
        # carrying on with an undefined authorid.
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorID=? order by BookDate DESC",
        [authorid]).fetchone()
    bookCount = myDB.select("SELECT COUNT(BookName) as counter FROM books WHERE AuthorID=?", [authorid])
    for count in bookCount:
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "TotalBooks": count['counter']
        }
        # An author with no books has no "last book" row to read from
        if lastbook:
            newValueDict["LastBook"] = lastbook['BookName']
            newValueDict["LastLink"] = lastbook['BookLink']
            newValueDict["LastDate"] = lastbook['BookDate']
        myDB.upsert("authors", newValueDict, controlValueDict)
        logger.info("Processing complete: Added %s books to the database" % str(count['counter']))
def addAuthorToDB(authorname=None, refresh=False):
    """Add an author to the database (or refresh an existing one) and load their books.

    The author row is flagged "Loading" while the details are fetched through
    GoodReads. The book list is then loaded from the provider named in
    lazylibrarian.BOOK_API.

    authorname -- name of the author to add or refresh
    refresh    -- forwarded to the lookup and book-loading calls

    Returns None. Nothing is loaded when no name is given, or when the lookup
    finds no author (in which case the author row is deleted).
    """
    threading.currentThread().name = "DBIMPORT"
    if not authorname:
        # The name is both the search term and the database key.
        logger.warn(u"No author name given, unable to add author")
        return

    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    # Bound parameters keep names containing quotes from breaking the SQL
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", (authorname,)).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary id, replaced below once the lookup succeeds
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    logger.debug("[%s] Author update complete" % authorname)
def addAuthorToDB(authorname=None, refresh=False):
    """Add an author to the database (or refresh an existing one) and load their books.

    The author row is flagged "Loading" while the details are fetched through
    GoodReads. The book list is then loaded from the provider named in
    lazylibrarian.BOOK_API.

    authorname -- name of the author to add or refresh
    refresh    -- forwarded to the book-loading call

    Returns None. When no name is given or the lookup finds nothing, the
    function stops before any books are loaded.
    """
    threading.currentThread().name = "DBIMPORT"
    if not authorname:
        # The name is both the search term and the database key.
        logger.error("No author name given, unable to add author")
        return

    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    # Bound parameters keep names containing quotes from breaking the SQL
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", (authorname,)).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary id, replaced below once the lookup succeeds
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.info("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.info("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # Without an author id there is nothing to load; stop here instead of
        # carrying on with an undefined authorid.
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    logger.info("[%s] Author update complete" % authorname)
def addAuthorToDB(authorname=None):
    """Add an author to the database (or refresh an existing one) and load their books.

    The author row is flagged "Loading", the details are fetched through
    GoodReads, and the books are loaded with GR.get_author_books().

    authorname -- name of the author to add or refresh

    Returns None. When no name is given or the lookup finds nothing, the
    function stops before any books are loaded.
    """
    threading.currentThread().name = "DBIMPORT"
    if not authorname:
        # The name is both the search term and the database key.
        logger.error("No author name given, unable to add author")
        return

    myDB = database.DBConnection()
    GR = GoodReads(authorname, 'author')

    # Bound parameters keep names containing quotes from breaking the SQL
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary id, replaced below once the lookup succeeds
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # Without an author id there is nothing to load; stop here instead of
        # carrying on with an undefined authorid.
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)
def addAuthorToDB(authorname=None):
    """Add an author to the database (or refresh an existing one) and load their books.

    The author row is flagged "Loading", the details are fetched through
    GoodReads, and the books are loaded with GR.get_author_books().

    authorname -- name of the author to add or refresh

    Returns None. When no name is given or the lookup finds nothing, the
    function stops before any books are loaded.
    """
    threading.currentThread().name = "DBIMPORT"
    if not authorname:
        # The name is both the search term and the database key.
        logger.error("No author name given, unable to add author")
        return

    myDB = database.DBConnection()
    GR = GoodReads(authorname, 'author')

    # Bound parameters keep names containing quotes from breaking the SQL
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary id, replaced below once the lookup succeeds
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # Without an author id there is nothing to load; stop here instead of
        # carrying on with an undefined authorid.
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)
def addAuthorToDB(authorname=None, refresh=False):
    """Add an author to the database (or refresh an existing one), load their
    books and recalculate the author's book counters.

    The author row is flagged "Loading" while the details are fetched through
    GoodReads. The book list is loaded from the provider named in
    lazylibrarian.BOOK_API, then HaveBooks, TotalBooks and UnignoredBooks are
    recounted from the books table.

    authorname -- name of the author to add or refresh
    refresh    -- forwarded to the lookup and book-loading calls

    Returns None. Nothing is loaded when no name is given, or when the lookup
    finds no author (in which case the author row is deleted).
    """
    threading.currentThread().name = "DBIMPORT"
    if not authorname:
        # The name is both the search term and the database key.
        logger.warn(u"No author name given, unable to add author")
        return

    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    # All statements below bind their values, so names containing quotes
    # cannot break (or alter) the SQL.
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", (authorname,)).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # Temporary id, replaced below once the lookup succeeds
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    # recount the author's books
    havebooks = myDB.action(
        'SELECT count("BookID") as counter from books WHERE AuthorName=? AND (Status=? OR Status=?)',
        (authorname, "Have", "Open")).fetchone()
    myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?',
                (havebooks['counter'], authorname))

    totalbooks = myDB.action(
        'SELECT count("BookID") as counter FROM books WHERE AuthorName=?',
        (authorname,)).fetchone()
    myDB.action('UPDATE authors set TotalBooks=? where AuthorName=?',
                (totalbooks['counter'], authorname))

    unignoredbooks = myDB.action(
        'SELECT count("BookID") as counter FROM books WHERE AuthorName=? AND Status!=?',
        (authorname, "Ignored")).fetchone()
    myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?',
                (unignoredbooks['counter'], authorname))

    logger.debug("[%s] Author update complete" % authorname)
def addAuthorToDB(authorname=None, refresh=False, authorid=None, addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist

    authorname -- author name to look up
    refresh    -- forwarded to the lookup, image caching and book-loading calls;
                  also seeds new_author (a refresh is not treated as a new author)
    authorid   -- author id; when given it is looked up first via get_author_info()
    addbooks   -- when False no books are loaded and the author status is set to 'Ignored'

    Returns a status message string on completion, the traceback message if an
    exception was caught, or None when no author could be matched.
    """
    # Only rename auto-named worker threads, a caller-assigned name is left alone
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "AddAuthorToDB"
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        match = False  # set True once author details have been stored
        author = None
        authorimg = ''
        new_author = not refresh
        entry_status = ''

        # --- lookup by author id ---
        if authorid:
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?", (authorid,))
            if not dbauthor:
                authorname = 'unknown author'
                logger.debug("Adding new author id %s to database" % authorid)
                new_author = True
            else:
                # remember the status the author had before it is overwritten with "Loading"
                entry_status = dbauthor['Status']
                authorname = dbauthor['authorname']
                logger.debug("Updating author %s " % authorname)
                new_author = False

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            if new_author:
                # placeholder values shown until the real details arrive
                newValueDict["AuthorName"] = "Loading"
                newValueDict["AuthorImg"] = "images/nophoto.png"
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorid)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # authors flagged 'manual' keep their stored image, dates and name
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                    if not dbauthor:
                        newValueDict["AuthorName"] = author['authorname']
                    elif dbauthor['authorname'] != author['authorname']:
                        # keep the name already in the database, only report the difference
                        authorname = dbauthor['authorname']
                        logger.warn("Authorname mismatch for %s [%s][%s]" %
                                    (authorid, dbauthor['authorname'], author['authorname']))
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn("Nothing found for %s" % authorid)
                # remove the placeholder row created above, but never an existing author
                if not dbauthor:
                    myDB.action('DELETE from authors WHERE AuthorID=?', (authorid,))

        # --- lookup by author name ---
        # NOTE(review): 'author' only becomes truthy in the id branch above, which
        # then also sets match = True, so as written this condition looks like it
        # can never be true and a name-only call ends at "No matching result"
        # below - confirm whether 'and author' is intended.
        if authorname and author and not match:
            authorname = ' '.join(authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)
            author = GR.find_author_id(refresh=refresh)
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorName=?", (authorname,))
            if author and not dbauthor:  # may have different name for same authorid (spelling?)
                dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?", (author['authorid'],))
                # NOTE(review): if the id is not in the database either, dbauthor may
                # still be empty here and this subscript would fail - confirm
                authorname = dbauthor['AuthorName']

            controlValueDict = {"AuthorName": authorname}
            if not dbauthor:
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" % authorname)
                entry_status = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                entry_status = dbauthor['Status']
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                if dbauthor:
                    if authorname != dbauthor['authorname']:
                        # name change might be users preference
                        logger.warn("Conflicting authorname for %s [%s][%s] Ignoring change" %
                                    (author['authorid'], authorname, dbauthor['authorname']))
                        authorname = dbauthor['authorname']
                        # cmd = 'UPDATE authors SET AuthorName=? WHERE AuthorName=?'
                        # myDB.action(cmd, (author['authorname'], dbauthor['authorname']))

                    if author['authorid'] != dbauthor['authorid']:
                        # GoodReads may have altered authorid?
                        logger.warn("Conflicting authorid for %s (%s:%s) Moving to new authorid" %
                                    (authorname, author['authorid'], dbauthor['authorid']))
                        # repoint the books at the new id, then drop the old author row
                        cmd = 'UPDATE books SET AuthorID=? WHERE AuthorID=?'
                        myDB.action(cmd, (author['authorid'], dbauthor['authorid']))
                        myDB.action('DELETE from authors WHERE AuthorID=?', (dbauthor['authorid'],))
                        dbauthor = None

                # authors flagged 'manual' keep their stored image and dates
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']

                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn("Nothing found for %s" % authorname)
                # remove the placeholder row created above, but never an existing author
                if not dbauthor:
                    myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
                return

        if not match:
            logger.error("No matching result for authorname or authorid")
            return

        # --- author image ---
        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        # note: 'match' is reused here for the Manual flag row
        match = myDB.match("SELECT Manual from authors WHERE AuthorID=?", (authorid,))
        if not match or not match['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        if authorimg and authorimg.startswith('http'):
            newimg, success, _ = cache_img("author", authorid, authorimg, refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        # --- books ---
        if addbooks:
            # new authors and existing authors take their default statuses from different settings
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
                audiostatus = lazylibrarian.CONFIG['NEWAUTHOR_AUDIO']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']
                audiostatus = lazylibrarian.CONFIG['NEWAUDIO_STATUS']

            if entry_status not in ['Active', 'Wanted', 'Ignored', 'Paused']:
                entry_status = 'Active'  # default for invalid/unknown or "loading"
            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                if lazylibrarian.CONFIG['GB_API']:
                    book_api = GoogleBooks()
                    book_api.get_author_books(authorid, authorname, bookstatus=bookstatus,
                                              audiostatus=audiostatus, entrystatus=entry_status,
                                              refresh=refresh)
                # if lazylibrarian.CONFIG['GR_API']:
                #     book_api = GoodReads(authorname)
                #     book_api.get_author_books(authorid, authorname, bookstatus=bookstatus,
                #                               ausiostatus=audiostatus, entrystatus=entry_status,
                #                               refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                if lazylibrarian.CONFIG['GR_API']:
                    book_api = GoodReads(authorname)
                    book_api.get_author_books(authorid, authorname, bookstatus=bookstatus,
                                              audiostatus=audiostatus, entrystatus=entry_status,
                                              refresh=refresh)
                # if lazylibrarian.CONFIG['GB_API']:
                #     book_api = GoogleBooks()
                #     book_api.get_author_books(authorid, authorname, bookstatus=bookstatus,
                #                               audiostatus=audiostatus, entrystatus=entry_status,
                #                               refresh=refresh)

            update_totals(authorid)

            if new_author and lazylibrarian.CONFIG['GR_FOLLOWNEW']:
                res = grfollow(authorid, True)
                if res.startswith('Unable'):
                    logger.warn(res)
                try:
                    # a reply without "followid=" raises IndexError and stores an empty id
                    followid = res.split("followid=")[1]
                    logger.debug('%s marked followed' % authorname)
                except IndexError:
                    followid = ''
                myDB.action('UPDATE authors SET GRfollow=? WHERE AuthorID=?', (followid, authorid))
        else:
            # if we're not loading any books, mark author as ignored
            entry_status = 'Ignored'

        # replace the temporary "Loading" status with the final one
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": entry_status}
        myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete, status %s" % (authorname, entry_status)
        logger.info(msg)
        return msg
    except Exception:
        # any failure is logged and returned as text rather than raised
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
def addBookToDB(bookid, authorname):
    """Add (or refresh) a single book and its author in the database.

    The book row is flagged "Loading", then filled in from GR.find_book() with
    status "Skipped". The author row is then filled in from
    GR.find_author_id(), or given a "Loading" placeholder when that lookup
    returns nothing.

    bookid     -- id of the book to add
    authorname -- name of the book's author

    Returns None.
    """
    myDB = database.DBConnection()
    GR = GoodReads(authorname, 'book')

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?', [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {
            "BookID": "BookID: %s" % (bookid),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()

    if not book:
        # %-formatting also copes with a non-string bookid
        logger.warn("Error fetching bookinfo for BookID: %s" % bookid)
    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
    myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # The placeholder must not be quoted: '?' inside quotes is a literal string,
    # which leaves the statement with no parameter to bind authorname to.
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorName": "Authorname: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()

    if not author:
        logger.warn("Error fetching authorinfo with name: %s" % authorname)
    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": author['authorid'],
            "AuthorLink": author['authorlink'],
            "AuthorImg": author['authorimg'],
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }
    myDB.upsert("authors", newValueDict, controlValueDict)
def processAlternate(source_dir=None):
    """Import a book from an alternate directory, processing subdirectories first.

    One book file per directory is considered. Its title and author are read
    from an .opf file next to it (or any .opf in the folder), falling back to
    the book file itself. The author is added to the database if missing, and
    the book is imported when it can be found in the database.

    source_dir -- directory to import from; must exist and must differ from
                  lazylibrarian.DESTINATION_DIR

    Returns None.
    """
    if not source_dir or os.path.isdir(source_dir) is False:
        logger.warn('Alternate directory not found')
        return
    if source_dir == lazylibrarian.DESTINATION_DIR:
        logger.warn('Alternate directory must not be the same as destination')
        return

    logger.debug('Processing alternate directory %s' % source_dir)
    # first, recursively process any books in subdirectories
    for fname in os.listdir(source_dir):
        subdir = os.path.join(source_dir, fname)
        if os.path.isdir(subdir):
            processAlternate(subdir)

    # only import one book from each alternate (sub)directory, this is because
    # the importer may delete the directory after importing a book,
    # depending on lazylibrarian.DESTINATION_COPY setting
    # also if multiple books in a folder and only a "metadata.opf"
    # which book is it for?
    new_book = book_file(source_dir, booktype='book')
    if not new_book:
        logger.warn("No book file found in %s" % source_dir)
        return

    metadata = {}
    # see if there is a metadata file in this folder with the info we need
    # try book_name.opf first, or fall back to any filename.opf
    metafile = os.path.splitext(new_book)[0] + '.opf'
    if not os.path.isfile(metafile):
        metafile = opf_file(source_dir)
    # opf_file() may not have found anything, so test the name before using it
    if metafile and os.path.isfile(metafile):
        try:
            metadata = get_book_info(metafile) or {}
        except Exception:
            logger.debug('Failed to read metadata from %s' % metafile)
    else:
        logger.debug('No metadata file found for %s' % new_book)

    # Both fields are needed; if either is missing try the book file itself.
    if not ('title' in metadata and 'creator' in metadata):
        # try to get metadata from the book file
        try:
            metadata = get_book_info(new_book) or {}
        except Exception:
            logger.debug('No metadata found in %s' % new_book)

    if not ('title' in metadata and 'creator' in metadata):
        logger.warn('Book %s has no metadata, unable to import' % new_book)
        return

    authorname = metadata['creator']
    bookname = metadata['title']
    myDB = database.DBConnection()

    # The name comes from file metadata, so bind it rather than format it into the SQL
    authmatch = myDB.action('SELECT * FROM authors where AuthorName=?', [authorname]).fetchone()

    if not authmatch:
        # try goodreads preferred authorname
        logger.debug("Checking GoodReads for [%s]" % authorname)
        GR = GoodReads(authorname)
        author_gr = None  # stays None if the lookup raises
        try:
            author_gr = GR.find_author_id()
        except Exception:
            logger.debug("No author id for [%s]" % authorname)
        if author_gr:
            grauthorname = author_gr['authorname']
            logger.debug("GoodReads reports [%s] for [%s]" % (grauthorname, authorname))
            authorname = grauthorname
            authmatch = myDB.action('SELECT * FROM authors where AuthorName=?', [authorname]).fetchone()

    if authmatch:
        logger.debug("ALT: Author %s found in database" % (authorname))
    else:
        logger.debug("ALT: Author %s not found, adding to database" % (authorname))
        addAuthorToDB(authorname)

    bookid = find_book_in_db(myDB, authorname, bookname)
    if bookid:
        import_book(source_dir, bookid)
    else:
        logger.warn("Book %s by %s not found in database" % (bookname, authorname))
def addAuthorNameToDB(author=None, refresh=False, addbooks=True):
    """Find an author in the database by name, importing them if allowed.

    The name is put through formatAuthorName(), then matched exactly, then
    fuzzily (ratio >= 95) against the names already stored. If there is still
    no match and CONFIG['ADD_AUTHOR'] is set, the author is looked up through
    GoodReads and added when the returned name is close enough (ratio >= 90).

    Returns (authorname, authorid, new): new is True when the author was added
    by this call, False when already present. Returns ("", "", False) when the
    name is invalid or the author could not be found or added.
    """
    added = False
    if not author or len(author) < 2:
        logger.debug('Invalid Author Name [%s]' % author)
        return "", "", False

    author = formatAuthorName(author)
    myDB = database.DBConnection()

    # exact name match first
    db_row = myDB.match('SELECT AuthorID FROM authors where AuthorName=?', (author,))

    # otherwise accept a very close name, to cope with misspellings and accents
    if not db_row:
        wanted = author.lower()
        for row in myDB.action('select AuthorID,AuthorName from authors'):
            stored_name = row['AuthorName']
            if not stored_name:
                continue
            score = fuzz.ratio(stored_name.lower(), wanted)
            if score >= 95:
                logger.debug("Fuzzy match [%s] %s%% for [%s]" % (row['AuthorName'], score, author))
                db_row = row
                author = row['AuthorName']
                break

    if not db_row and lazylibrarian.CONFIG['ADD_AUTHOR']:
        logger.debug('Author %s not found in database, trying to add' % author)
        # no match for supplied author, but we're allowed to add new ones
        GR = GoodReads(author)
        try:
            author_gr = GR.find_author_id()
        except Exception as e:
            logger.warn("%s finding author id for [%s] %s" % (type(e).__name__, author, str(e)))
            return "", "", False

        # only try to add if GR data matches found author data
        if author_gr:
            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien",
            # so compare with dots turned into spaces and runs of whitespace collapsed
            match_auth = ' '.join(author.replace('.', ' ').split())
            match_name = ' '.join(author_gr['authorname'].replace('.', ' ').split())

            # accents may or may not be present in a filename, so compare unaccented
            match_name = unaccented(match_name)
            match_auth = unaccented(match_auth)

            # fuzz.ratio doesn't lowercase for us
            score = fuzz.ratio(match_auth.lower(), match_name.lower())
            if score < 90:
                logger.debug("Failed to match author [%s] to authorname [%s] fuzz [%d]" %
                             (author, match_name, score))
            elif author != "Unknown":
                # "Unknown" is skipped to avoid loading hundreds of books by unknown authors.
                # Store the author under the unmodified GoodReads name, not one of the
                # normalised versions, or the books appear to be by a different author.
                author = author_gr['authorname']
                authorid = author_gr['authorid']
                # this new authorname may already be in the database, so check again
                db_row = myDB.match('SELECT AuthorID FROM authors where AuthorID=?', (authorid,))
                if db_row:
                    logger.debug('Found goodreads authorname %s in database' % author)
                else:
                    logger.info("Adding new author [%s]" % author)
                    try:
                        addAuthorToDB(authorname=author, refresh=refresh, authorid=authorid,
                                      addbooks=addbooks)
                        db_row = myDB.match('SELECT AuthorID FROM authors where AuthorID=?',
                                            (authorid,))
                        if db_row:
                            added = True
                    except Exception as e:
                        logger.error('Failed to add author [%s] to db: %s %s' %
                                     (author, type(e).__name__, str(e)))

    # check author exists in db, either newly loaded or already there
    if not db_row:
        logger.debug("Failed to match author [%s] in database" % author)
        return "", "", False

    author = makeUnicode(author)
    return author, db_row['AuthorID'], added
def LibraryScan(dir=None):
    """Scan an ebook directory and bring book statuses in the database in line with it.

    With lazylibrarian.FULL_SCAN set, every book marked 'Open' is first checked
    on disk and set to lazylibrarian.NOTFOUND_STATUS when no file of any
    configured type exists. The directory tree is then walked; file names that
    fit the lazylibrarian.EBOOK_DEST_FILE template are matched to books, which
    are marked 'Open'. Unknown authors are added when lazylibrarian.ADD_AUTHOR
    is set and the GoodReads author link agrees with the name. Finally the
    HaveBooks and UnignoredBooks counters of the affected authors are updated.

    dir -- directory to scan; defaults to lazylibrarian.DOWNLOAD_DIR

    Returns None.
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' %
                    dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    if not booktype_list:
        # Without any file types nothing can be recognised as a book
        logger.warn('No ebook types configured. Not scanning')
        return

    # Turn the EBOOK_DEST_FILE template into a regular expression. The template
    # text is escaped as a whole so its characters are matched literally, then
    # the two placeholders are replaced by named groups. The extension must be
    # exactly one of the configured types.
    matchString = re.escape(lazylibrarian.EBOOK_DEST_FILE)
    matchString = matchString.replace(re.escape('$Author'), '(?P<author>.*?)')
    matchString = matchString.replace(re.escape('$Title'), '(?P<book>.*?)')
    booktypes = '|'.join(re.escape(book_type) for book_type in booktype_list)
    pattern = re.compile(matchString + r'\.(?:' + booktypes + r')$')
    if 'author' not in pattern.groupindex or 'book' not in pattern.groupindex:
        logger.warn('EBOOK_DEST_FILE must contain $Author and $Title. Not scanning')
        return

    myDB = database.DBConnection()
    new_authors = []
    new_book_count = 0

    logger.info('Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    if lazylibrarian.FULL_SCAN:
        books = myDB.select('select AuthorName, BookName from books where Status=?', [u'Open'])
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            # Default destination path, should be allowed change per config file.
            dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                '$Author', bookAuthor).replace('$Title', bookName)
            global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                '$Author', bookAuthor).replace('$Title', bookName)

            # Must start False for every book, otherwise one book found on disk
            # would hide every missing book that follows it
            book_exists = False
            for book_type in booktype_list:
                encoded_book_path = os.path.join(
                    dir, dest_path, global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING)
                if os.path.isfile(encoded_book_path):
                    book_exists = True
                    break

            if not book_exists:
                myDB.action('update books set Status=? where AuthorName=? and BookName=?',
                            [status, bookAuthor, bookName])
                logger.info('Book %s updated as not found on disk' %
                            encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace'))
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

    for r, d, f in os.walk(dir):
        # Prune in place so os.walk does not descend into hidden directories
        # or "_" directories (prevents magazines being scanned)
        d[:] = [directory for directory in d if not directory.startswith((".", "_"))]

        if f:
            subdirectory = r.replace(dir, '')
            logger.info("[%s] Now scanning subdirectory %s" %
                        (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'),
                         subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))

        for files in f:
            match = pattern.match(files)
            if not match:
                continue
            author = match.group("author")
            book = match.group("book")

            # check if book is in database, and not marked as in library
            check_exist_book = myDB.action(
                "SELECT * FROM books where AuthorName=? and BookName=? and Status!=?",
                [author, book, 'Open']).fetchone()
            if check_exist_book:
                if author not in new_authors:
                    new_authors.append(author)
                myDB.action('UPDATE books set Status=? where AuthorName=? and BookName=?',
                            ['Open', author, book])
                new_book_count += 1
                continue

            check_exist_author = myDB.action(
                "SELECT * FROM authors where AuthorName=?", [author]).fetchone()
            if check_exist_author or not lazylibrarian.ADD_AUTHOR:
                continue

            GR = GoodReads(author)
            try:
                author_gr = GR.find_author_id()
            except Exception as e:
                logger.debug("Failed to look up author %s: %s" % (author, str(e)))
                continue

            # only try to add if GR data matches found author data:
            # the last part of the author link is compared with "<id>.<name>"
            link_matches = False
            if author_gr:
                authorid = author_gr['authorid']
                authorlink = author_gr['authorlink']
                pageIdx = authorlink.rfind('/')
                authorlink = authorlink[pageIdx + 1:]
                match_auth = authorid + "." + author.replace('. ', '_')
                logger.debug(match_auth)
                logger.debug(authorlink)
                link_matches = (match_auth == authorlink)

            if not link_matches:
                logger.info("Unable to match %s in GoodReads database" % author)
                continue

            logger.info("Adding %s" % author)
            try:
                importer.addAuthorToDB(author)
            except Exception as e:
                logger.debug("Failed to add author %s: %s" % (author, str(e)))
                continue

            # the author's books are loaded now, so look for this one again
            check_exist_book = myDB.action(
                "SELECT * FROM books where AuthorName=? and BookName=?",
                [author, book]).fetchone()
            if check_exist_book:
                if author not in new_authors:
                    new_authors.append(author)
                myDB.action('UPDATE books set Status=? where AuthorName=? and BookName=?',
                            ['Open', author, book])
                new_book_count += 1

    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(myDB.select(
            'select BookName from Books where status=? and AuthorName=?', ['Open', auth]))
        myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?', [havebooks, auth])
        totalbooks = len(myDB.select(
            'select BookName from Books where status!=? and AuthorName=?', ['Ignored', auth]))
        myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?', [totalbooks, auth])
    logger.info('Library scan complete')
def LibraryScan(dir=None):
    """Synchronise book and author status in the database with files on disk.

    When ``lazylibrarian.FULL_SCAN`` is set, every book currently marked
    'Open' is checked against its expected destination path; books with no
    file for any configured ebook type are set to
    ``lazylibrarian.NOTFOUND_STATUS``.

    The directory tree is then walked (skipping directories starting with
    "." or "_") and each file name is matched against a regular expression
    derived from ``lazylibrarian.EBOOK_DEST_FILE``. Matched books are marked
    'Open'; unknown authors are optionally added through GoodReads when
    ``lazylibrarian.ADD_AUTHOR`` is set. Finally the HaveBooks and
    UnignoredBooks counters of every touched author are refreshed.

    :param dir: directory to scan; defaults to ``lazylibrarian.DOWNLOAD_DIR``.
    :return: None
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' %
                    dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    if not booktype_list:
        # Without any extension there is nothing we could recognise as a book.
        logger.warn('No ebook types configured. Not scanning')
        return

    myDB = database.DBConnection()
    new_authors = []
    new_book_count = 0

    logger.info('Scanning ebook directory: %s' %
                dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName from books where Status=?', [u'Open'])
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            # Default destination path, should be allowed change per config file.
            dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                '$Author', bookAuthor).replace('$Title', bookName)
            global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                '$Author', bookAuthor).replace('$Title', bookName)

            # The flag must be reset for every book, otherwise the first book
            # found on disk hides every later missing one.
            book_exists = False
            for book_type in booktype_list:
                encoded_book_path = os.path.join(
                    dir, dest_path, global_name + "." + book_type).encode(
                        lazylibrarian.SYS_ENCODING)
                if os.path.isfile(encoded_book_path):
                    book_exists = True
                    break

            if not book_exists:
                myDB.action(
                    'update books set Status=? where AuthorName=? and BookName=?',
                    [status, bookAuthor, bookName])
                logger.info('Book %s updated as not found on disk' %
                            encoded_book_path.decode(
                                lazylibrarian.SYS_ENCODING, 'replace'))
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

    # Massage the EBOOK_DEST_FILE config parameter into a regular expression.
    # re.escape keeps literal text literal (escaping letter by letter would
    # turn e.g. "b" into the word-boundary escape "\b"); the placeholders are
    # then swapped for named groups. The extension is a real alternation,
    # anchored at the end of the name, not a character class.
    matchString = re.escape(lazylibrarian.EBOOK_DEST_FILE)
    matchString = matchString.replace(
        re.escape('$Author'), '(?P<author>.*?)').replace(
            re.escape('$Title'), '(?P<book>.*?)')
    matchString += r'\.(?:' + '|'.join(
        re.escape(book_type) for book_type in booktype_list) + r')$'
    pattern = re.compile(matchString)

    for r, d, f in os.walk(dir):
        # Prune in place so os.walk does not descend into hidden directories
        # or "_" directories (prevents magazines being scanned).
        d[:] = [directory for directory in d
                if not directory.startswith(('.', '_'))]

        subdirectory = r.replace(dir, '')
        for files in f:
            logger.info(
                "[%s] Now scanning subdirectory %s" %
                (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'),
                 subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))

            match = pattern.match(files)
            if not match:
                continue
            author = match.group("author")
            book = match.group("book")

            # check if book is in database, and not marked as in library
            check_exist_book = myDB.action(
                "SELECT * FROM books where AuthorName=? and BookName=? and Status!=?",
                [author, book, 'Open']).fetchone()

            if check_exist_book:
                if author not in new_authors:
                    new_authors.append(author)
                myDB.action(
                    'UPDATE books set Status=? where AuthorName=? and BookName=?',
                    ['Open', author, book])
                new_book_count += 1
                continue

            check_exist_author = myDB.action(
                "SELECT * FROM authors where AuthorName=?",
                [author]).fetchone()
            if check_exist_author or not lazylibrarian.ADD_AUTHOR:
                continue

            GR = GoodReads(author)
            try:
                author_gr = GR.find_author_id()
            except Exception as e:
                logger.debug("GoodReads lookup failed for %s: %s" % (author, str(e)))
                continue

            # only try to add if GR data matches found author data
            if not author_gr:
                continue
            authorid = author_gr['authorid']
            authorlink = author_gr['authorlink']
            authorlink = authorlink[authorlink.rfind('/') + 1:]
            # Build "<id>.<Name_With_Underscores>" to compare with the last
            # component of the GoodReads author link.
            match_auth = author.replace('.', '_')
            match_auth = match_auth.replace(' ', '_')
            match_auth = match_auth.replace('__', '_')
            match_auth = authorid + "." + match_auth
            logger.debug(match_auth)
            logger.debug(authorlink)

            if match_auth != authorlink:
                logger.info("Unable to match %s in GoodReads database" % author)
                continue

            logger.info("Adding %s" % author)
            try:
                importer.addAuthorToDB(author)
            except Exception as e:
                logger.debug("Failed to add author %s: %s" % (author, str(e)))
                continue

            check_exist_book = myDB.action(
                "SELECT * FROM books where AuthorName=? and BookName=?",
                [author, book]).fetchone()
            if check_exist_book:
                if author not in new_authors:
                    new_authors.append(author)
                myDB.action(
                    'UPDATE books set Status=? where AuthorName=? and BookName=?',
                    ['Open', author, book])
                new_book_count += 1

    logger.info("%s new/modified books found and added to the database" %
                new_book_count)
    logger.info('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(myDB.select(
            'select BookName from Books where status=? and AuthorName=?',
            ['Open', auth]))
        myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?',
                    [havebooks, auth])
        totalbooks = len(myDB.select(
            'select BookName from Books where status!=? and AuthorName=?',
            ['Ignored', auth]))
        myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?',
                    [totalbooks, auth])
    logger.info('Library scan complete')
def addAuthorToDB(authorname=None, refresh=False, authorid=None, addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist

    The lookup by ``authorid`` is tried first; if it yields nothing, the
    lookup falls back to ``authorname``. All SQL uses bound parameters so
    names/ids containing quotes cannot break or alter the statements.

    :param authorname: author name to look up on GoodReads.
    :param refresh: passed through to the GoodReads/GoogleBooks calls and the
        image cache; also makes the author count as "not new" initially.
    :param authorid: GoodReads author id, takes precedence over the name.
    :param addbooks: when False no books are loaded and the author is marked
        'Ignored'.
    :return: a status message string on completion or on an unhandled
        exception; None when no matching author was found.
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "AddAuthorToDB"
    try:
        myDB = database.DBConnection()
        match = False
        authorimg = ''
        new_author = not refresh

        if authorid:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?", (authorid,))
            if not dbauthor:
                authorname = 'unknown author'
                logger.debug("Now adding new author id: %s to database" % authorid)
                new_author = True
            else:
                authorname = dbauthor['authorname']
                logger.debug("Now updating author %s " % authorname)
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorname)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # Do not overwrite details the user has edited manually.
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorName"] = author['authorname']
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorid)
                if not dbauthor:
                    # Remove the placeholder row inserted above.
                    myDB.action('DELETE from authors WHERE AuthorID=?', (authorid,))

        if authorname and not match:
            authorname = ' '.join(authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorName=?", (authorname,))
            controlValueDict = {"AuthorName": authorname}
            if not dbauthor:
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" % authorname)
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            author = GR.find_author_id(refresh=refresh)
            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorname)
                if not dbauthor:
                    myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
                return

        if not match:
            logger.error("AddAuthorToDB: No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        match = myDB.match("SELECT Manual from authors WHERE AuthorID=?", (authorid,))
        if not match or not match['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        if authorimg and authorimg.startswith('http'):
            newimg, success = cache_img("author", authorid, authorimg, refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                book_api = GoogleBooks()
                book_api.get_author_books(authorid, authorname, bookstatus, refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                GR = GoodReads(authorname)
                GR.get_author_books(authorid, authorname, bookstatus, refresh=refresh)

            # update totals works for existing authors only.
            # New authors need their totals updating after libraryscan or import of books.
            if not new_author:
                update_totals(authorid)
        else:
            # if we're not loading any books, mark author as ignored
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Ignored"}
            myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete" % authorname
        logger.debug(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
def processAlternate(source_dir=None):
    """Import a book from an alternate directory.

    Subdirectories are processed recursively first. Then at most one book
    file from ``source_dir`` itself is imported: its metadata is read from a
    matching ``.opf`` file (or any ``.opf`` in the folder), falling back to
    the book file itself. The author is looked up in the database (using the
    GoodReads preferred name if the original name is unknown) and added when
    missing, then the book is imported if it is found in the database.

    :param source_dir: directory to import from.
    :return: the result of ``import_book`` when a book was matched, otherwise
        False (including when an unexpected exception was logged).
    """
    try:
        if not source_dir:
            logger.warn("Alternate Directory not configured")
            return False
        elif not os.path.isdir(source_dir):
            logger.warn("Alternate Directory [%s] not found" % source_dir)
            return False
        if source_dir == lazylibrarian.DIRECTORY('Destination'):
            logger.warn('Alternate directory must not be the same as Destination')
            return False

        logger.debug('Processing alternate directory %s' % source_dir)
        # first, recursively process any books in subdirectories
        for fname in os.listdir(source_dir):
            subdir = os.path.join(source_dir, fname)
            if os.path.isdir(subdir):
                processAlternate(subdir)

        # only import one book from each alternate (sub)directory, this is because
        # the importer may delete the directory after importing a book,
        # depending on lazylibrarian.DESTINATION_COPY setting
        # also if multiple books in a folder and only a "metadata.opf"
        # which book is it for?
        new_book = book_file(source_dir, booktype='book')
        if not new_book:
            logger.warn("No book file found in %s" % source_dir)
            return False

        metadata = {}
        # see if there is a metadata file in this folder with the info we need
        # try book_name.opf first, or fall back to any filename.opf
        metafile = os.path.splitext(new_book)[0] + '.opf'
        if not os.path.isfile(metafile):
            metafile = opf_file(source_dir)
        if metafile and os.path.isfile(metafile):
            try:
                metadata = get_book_info(metafile)
            except Exception as e:
                logger.debug('Failed to read metadata from %s, %s' % (metafile, str(e)))
        else:
            logger.debug('No metadata file found for %s' % new_book)

        if 'title' not in metadata or 'creator' not in metadata:
            # if not got both, try to get metadata from the book file
            try:
                metadata = get_book_info(new_book)
            except Exception as e:
                logger.debug('No metadata found in %s, %s' % (new_book, str(e)))

        if 'title' not in metadata or 'creator' not in metadata:
            logger.warn('Book %s has no metadata, unable to import' % new_book)
            return False

        authorname = metadata['creator']
        bookname = metadata['title']
        myDB = database.DBConnection()
        # Bound parameter: author names may contain quote characters.
        authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

        if not authmatch:
            # try goodreads preferred authorname
            logger.debug("Checking GoodReads for [%s]" % authorname)
            GR = GoodReads(authorname)
            author_gr = None  # stays None if the lookup raises
            try:
                author_gr = GR.find_author_id()
            except Exception:
                logger.debug("No author id for [%s]" % authorname)
            if author_gr:
                grauthorname = author_gr['authorname']
                logger.debug("GoodReads reports [%s] for [%s]" % (grauthorname, authorname))
                authorname = grauthorname
                authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

        if authmatch:
            logger.debug("ALT: Author %s found in database" % (authorname))
        else:
            logger.debug("ALT: Author %s not found, adding to database" % (authorname))
            addAuthorToDB(authorname)

        bookid = find_book_in_db(myDB, authorname, bookname)
        if bookid:
            return import_book(source_dir, bookid)

        logger.warn("Book %s by %s not found in database" % (bookname, authorname))
        return False
    except Exception:
        logger.error('Unhandled exception in processAlternate: %s' % traceback.format_exc())
        return False
def addAuthorToDB(authorname=None, refresh=False, authorid=None, addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist

    The author's previous Status is remembered in ``entry_status`` and
    written back once loading is complete (falling back to 'Active' for
    unknown values, or 'Ignored' when no books are loaded).

    :param authorname: author name to look up on GoodReads.
    :param refresh: passed through to the GoodReads/GoogleBooks calls and the
        image cache; also makes the author count as "not new" initially.
    :param authorid: GoodReads author id, takes precedence over the name.
    :param addbooks: when False no books are loaded and the author is marked
        'Ignored'.
    :return: a status message string on completion or on an unhandled
        exception; None when no matching author was found.
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "AddAuthorToDB"
    try:
        myDB = database.DBConnection()
        match = False
        authorimg = ''
        new_author = not refresh
        entry_status = ''

        if authorid:
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?", (authorid,))
            if not dbauthor:
                authorname = 'unknown author'
                logger.debug("Adding new author id %s to database" % authorid)
                new_author = True
            else:
                entry_status = dbauthor['Status']
                authorname = dbauthor['authorname']
                logger.debug("Updating author %s " % authorname)
                new_author = False

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorname)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # Do not overwrite details the user has edited manually.
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorName"] = author['authorname']
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorid)
                if not dbauthor:
                    # Remove the placeholder row inserted above.
                    myDB.action('DELETE from authors WHERE AuthorID=?', (authorid,))

        if authorname and not match:
            authorname = ' '.join(authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)
            author = GR.find_author_id(refresh=refresh)
            # The value is bound as a parameter, so it must NOT be quote-doubled
            # by hand: that would look up "O''Brien" instead of "O'Brien".
            query = "SELECT * from authors WHERE AuthorName=?"
            dbauthor = myDB.match(query, (authorname,))
            if author and not dbauthor:
                # may have different name for same authorid (spelling?)
                query = "SELECT * from authors WHERE AuthorID=?"
                dbauthor = myDB.match(query, (author['authorid'],))
                if dbauthor:
                    # Only adopt the stored name when the id really is known;
                    # for a brand new author there is no row to read from.
                    authorname = dbauthor['AuthorName']

            controlValueDict = {"AuthorName": authorname}
            if not dbauthor:
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" % authorname)
                entry_status = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                entry_status = dbauthor['Status']
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                if not dbauthor or (dbauthor and not dbauthor['manual']):
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorname)
                if not dbauthor:
                    myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
                return

        if not match:
            logger.error("AddAuthorToDB: No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        match = myDB.match("SELECT Manual from authors WHERE AuthorID=?", (authorid,))
        if not match or not match['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        if authorimg and authorimg.startswith('http'):
            newimg, success = cache_img("author", authorid, authorimg, refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            # audiostatus = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

            if entry_status not in ['Active', 'Wanted', 'Ignored', 'Paused']:
                entry_status = 'Active'  # default for invalid/unknown or "loading"

            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                book_api = GoogleBooks()
                book_api.get_author_books(authorid, authorname, bookstatus,
                                          entrystatus=entry_status, refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                book_api = GoodReads(authorname)
                book_api.get_author_books(authorid, authorname, bookstatus,
                                          entrystatus=entry_status, refresh=refresh)

            # update totals works for existing authors only.
            # New authors need their totals updating after libraryscan or import of books.
            if not new_author:
                update_totals(authorid)

            if new_author and lazylibrarian.CONFIG['GR_FOLLOWNEW']:
                res = grfollow(authorid, True)
                if res.startswith('Unable'):
                    logger.warn(res)
                try:
                    followid = res.split("followid=")[1]
                    logger.debug('%s marked followed' % authorname)
                except IndexError:
                    followid = ''
                myDB.action('UPDATE authors SET GRfollow=? WHERE AuthorID=?',
                            (followid, authorid))
        else:
            # if we're not loading any books, mark author as ignored
            entry_status = 'Ignored'

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": entry_status}
        myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete, status %s" % (authorname, entry_status)
        logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
def addAuthorToDB(authorname=None):
    """Add or refresh an author and load their books from GoogleBooks.

    The author row is marked 'Loading', GoodReads is queried for the author
    details, every GoogleBooks result whose author name matches exactly is
    upserted into the books table with status 'Skipped', and finally the
    author row is set to 'Active' with its book count and most recent book.

    :param authorname: name of the author to add.
    :return: None
    """
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)
    GB = GoogleBooks(authorname, search_type)

    # Bound parameters: author names may contain quote characters.
    dbauthor = myDB.action(
        "SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # Without GoodReads data there is no author id or image to attach to
        # the books, so stop here instead of failing on undefined names below.
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    bookscount = 0
    books = GB.find_results()
    for book in books:
        # this is for rare cases where google returns multiple authors who share nameparts
        if book['authorname'] == authorname:
            controlValueDict = {"BookID": book['bookid']}
            newValueDict = {
                "AuthorName": book['authorname'],
                "AuthorID": authorid,
                # NOTE(review): the image URL is stored in AuthorLink here;
                # kept as in the original - confirm whether this is intended.
                "AuthorLink": authorimg,
                "BookName": book['bookname'],
                "BookDesc": book['bookdesc'],
                "BookIsbn": book['bookisbn'],
                "BookImg": book['bookimg'],
                "BookLink": book['booklink'],
                "BookRate": book['bookrate'],
                "BookPages": book['bookpages'],
                "BookDate": book['bookdate'],
                "BookLang": book['booklang'],
                "Status": "Skipped",
                "BookAdded": formatter.today()
            }
            myDB.upsert("books", newValueDict, controlValueDict)
            bookscount = bookscount + 1

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorName=? order by BookDate DESC",
        [authorname]).fetchone()

    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "Status": "Active",
        "TotalBooks": bookscount
    }
    if lastbook:
        # Only present when the author has at least one book in the table.
        newValueDict["LastBook"] = lastbook['BookName']
        newValueDict["LastLink"] = lastbook['BookLink']
        newValueDict["LastDate"] = lastbook['BookDate']
    myDB.upsert("authors", newValueDict, controlValueDict)
    logger.info("Processing complete: Added %s books to the database" % bookscount)
def find_book(self, bookid=None, queue=None):
    """Fetch one GoogleBooks volume and add it to the books table as 'Wanted'.

    The volume is requested through ``self.get_request``, its optional fields
    are read with defaults, the first listed author is resolved to a
    GoodReads author id, and the book is upserted keyed on ``bookid``.
    Nothing is added when the response is empty, the volume has no author, or
    GoodReads does not know the author.

    :param bookid: GoogleBooks volume id.
    :param queue: not used by this method.
    :return: None
    """
    threading.currentThread().name = "GB-ADD-BOOK"
    myDB = database.DBConnection()

    if not lazylibrarian.GB_API:
        logger.warn('No GoogleBooks API key, check config')
    # "or ''" keeps the concatenation working when the key is unset (None).
    URL = 'https://www.googleapis.com/books/v1/volumes/' + str(bookid) + \
        "?key=" + (lazylibrarian.GB_API or '')
    jsonresults, in_cache = self.get_request(URL)

    volume = jsonresults.get('volumeInfo') if jsonresults else None
    if not volume:
        logger.debug('No results found for %s' % bookid)
        return

    # Strip characters that cause trouble in names (":", quotes), transliterate
    # to ASCII and trim surrounding whitespace.
    bookname = volume['title']
    bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
    bookname = unidecode(u'%s' % bookname)
    bookname = bookname.strip()

    authors = volume.get('authors')
    if not authors:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return
    authorname = authors[0]

    # warn if language is in ignore list, but user said they wanted this book
    booklang = volume.get('language')
    if booklang is None:
        logger.debug('Book does not have language field')
        booklang = "Unknown"
    else:
        valid_langs = [valid_lang.strip()
                       for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if booklang not in valid_langs:
            logger.debug('Book %s language does not match preference' % bookname)

    bookpub = volume.get('publisher')
    booksub = volume.get('subtitle')
    bookdate = volume.get('publishedDate', '0000-00-00')
    bookimg = volume.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
    bookrate = float(volume.get('averageRating', 0))
    bookpages = volume.get('pageCount', 0)
    bookdesc = volume.get('description')

    categories = volume.get('categories')
    bookgenre = categories[0] if categories else None

    # Only the first identifier is considered, and only if it is an ISBN-10.
    bookisbn = None
    identifiers = volume.get('industryIdentifiers')
    if identifiers and identifiers[0].get('type') == 'ISBN_10':
        bookisbn = identifiers[0].get('identifier')

    booklink = volume['canonicalVolumeLink']

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        # No author id means the book row could not be linked to an author.
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return
    AuthorID = author['authorid']

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": "",
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": formatter.today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)
def find_book(self, bookid=None, bookstatus="None"):
    """Fetch one GoogleBooks volume and add it (and, if needed, its author).

    The volume is requested with ``gb_json_request`` and normalised through
    ``bookdict``. Language and publication-date preferences only produce log
    messages; the book is still added because the user asked for it. The
    author is resolved through GoodReads; an unknown author is inserted (and
    optionally has their books loaded). After the book is stored, its cover
    is replaced or cached, series information is set and the work page link
    is stored when available.

    :param bookid: GoogleBooks volume id.
    :param bookstatus: status to store for the book. An empty value or the
        literal default string "None" selects
        ``lazylibrarian.CONFIG['NEWBOOK_STATUS']``.
    :return: None
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, in_cache = gb_json_request(URL)

    if jsonresults is None:
        logger.debug('No results found for %s' % bookid)
        return

    # The signature default is the *string* "None", which is truthy; treat it
    # like a missing value so the configured default status is applied.
    if not bookstatus or bookstatus == "None":
        bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

    book = bookdict(jsonresults)
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(book['name'], dic)
    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace

    if not book['author']:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return

    # warn if language is in ignore list, but user said they wanted this book
    valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
    if book['lang'] not in valid_langs and 'All' not in valid_langs:
        logger.debug('Book %s googlebooks language does not match preference, %s' %
                     (bookname, book['lang']))

    if lazylibrarian.CONFIG['NO_PUBDATE']:
        if not book['date'] or book['date'] == '0000':
            logger.warn('Book %s Publication date does not match preference, %s' %
                        (bookname, book['date']))

    if lazylibrarian.CONFIG['NO_FUTURE']:
        # Guard against a missing date before comparing it with a string.
        if book['date'] and book['date'] > today()[:4]:
            logger.warn('Book %s Future publication date does not match preference, %s' %
                        (bookname, book['date']))

    authorname = book['author']
    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
        return

    AuthorID = author['authorid']
    match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
    if not match:
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?',
                           (author['authorname'],))
        if match:
            logger.debug('%s: Changing authorid from %s to %s' %
                         (author['authorname'], AuthorID, match['AuthorID']))
            AuthorID = match['AuthorID']  # we have a different authorid for that authorname
        else:
            # no author but request to add book, add author with newauthor status
            # User hit "add book" button from a search or a wishlist import
            newauthor_status = 'Active'
            if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                newauthor_status = 'Paused'
            controlValueDict = {"AuthorID": AuthorID}
            newValueDict = {
                "AuthorName": author['authorname'],
                "AuthorImg": author['authorimg'],
                "AuthorLink": author['authorlink'],
                "AuthorBorn": author['authorborn'],
                "AuthorDeath": author['authordeath'],
                "DateAdded": today(),
                "Status": newauthor_status
            }
            authorname = author['authorname']
            myDB.upsert("authors", newValueDict, controlValueDict)
            if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                self.get_author_books(AuthorID,
                                      entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": book['sub'],
        "BookDesc": book['desc'],
        "BookIsbn": book['isbn'],
        "BookPub": book['pub'],
        "BookGenre": book['genre'],
        "BookImg": book['img'],
        "BookLink": book['link'],
        "BookRate": float(book['rate']),
        "BookPages": book['pages'],
        "BookDate": book['date'],
        "BookLang": book['lang'],
        "Status": bookstatus,
        "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
        "BookAdded": today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s by %s added to the books database" % (bookname, authorname))

    # A missing image must not break the substring tests below.
    bookimg = book['img'] or ''
    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from another source
        workcover, source = getBookCover(bookid)
        if workcover:
            logger.debug('Updated cover for %s using %s' % (bookname, source))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif bookimg.startswith('http'):
        link, success, _ = cache_img("book", bookid, book['img'])
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % book['img'])

    serieslist = []
    if book['series']:
        serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
    if lazylibrarian.CONFIG['ADD_SERIES']:
        newserieslist = getWorkSeries(bookid)
        if newserieslist:
            serieslist = newserieslist
            logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
    setSeries(serieslist, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def find_book(self, bookid=None, queue=None):
    """Fetch one GoogleBooks volume over HTTP and add it as a 'Wanted' book.

    The volume JSON is downloaded with ``urllib2``, optional fields are read
    with defaults, the first listed author is resolved to a GoodReads author
    id and the book is upserted keyed on ``bookid``. Nothing is added when
    the volume has no author or GoodReads does not know the author.

    :param bookid: GoogleBooks volume id.
    :param queue: not used by this method.
    :return: None
    """
    threading.currentThread().name = "GB-ADD-BOOK"
    myDB = database.DBConnection()

    URL = "https://www.googleapis.com/books/v1/volumes/" + str(bookid) + \
        "?key=" + lazylibrarian.GB_API
    response = urllib2.urlopen(URL, timeout=30)
    try:
        jsonresults = json.loads(response.read())
    finally:
        # Always release the connection, even if the body is not valid JSON.
        response.close()

    volume = jsonresults["volumeInfo"]
    bookname = volume["title"]

    authors = volume.get("authors")
    if not authors:
        # The author is required further down, so there is nothing to add.
        logger.debug("Book %s does not contain author field" % bookname)
        return
    authorname = authors[0]

    # only log if language is not in the preferred list; the book is still added
    booklang = volume.get("language")
    if booklang is None:
        logger.debug("Book does not have language field")
        booklang = "Unknown"
    else:
        valid_langs = [valid_lang.strip()
                       for valid_lang in lazylibrarian.IMP_PREFLANG.split(",")]
        if booklang not in valid_langs:
            logger.debug("Book %s language does not match preference" % bookname)

    bookpub = volume.get("publisher")
    booksub = volume.get("subtitle")
    bookdate = volume.get("publishedDate", "0000-00-00")
    bookimg = volume.get("imageLinks", {}).get("thumbnail", "images/nocover.png")
    bookrate = float(volume.get("averageRating", 0))
    bookpages = volume.get("pageCount", 0)
    bookdesc = volume.get("description")

    categories = volume.get("categories")
    bookgenre = categories[0] if categories else None

    # Only the first identifier is considered, and only if it is an ISBN-10.
    bookisbn = None
    identifiers = volume.get("industryIdentifiers")
    if identifiers and identifiers[0].get("type") == "ISBN_10":
        bookisbn = identifiers[0].get("identifier")

    booklink = volume["canonicalVolumeLink"]

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        # No author id means the book row could not be linked to an author.
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return
    AuthorID = author["authorid"]

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": "",
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": formatter.today(),
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)
def addAuthorToDB(authorname=None):
    """Add (or refresh) an author and their Google Books results in the database.

    Steps:
      1. upsert the author row with Status "Loading";
      2. resolve the author through GoodReads and store id/link/image/dates;
      3. upsert every Google Books result whose author name matches exactly,
         with Status "Skipped";
      4. mark the author "Active" with the book count and latest book.

    If GoodReads returns nothing, an error is logged and the function
    returns; the author row is left as written in step 1.
    """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)
    GB = GoogleBooks(authorname, search_type)

    # bound parameter: names such as O'Brien must not break the statement
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until GoodReads supplies the real one
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # without an author id none of the book rows below can be written
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    bookscount = 0
    for book in GB.find_results():
        # this is for rare cases where google returns multiple authors who share nameparts
        if book['authorname'] != authorname:
            continue
        bookControlDict = {"BookID": book['bookid']}
        bookValueDict = {
            "AuthorName": book['authorname'],
            "AuthorID": authorid,
            # NOTE(review): stores the author image in AuthorLink, as the
            # original did - confirm whether authorlink was intended
            "AuthorLink": authorimg,
            "BookName": book['bookname'],
            "BookSub": book['booksub'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookPub": book['bookpub'],
            "BookGenre": book['bookgenre'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
        myDB.upsert("books", bookValueDict, bookControlDict)
        bookscount += 1

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorName=? order by BookDate DESC",
        [authorname]).fetchone()

    newValueDict = {
        "Status": "Active",
        "TotalBooks": bookscount
    }
    if lastbook is not None:
        # the author may have no books at all, in which case there is no row
        newValueDict["LastBook"] = lastbook['BookName']
        newValueDict["LastLink"] = lastbook['BookLink']
        newValueDict["LastDate"] = lastbook['BookDate']
    myDB.upsert("authors", newValueDict, controlValueDict)

    logger.info("Processing complete: Added %s books to the database" % bookscount)
def find_book(self, bookid=None, queue=None):
    """Look up one Google Books volume and store it as a "Wanted" book.

    The volume is fetched through ``self.get_request``. The title is
    stripped of colons and quotes and transliterated with ``unidecode``
    before being stored. The first listed author is resolved through
    GoodReads to obtain the AuthorID.

    Returns None without writing anything when the API returns no data,
    when the volume lists no author, or when GoodReads cannot resolve the
    author.

    ``queue`` is accepted for interface compatibility and is not used here.
    """
    threading.currentThread().name = "GB-ADD-BOOK"
    myDB = database.DBConnection()

    if not lazylibrarian.GB_API:
        logger.warn("No GoogleBooks API key, check config")
    URL = "https://www.googleapis.com/books/v1/volumes/" + str(bookid) + "?key=" + lazylibrarian.GB_API
    jsonresults, _in_cache = self.get_request(URL)
    if not jsonresults:
        logger.debug("No results found for %s" % bookid)
        return

    volume = jsonresults["volumeInfo"]

    # Darkie67: drop ":" and quotes, then transliterate (covers German umlauts)
    bookname = volume["title"]
    bookname = bookname.replace(":", "").replace('"', "").replace("'", "")
    bookname = unidecode(u"%s" % bookname)
    bookname = bookname.strip()  # strip whitespace

    authors = volume.get("authors")
    if not authors:
        logger.debug("Book %s does not contain author field, skipping" % bookname)
        return
    authorname = authors[0]

    booklang = volume.get("language")
    if booklang is None:
        logger.debug("Book does not have language field")
        booklang = "Unknown"
    else:
        # warn if language is in ignore list, but user said they wanted this book
        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(",")]
        if booklang not in valid_langs:
            logger.debug("Book %s language does not match preference" % bookname)

    bookpub = volume.get("publisher")
    booksub = volume.get("subtitle")
    bookdate = volume.get("publishedDate", "0000-00-00")
    bookimg = volume.get("imageLinks", {}).get("thumbnail", "images/nocover.png")
    bookrate = float(volume.get("averageRating", 0))
    bookpages = volume.get("pageCount", 0)
    bookdesc = volume.get("description")

    # "or [...]" also covers a present-but-empty list
    bookgenre = (volume.get("categories") or [None])[0]

    # only the first identifier is considered, and only if it is an ISBN-10
    identifier = (volume.get("industryIdentifiers") or [{}])[0]
    if identifier.get("type") == "ISBN_10":
        bookisbn = identifier.get("identifier")
    else:
        bookisbn = None

    booklink = volume["canonicalVolumeLink"]

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": author["authorid"],
        "AuthorLink": "",
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": formatter.today(),
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)
def find_book(bookid=None, queue=None):
    """Add a single Google Books volume to the database as a "Wanted" book.

    Called when the user asks for one specific book. The volume is fetched
    from the Google Books API and its first author is resolved through
    GoodReads. If that author is not yet in the ``authors`` table (by id or
    by name) it is inserted with Status "Ignored". The book row is then
    upserted, and afterwards the cover, series and work-page information
    are filled in where available.

    Returns None without writing a book when the API returns nothing, the
    volume has no author, or GoodReads has no id for the author.

    ``queue`` is accepted for interface compatibility and is not used here.
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, _in_cache = get_json_request(URL)

    if not jsonresults:
        logger.debug('No results found for %s' % bookid)
        return

    volume = jsonresults['volumeInfo']

    bookname = volume['title']
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic)
    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace

    # an empty authors list is treated the same as a missing field
    authors = volume.get('authors')
    if not authors:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return
    authorname = authors[0]

    booklang = volume.get('language')
    if booklang is None:
        logger.debug('Book does not have language field')
        booklang = "Unknown"
    else:
        # warn if language is in ignore list, but user said they wanted this book
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if booklang not in valid_langs and 'All' not in valid_langs:
            logger.debug(
                'Book %s googlebooks language does not match preference, %s' %
                (bookname, booklang))

    bookpub = volume.get('publisher', "")

    # series information, when present, is read from the text after the
    # first "(" in the subtitle, split around " Series "
    series = ""
    seriesNum = ""
    booksub = volume.get('subtitle', "")
    if 'subtitle' in volume:
        try:
            series = booksub.split('(')[1].split(' Series ')[0]
        except IndexError:
            series = ""
        try:
            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
            # an empty number raises IndexError here and is reset below
            if seriesNum[0] == '#':
                seriesNum = seriesNum[1:]
        except IndexError:
            seriesNum = ""

    bookdate = volume.get('publishedDate', '0000-00-00')
    bookimg = volume.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
    bookrate = float(volume.get('averageRating', 0))
    bookpages = volume.get('pageCount', 0)
    bookdesc = volume.get('description', "")

    # "or [...]" also covers a present-but-empty list
    bookgenre = (volume.get('categories') or [""])[0]

    # only the first identifier is considered, and only if it is an ISBN-10
    identifier = (volume.get('industryIdentifiers') or [{}])[0]
    if identifier.get('type') == 'ISBN_10':
        bookisbn = identifier.get('identifier', "")
    else:
        bookisbn = ""

    booklink = volume['canonicalVolumeLink']

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return

    AuthorID = author['authorid']
    # values are embedded in double-quoted SQL literals, so double any
    # embedded quote to keep the statement well-formed
    match = myDB.match(
        'SELECT AuthorID from authors WHERE AuthorID="%s"' %
        str(AuthorID).replace('"', '""'))
    if not match:
        match = myDB.match(
            'SELECT AuthorID from authors WHERE AuthorName="%s"' %
            author['authorname'].replace('"', '""'))
        if match:
            logger.debug(
                '%s: Changing authorid from %s to %s' %
                (author['authorname'], AuthorID, match['AuthorID']))
            AuthorID = match['AuthorID']  # we have a different authorid for that authorname
        else:
            # no author but request to add book, add author as "ignored"
            # User hit "add book" button from a search
            controlValueDict = {"AuthorID": AuthorID}
            newValueDict = {
                "AuthorName": author['authorname'],
                "AuthorImg": author['authorimg'],
                "AuthorLink": author['authorlink'],
                "AuthorBorn": author['authorborn'],
                "AuthorDeath": author['authordeath'],
                "DateAdded": today(),
                "Status": "Ignored"
            }
            myDB.upsert("authors", newValueDict, controlValueDict)

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            myDB.upsert("books", {"BookImg": workcover}, {"BookID": bookid})
    elif bookimg and bookimg.startswith('http'):
        link, success = cache_img("book", bookid, bookimg)
        if success:
            myDB.upsert("books", {"BookImg": link}, {"BookID": bookid})
        else:
            logger.debug('Failed to cache image for %s' % bookimg)

    if lazylibrarian.CONFIG['ADD_SERIES']:
        # prefer series info from librarything
        seriesdict = getWorkSeries(bookid)
        if seriesdict:
            logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
        elif series:
            seriesdict = {cleanName(unaccented(series)): seriesNum}
        setSeries(seriesdict, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        myDB.upsert("books", {"WorkPage": worklink}, {"BookID": bookid})
def LibraryScan(dir=None):
    """Scan the ebook directory and mark books found on disk as "Open".

    ``dir`` defaults to ``lazylibrarian.DOWNLOAD_DIR``. The function returns
    None in all cases; results are written to the database and the log.

    Outline:
      * recreate the ``stats`` table;
      * on a full scan, flag "Open" books whose file has disappeared with
        ``lazylibrarian.NOTFOUND_STATUS``;
      * walk the tree (skipping directories starting with "." or "_"), and
        for each file with a configured ebook extension obtain author/title
        from ``metadata.opf``, from the epub/mobi itself, or finally from
        the filename via the EBOOK_DEST_FILE pattern;
      * cache the ISBN-prefix -> language mapping when both are known;
      * import unknown authors (if ADD_AUTHOR is set and the GoodReads name
        fuzzily matches), then mark the matching book "Open" and record its
        file location;
      * log statistics and refresh HaveBooks/UnignoredBooks for every
        author touched by this scan.
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
        return

    myDB = database.DBConnection()

    myDB.action("drop table if exists stats")
    myDB.action(
        "create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )"
    )

    # authors whose book counts must be refreshed at the end of the scan
    new_authors = []

    logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select("select AuthorName, BookName, BookFile, BookID from books where Status=?", [u"Open"])
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info("Missing books will be marked as %s" % status)
        for book in books:
            bookAuthor = book["AuthorName"]
            bookID = book["BookID"]
            bookfile = book["BookFile"]

            # an empty/NULL BookFile counts as missing
            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action("update books set Status=? where BookID=?", [status, bookID])
                myDB.action('update books set BookFile="" where BookID=?', [bookID])
                logger.info("Book %s updated as not found on disk" % bookfile)

            if bookAuthor not in new_authors:
                new_authors.append(bookAuthor)

    # subdirectories in which a book has already been identified; consulted
    # only when IMP_SINGLEBOOK is set (one book per directory, many formats)
    processed_subdirectories = []

    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching: escape every character, then turn
    # the $Author / $Title placeholders into named groups
    matchString = "".join("\\" + char for char in lazylibrarian.EBOOK_DEST_FILE)
    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    # alternation of whole extensions (a character class would accept any
    # single letter occurring in any extension)
    extension_group = "\\.(?:" + "|".join(re.escape(book_type) for book_type in booktype_list) + ")$"
    matchString = (
        matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)")
        + extension_group
    )
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        # prune hidden directories and "_" directories (magazines) in place
        # so os.walk does not descend into them
        d[:] = [directory for directory in d if not directory.startswith((".", "_"))]

        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, "")

            # Skip if we've done this directory before. Conditional on a
            # switch in config.ini in case user keeps multiple different
            # books in the same subdirectory
            if lazylibrarian.IMP_SINGLEBOOK and subdirectory in processed_subdirectories:
                logger.debug("[%s] already scanned" % subdirectory)
                continue

            logger.info(
                "[%s] Now scanning subdirectory %s"
                % (
                    dir.decode(lazylibrarian.SYS_ENCODING, "replace"),
                    subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"),
                )
            )

            # Is it a book (whole extension found in the configured list)
            extn = files.split(".")[-1]
            if extn not in booktype_list:
                continue

            # reset per file so nothing leaks over from the previous file
            language = "Unknown"
            isbn = ""
            author = ""
            book = ""
            match = 0

            # If metadata.opf exists, use that
            res = {}
            try:
                metafile = os.path.join(r, "metadata.opf").encode(lazylibrarian.SYS_ENCODING)
                res = get_book_info(metafile) or {}
            except Exception:
                logger.debug("No metadata file in %s" % r)
            if "title" in res and "creator" in res:
                book = res["title"]
                author = res["creator"]
                language = res.get("language") or "Unknown"
                isbn = res.get("identifier") or ""
                match = 1
                logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))

            # else if epub or mobi, read metadata from the book itself
            if not match and extn in ("epub", "mobi"):
                book_file = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                try:
                    res = get_book_info(book_file) or {}
                except Exception:
                    res = {}
                if "title" in res and "creator" in res:
                    book = res["title"]
                    author = res["creator"]
                    language = res.get("language") or "Unknown"
                    isbn = res.get("identifier") or ""
                    match = 1
                    logger.debug("book meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))

            # else have to try pattern match for author/title and look up
            # isbn/lang from LT or GR later
            if not match:
                match = pattern.match(files)
                if match:
                    author = match.group("author")
                    book = match.group("book")
                else:
                    logger.debug("Pattern match failed [%s]" % files)
                    continue

            # flag that we found a book in this subdirectory
            processed_subdirectories.append(subdirectory)

            # If we have a valid looking isbn, and language != "Unknown", add it to cache
            isbn = re.sub("[- ]", "", isbn)  # strip any formatting from the isbn
            if len(isbn) not in (10, 13) or not isbn.isdigit():
                isbn = ""
            if isbn != "" and language != "Unknown":
                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                # the cache key is three digits: the start of an ISBN-10, or
                # the digits after the 3-digit prefix of an ISBN-13
                if len(isbn) == 10:
                    isbnhead = isbn[0:3]
                else:
                    isbnhead = isbn[3:6]
                cached = myDB.action("SELECT lang FROM languages where isbn = ?", [isbnhead]).fetchone()
                if not cached:
                    myDB.action("insert into languages values (?, ?)", [isbnhead, language])
                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                else:
                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

            # get authors name in a consistent format
            if "," in author:  # "surname, forename"
                words = author.split(",")
                author = words[1].strip() + " " + words[0].strip()  # "forename surname"
            author = author.replace(". ", " ")
            author = author.replace(".", " ")
            author = author.replace("  ", " ")  # collapse doubled spaces

            # Check if the author exists, and import the author if not,
            # before starting any complicated book-name matching to save repeating the search
            check_exist_author = myDB.action("SELECT * FROM authors where AuthorName=?", [author]).fetchone()
            if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                # no match for supplied author, but we're allowed to add new ones
                GR = GoodReads(author)
                try:
                    author_gr = GR.find_author_id()
                except Exception:
                    logger.error("Error finding author id for [%s]" % author)
                    continue

                # only try to add if GR data matches found author data
                if author_gr:
                    authorname = author_gr["authorname"]

                    # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                    match_auth = author.replace(".", "_").replace(" ", "_").replace("__", "_")
                    match_name = authorname.replace(".", "_").replace(" ", "_").replace("__", "_")

                    # allow a degree of fuzziness to cater for different
                    # accented character handling: the filename may have the
                    # accented or un-accented version of a character
                    match_fuzz = fuzz.ratio(match_auth, match_name)
                    if match_fuzz < 90:
                        logger.info("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                        logger.info("match author [%s] authorname [%s]" % (match_auth, match_name))

                    # To save loading hundreds of books by unknown authors at GR or GB, ignore if author "Unknown"
                    if (author != "Unknown") and (match_fuzz >= 90):
                        # use the "intact" GoodReads name, not one of the
                        # mangled versions, otherwise the books appear to be
                        # by a different author!
                        author = author_gr["authorname"]
                        # this new authorname may already be in the database, so check again
                        check_exist_author = myDB.action(
                            "SELECT * FROM authors where AuthorName=?", [author]
                        ).fetchone()
                        if not check_exist_author:
                            logger.info("Adding new author [%s]" % author)
                            if author not in new_authors:
                                new_authors.append(author)
                            try:
                                importer.addAuthorToDB(author)
                                check_exist_author = myDB.action(
                                    "SELECT * FROM authors where AuthorName=?", [author]
                                ).fetchone()
                            except Exception:
                                logger.error("Error adding author [%s]" % author)
                                continue

            # check author exists in db, either newly loaded or already there
            if not check_exist_author:
                logger.info("Failed to match author [%s] in database" % author)
                continue

            # author exists, check if this book by this author is in our database
            bookid = find_book_in_db(myDB, author, book)
            if not bookid:
                continue

            # check if book is already marked as "Open" (if so, we already had it)
            check_status = myDB.action("SELECT Status from books where BookID=?", [bookid]).fetchone()
            if check_status["Status"] != "Open":
                # update status as we've got this book
                myDB.action("UPDATE books set Status=? where BookID=?", [u"Open", bookid])
                book_file = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                # update book location so we can check if it gets removed, or maybe allow click-to-open?
                myDB.action("UPDATE books set BookFile=? where BookID=?", [book_file, bookid])
                new_book_count += 1
                # the author's counts changed, so refresh them below
                if author not in new_authors:
                    new_authors.append(author)

    cachesize = myDB.action("select count(*) from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)

    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats"
    ).fetchone()
    if lazylibrarian.BOOK_API == "GoogleBooks":
        logger.info("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"])
        logger.info("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"])
    if lazylibrarian.BOOK_API == "GoodReads":
        logger.info("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"])
        logger.info("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"])
    logger.info("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"])
    logger.info("Language cache was hit %s times" % stats["sum(cache_hits)"])
    logger.info("Unwanted language removed %s books" % stats["sum(bad_lang)"])
    logger.info("Unwanted characters removed %s books" % stats["sum(bad_char)"])
    logger.info("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"])
    logger.info("ISBN Language cache holds %s entries" % cachesize["count(*)"])

    unknown_lang = len(myDB.select("select BookID from Books where status=? and BookLang=?", ["Open", "Unknown"]))
    logger.info("There are %s books in your library with unknown language" % unknown_lang)

    logger.info("Updating %i authors" % len(new_authors))
    for auth in new_authors:
        havebooks = len(myDB.select("select BookName from Books where status=? and AuthorName=?", ["Open", auth]))
        myDB.action("UPDATE authors set HaveBooks=? where AuthorName=?", [havebooks, auth])
        unignored = len(myDB.select("select BookName from Books where status!=? and AuthorName=?", ["Ignored", auth]))
        myDB.action("UPDATE authors set UnignoredBooks=? where AuthorName=?", [unignored, auth])

    logger.info("Library scan complete")
def LibraryScan(dir=None):
    """Scan the ebook directory, mark books found on disk and refresh counts.

    ``dir`` defaults to ``lazylibrarian.DOWNLOAD_DIR``. The function returns
    None in all cases; results are written to the database and the log.

    Outline:
      * recreate the ``stats`` table;
      * on a full scan, flag "Open" books whose file has disappeared with
        ``lazylibrarian.NOTFOUND_STATUS``;
      * walk the tree (skipping directories starting with "." or "_"); for
        each valid book file read embedded epub/mobi metadata, let any .opf
        file in the directory override it, and fall back to matching the
        filename against the EBOOK_DEST_FILE pattern;
      * cache the ISBN-prefix -> language mapping when both are known;
      * import unknown authors (if ADD_AUTHOR is set and the GoodReads name
        fuzzily matches), then mark the matching book "Open" and record its
        file location;
      * log statistics, recompute book counts for every author, and cache
        remote cover images.

    All SQL values are passed as bound parameters so that quotes in author
    names or file paths cannot break a statement.
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
                            GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    logger.info(
        'Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookfile = book['BookFile']
            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status=? where BookID=?', [status, book['BookID']])
                myDB.action('update books set BookFile="" where BookID=?', [book['BookID']])
                logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName']))

    # to save repeat-scans of the same directory if it contains multiple formats of the same book,
    # keep track of which directories we've already looked at
    processed_subdirectories = []

    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching: escape every character, then turn
    # the $Author / $Title placeholders into named groups
    matchString = ''.join('\\' + char for char in lazylibrarian.EBOOK_DEST_FILE)
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    # alternation of whole extensions (a character class would accept any
    # single letter occurring in any extension)
    extension_group = '\\.(?:' + '|'.join(re.escape(book_type) for book_type in booktype_list) + ')$'
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + extension_group
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        # prune hidden directories and "_" directories (magazines) in place
        # so os.walk does not descend into them
        d[:] = [directory for directory in d if not directory.startswith((".", "_"))]

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode('utf-8')

            subdirectory = r.replace(dir, '')
            # Skip if we've done this directory before. Conditional on a
            # switch in config.ini in case user keeps multiple different
            # books in the same subdirectory
            if lazylibrarian.IMP_SINGLEBOOK and subdirectory in processed_subdirectories:
                logger.debug("[%s] already scanned" % subdirectory)
                continue

            if not formatter.is_valid_booktype(files):
                continue

            logger.debug("[%s] Now scanning subdirectory %s" % (dir, subdirectory))

            language = "Unknown"
            isbn = ""
            book = ""
            author = ""
            match = 0
            extn = files.split('.')[-1]

            # if it's an epub or a mobi we can try to read metadata from it
            if extn in ("epub", "mobi"):
                book_filename = os.path.join(
                    r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))
                try:
                    res = get_book_info(book_filename) or {}
                except Exception:
                    res = {}
                if 'title' in res and 'creator' in res:  # this is the minimum we need
                    match = 1
                    book = res['title']
                    author = res['creator']
                    if 'language' in res:
                        language = res['language']
                    if 'identifier' in res:
                        isbn = res['identifier']
                    if 'type' in res:
                        extn = res['type']
                    logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                 (isbn, language, author, book, extn))
                else:
                    logger.debug("Book meta incomplete in %s" % book_filename)

            # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
            # just look for any .opf file in the current directory since we don't know
            # LL preferred authorname/bookname at this point.
            # Allow metadata in file to override book contents as may be users pref
            metafile = opf_file(r)
            try:
                res = get_book_info(metafile) or {}
            except Exception:
                res = {}
            if 'title' in res and 'creator' in res:  # this is the minimum we need
                match = 1
                book = res['title']
                author = res['creator']
                if 'language' in res:
                    language = res['language']
                if 'identifier' in res:
                    isbn = res['identifier']
                logger.debug(
                    "file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
            else:
                logger.debug("File meta incomplete in %s" % metafile)

            if not match:
                # no author/book from metadata file, and not embedded either
                match = pattern.match(files)
                if match:
                    author = match.group("author")
                    book = match.group("book")
                else:
                    logger.debug("Pattern match failed [%s]" % files)
                    continue

            # flag that we found a book in this subdirectory
            processed_subdirectories.append(subdirectory)

            # If we have a valid looking isbn, and language != "Unknown", add it to cache
            if language != "Unknown" and formatter.is_valid_isbn(isbn):
                logger.debug(
                    "Found Language [%s] ISBN [%s]" % (language, isbn))
                # we need to add it to language cache if not already
                # there, is_valid_isbn has checked length is 10 or 13
                if len(isbn) == 10:
                    isbnhead = isbn[0:3]
                else:
                    isbnhead = isbn[3:6]
                cached = myDB.action(
                    'SELECT lang FROM languages where isbn = ?', [isbnhead]).fetchone()
                if not cached:
                    myDB.action(
                        'insert into languages values (?, ?)', [isbnhead, language])
                    logger.debug(
                        "Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                else:
                    logger.debug(
                        "Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

            # get authors name in a consistent format
            if "," in author:  # "surname, forename"
                words = author.split(',')
                author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
            # a single leading initial ("J R R Tolkien"): use dots between
            # the name parts; the length check protects very short names
            if len(author) > 1 and author[1] == ' ':
                author = author.replace(' ', '.')
                author = author.replace('..', '.')

            # Check if the author exists, and import the author if not,
            # before starting any complicated book-name matching to save repeating the search
            check_exist_author = myDB.action(
                'SELECT * FROM authors where AuthorName=?', [author]).fetchone()
            if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                # no match for supplied author, but we're allowed to add new ones
                GR = GoodReads(author)
                try:
                    author_gr = GR.find_author_id()
                except Exception:
                    logger.warn(
                        "Error finding author id for [%s]" % author)
                    continue

                # only try to add if GR data matches found author data
                if author_gr:
                    authorname = author_gr['authorname']

                    # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                    match_auth = author.replace('.', '_').replace(' ', '_').replace('__', '_')
                    match_name = authorname.replace('.', '_').replace(' ', '_').replace('__', '_')
                    match_name = common.remove_accents(match_name)
                    match_auth = common.remove_accents(match_auth)

                    # allow a degree of fuzziness to cater for different
                    # accented character handling: the filename may have the
                    # accented or un-accented version of a character.
                    # fuzz.ratio doesn't lowercase for us
                    match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                    if match_fuzz < 90:
                        logger.debug(
                            "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                        logger.debug(
                            "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name))

                    # To save loading hundreds of books by unknown
                    # authors at GR or GB, ignore if author "Unknown"
                    if (author != "Unknown") and (match_fuzz >= 90):
                        # use the "intact" GoodReads name, not one of the
                        # mangled versions, otherwise the books appear to be
                        # by a different author!
                        author = author_gr['authorname']
                        # this new authorname may already be in the
                        # database, so check again
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName=?', [author]).fetchone()
                        if not check_exist_author:
                            logger.debug(
                                "Adding new author [%s]" % author)
                            try:
                                importer.addAuthorToDB(author)
                                check_exist_author = myDB.action(
                                    'SELECT * FROM authors where AuthorName=?', [author]).fetchone()
                            except Exception:
                                logger.debug("Error adding author [%s]" % author)
                                continue

            # check author exists in db, either newly loaded or already there
            if not check_exist_author:
                logger.debug(
                    "Failed to match author [%s] in database" % author)
                continue

            # author exists, check if this book by this author is in our database
            # metadata might have quotes in book name
            book = book.replace('"', '').replace("'", "")
            bookid = find_book_in_db(myDB, author, book)
            if not bookid:
                continue

            # check if book is already marked as "Open" (if so, we already had it)
            check_status = myDB.action(
                'SELECT Status from books where BookID=?', [bookid]).fetchone()
            if check_status['Status'] != 'Open':
                # update status as we've got this book
                myDB.action(
                    'UPDATE books set Status="Open" where BookID=?', [bookid])
                book_filename = os.path.join(r, files)
                # update book location so we can check if it
                # gets removed, or allow click-to-open
                myDB.action(
                    'UPDATE books set BookFile=? where BookID=?', [book_filename, bookid])
                new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)

    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
            sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
    if stats['sum(GR_book_hits)'] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" % stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" % stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" % stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" % stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" % stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" % stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" % stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" % stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" % stats['sum(uncached)'])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])

    unknown_lang = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if unknown_lang:
        logger.warn("There are %s books in your library with unknown language" % unknown_lang)

    authors = myDB.select('select AuthorName from authors')
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug('Updating bookcounts for %i authors' % len(authors))
    for author in authors:
        name = author['AuthorName']
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName=? AND (Status="Have" OR Status="Open")',
            [name]).fetchone()
        myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?', [havebooks['counter'], name])
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName=?', [name]).fetchone()
        myDB.action('UPDATE authors set TotalBooks=? where AuthorName=?', [totalbooks['counter'], name])
        unignoredbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName=? AND Status!="Ignored"',
            [name]).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?', [unignoredbooks['counter'], name])

    covers = myDB.action("select count('bookimg') as counter from books where bookimg like 'http%'").fetchone()
    logger.info("Caching covers for %s books" % covers['counter'])

    # fetch all rows first: the loop below updates the table being read
    images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
    for item in images:
        bookid = item['bookid']
        bookimg = item['bookimg']
        newimg = bookwork.cache_cover(bookid, bookimg)
        if newimg != bookimg:
            myDB.action('update books set BookImg=? where BookID=?', [newimg, bookid])

    logger.info('Library scan complete')
def LibraryScan(startdir=None):
    """Scan a directory tree, adding any books found there to the database.

    For each file with a valid book extension, author/title (and ISBN and
    language where available) are taken from embedded epub/mobi metadata,
    then from an .opf file in the same directory (which overrides embedded
    data), and finally from a filename pattern built from
    lazylibrarian.EBOOK_DEST_FILE.  Books matched in the database are marked
    "Open" and their file location is recorded.

    :param startdir: directory to scan; lazylibrarian.DESTINATION_DIR is
        used when this is empty or None.
    :return: number of books newly marked as "Open", or 0 when there is no
        usable directory to scan.
    """
    if not startdir:
        if not lazylibrarian.DESTINATION_DIR:
            return 0
        startdir = lazylibrarian.DESTINATION_DIR

    if not os.path.isdir(startdir):
        logger.warn('Cannot find directory: %s. Not scanning' % startdir)
        return 0

    myDB = database.DBConnection()

    # keep statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        myDB.action('DELETE from stats')

    logger.info('Scanning ebook directory: %s' % startdir)

    new_book_count = 0
    file_count = 0
    author = ""

    if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))

    # to save repeat-scans of the same directory if it contains multiple formats of the same book,
    # keep track of which directories we've already looked at
    processed_subdirectories = []

    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching: every character is backslash-escaped,
    # then the escaped $Author / $Title tokens are swapped for named groups
    matchString = ''.join('\\' + char for char in lazylibrarian.EBOOK_DEST_FILE)
    booktypes = '|'.join(getList(lazylibrarian.EBOOK_TYPE))
    # The extensions must be an alternation group.  A character class such as
    # [epub|mobi] only matches ONE character, which cut titles short at the
    # first ".e", ".m" etc.
    matchString = matchString.replace(
        "\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\\.(?:' + booktypes + ')'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(startdir):
        for directory in d[:]:
            # prevent magazine being scanned
            if directory.startswith("_") or directory.startswith("."):
                d.remove(directory)

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode(lazylibrarian.SYS_ENCODING)

            subdirectory = r.replace(startdir, '')
            # Skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
                continue

            # If this is a book, try to get author/title/isbn/language
            # if epub or mobi, read metadata from the book
            # If metadata.opf exists, use that allowing it to override
            # embedded metadata. User may have edited metadata.opf
            # to merge author aliases together
            # If all else fails, try pattern match for author/title
            # and look up isbn/lang from LT or GR later
            if not is_valid_booktype(files):
                continue

            match = 0
            logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory))

            language = "Unknown"
            isbn = ""
            book = ""
            author = ""
            extn = os.path.splitext(files)[1]

            # if it's an epub or a mobi we can try to read metadata from it
            if (extn == ".epub") or (extn == ".mobi"):
                book_filename = os.path.join(
                    r.encode(lazylibrarian.SYS_ENCODING),
                    files.encode(lazylibrarian.SYS_ENCODING))

                try:
                    res = get_book_info(book_filename)
                except Exception:
                    # unreadable metadata is treated as "no metadata"
                    res = {}
                if 'title' in res and 'creator' in res:  # this is the minimum we need
                    match = 1
                    book = res['title']
                    author = res['creator']
                    if 'language' in res:
                        language = res['language']
                    if 'identifier' in res:
                        isbn = res['identifier']
                    if 'type' in res:
                        extn = res['type']
                    logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                 (isbn, language, author, book, extn))
                else:
                    logger.debug("Book meta incomplete in %s" % book_filename)

            # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
            # just look for any .opf file in the current directory since we don't know
            # LL preferred authorname/bookname at this point.
            # Allow metadata in file to override book contents as may be users pref
            metafile = opf_file(r)
            try:
                res = get_book_info(metafile)
            except Exception:
                res = {}
            if 'title' in res and 'creator' in res:  # this is the minimum we need
                match = 1
                book = res['title']
                author = res['creator']
                if 'language' in res:
                    language = res['language']
                if 'identifier' in res:
                    isbn = res['identifier']
                logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
            else:
                logger.debug("File meta incomplete in %s" % metafile)

            if not match:  # no author/book from metadata file, and not embedded either
                match = pattern.match(files)
                if match:
                    author = match.group("author")
                    book = match.group("book")
                else:
                    logger.debug("Pattern match failed [%s]" % files)

            if not match:
                continue

            # flag that we found a book in this subdirectory
            processed_subdirectories.append(subdirectory)

            # If we have a valid looking isbn, and language != "Unknown", add it to cache
            if language != "Unknown" and is_valid_isbn(isbn):
                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                # we need to add it to language cache if not already
                # there, is_valid_isbn has checked length is 10 or 13
                if len(isbn) == 10:
                    isbnhead = isbn[0:3]
                else:
                    isbnhead = isbn[3:6]
                cached = myDB.action(
                    'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                if not cached:
                    myDB.action(
                        'insert into languages values ("%s", "%s")' % (isbnhead, language))
                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                else:
                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

            # get authors name in a consistent format
            if "," in author:  # "surname, forename"
                words = author.split(',')
                author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
            # length guard: an empty or one-character name must not raise IndexError
            if len(author) > 1 and author[1] == ' ':
                author = author.replace(' ', '.')
                author = author.replace('..', '.')

            # Check if the author exists, and import the author if not,
            # before starting any complicated book-name matching to save repeating the search
            check_exist_author = myDB.action(
                'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
            if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                # no match for supplied author, but we're allowed to
                # add new ones
                GR = GoodReads(author)
                try:
                    author_gr = GR.find_author_id()
                except Exception:
                    logger.warn("Error finding author id for [%s]" % author)
                    continue

                # only try to add if GR data matches found author data
                if author_gr:
                    authorname = author_gr['authorname']

                    # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                    match_auth = author.replace('.', '_')
                    match_auth = match_auth.replace(' ', '_')
                    match_auth = match_auth.replace('__', '_')
                    match_name = authorname.replace('.', '_')
                    match_name = match_name.replace(' ', '_')
                    match_name = match_name.replace('__', '_')
                    match_name = unaccented(match_name)
                    match_auth = unaccented(match_auth)
                    # allow a degree of fuzziness to cater for different accented character handling.
                    # some author names have accents,
                    # filename may have the accented or un-accented version of the character
                    # The currently non-configurable value of fuzziness might need to go in config
                    # We stored GoodReads unmodified author name in
                    # author_gr, so store in LL db under that
                    # fuzz.ratio doesn't lowercase for us
                    match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                    if match_fuzz < 90:
                        logger.debug(
                            "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                        logger.debug(
                            "Failed to match author [%s] to authorname [%s]" %
                            (match_auth, match_name))

                    # To save loading hundreds of books by unknown
                    # authors at GR or GB, ignore if author "Unknown"
                    if (author != "Unknown") and (match_fuzz >= 90):
                        # use "intact" name for author that we stored in
                        # GR author_dict, not one of the various mangled versions
                        # otherwise the books appear to be by a different author!
                        author = author_gr['authorname']
                        # this new authorname may already be in the
                        # database, so check again
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' % author).fetchone()
                        if not check_exist_author:
                            logger.info("Adding new author [%s]" % author)
                            try:
                                addAuthorToDB(author)
                                check_exist_author = myDB.action(
                                    'SELECT * FROM authors where AuthorName="%s"' %
                                    author).fetchone()
                            except Exception:
                                continue

            # check author exists in db, either newly loaded or already there
            if not check_exist_author:
                logger.debug("Failed to match author [%s] in database" % author)
                continue

            # author exists, check if this book by this author is in our database
            # metadata might have quotes in book name
            book = book.replace('"', '').replace("'", "")
            bookid = find_book_in_db(myDB, author, book)

            if not bookid:
                logger.debug(
                    "Failed to match book [%s] by [%s] in database" % (book, author))
                continue

            # check if book is already marked as "Open" (if so,
            # we already had it)
            check_status = myDB.action(
                'SELECT Status from books where BookID="%s"' % bookid).fetchone()
            if check_status['Status'] != 'Open':
                # update status as we've got this book
                myDB.action('UPDATE books set Status="Open" where BookID="%s"' % bookid)

                book_filename = os.path.join(r, files)

                # update book location so we can check if it
                # gets removed, or allow click-to-open
                myDB.action(
                    'UPDATE books set BookFile="%s" where BookID="%s"' %
                    (book_filename, bookid))

                # update cover file to cover.jpg in book folder (if exists)
                bookdir = book_filename.rsplit(os.sep, 1)[0]
                coverimg = os.path.join(bookdir, 'cover.jpg')
                cachedir = os.path.join(
                    str(lazylibrarian.PROG_DIR),
                    'data' + os.sep + 'images' + os.sep + 'cache')
                cacheimg = os.path.join(cachedir, bookid + '.jpg')
                if os.path.isfile(coverimg):
                    copyfile(coverimg, cacheimg)

                new_book_count += 1

    logger.info("%s new/modified book%s found and added to the database" %
                (new_book_count, plural(new_book_count)))
    logger.info("%s file%s processed" % (file_count, plural(file_count)))

    # show statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), "
            "sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) "
            "FROM stats").fetchone()
        # NOTE(review): the extent of this block was reconstructed from
        # whitespace-collapsed source - confirm which lines belong inside it.
        if stats['sum(GR_book_hits)'] is not None:
            # only show stats if new books added
            if lazylibrarian.BOOK_API == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)'])))
            if lazylibrarian.BOOK_API == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (stats['sum(LT_lang_hits)'], plural(stats['sum(LT_lang_hits)'])))
            logger.debug("Language cache was hit %s time%s" %
                         (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)'])))
            logger.debug("Unwanted language removed %s book%s" %
                         (stats['sum(bad_lang)'], plural(stats['sum(bad_lang)'])))
            logger.debug("Unwanted characters removed %s book%s" %
                         (stats['sum(bad_char)'], plural(stats['sum(bad_char)'])))
            logger.debug("Unable to cache %s book%s with missing ISBN" %
                         (stats['sum(uncached)'], plural(stats['sum(uncached)'])))
            logger.debug("Found %s duplicate book%s" %
                         (stats['sum(duplicates)'], plural(stats['sum(duplicates)'])))
            logger.debug("Cache %s hit%s, %s miss" %
                         (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT),
                          lazylibrarian.CACHE_MISS))
            cachesize = myDB.action(
                "select count('ISBN') as counter from languages").fetchone()
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
            nolang = len(myDB.select(
                'select BookID from Books where status="Open" and BookLang="Unknown"'))
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" %
                            (nolang, plural(nolang)))

        # Update bookcounts for all authors, not just new ones - refresh may have located
        # new books for existing authors especially if switched provider gb/gr
        authors = myDB.select('select AuthorID from authors')
    else:
        # single author/book import
        authors = myDB.select(
            'select AuthorID from authors where AuthorName = "%s"' % author)

    logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
    for author_row in authors:
        update_totals(author_row['AuthorID'])

    # replace remote cover links with locally cached copies
    images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
    if len(images):
        logger.info("Caching cover%s for %i book%s" %
                    (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            bookid = item['bookid']
            bookimg = item['bookimg']
            newimg = cache_cover(bookid, bookimg)
            if newimg is not None:
                myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

    # same again for author images
    images = myDB.select(
        'select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
    if len(images):
        logger.info("Caching image%s for %i author%s" %
                    (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            authorid = item['authorid']
            authorimg = item['authorimg']
            newimg = cache_cover(authorid, authorimg)
            if newimg is not None:
                myDB.action(
                    'update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))

    setWorkPages()
    logger.info('Library scan complete')
    return new_book_count
def addBookToDB(bookid, authorname):
    """Add a single book, and the details of its author, to the database.

    The book row is first marked "Loading", then filled in from the result
    of GoodReads.find_book() with status "Skipped".  The author row is
    then updated from GoodReads.find_author_id().

    :param bookid: identifier of the book to add.
    :param authorname: name of the book's author.
    """
    search_type = 'book'  # was named "type", which shadowed the builtin
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)
    # NOTE(review): the result was never used in the original either; the
    # call is kept in case the constructor has side effects - confirm.
    GoogleBooks(bookid, search_type)

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?', [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {"BookID": "BookID: %s" % (bookid), "Status": "Loading"}
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()

    if not book:
        # %-formatting so a non-string bookid cannot raise TypeError here
        logger.warn("Error fetching bookinfo for BookID: %s" % bookid)
    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
        myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # The placeholder must NOT be quoted: '?' inside quotes is a literal
    # question mark, so the supplied binding did not match any parameter.
    dbauthor = myDB.action(
        "SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    # NOTE(review): as in the original, these "Loading" values are never
    # written to the database before the lookup below - confirm intent.
    if dbauthor is None:
        newValueDict = {
            "AuthorName": "Authorname: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()

    if not author:
        logger.warn("Error fetching authorinfo with name: %s" % authorname)
    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": author['authorid'],
            "AuthorLink": author['authorlink'],
            "AuthorImg": author['authorimg'],
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
def LibraryScan(dir=None):
    """Scan a directory tree and mark any books found there as "Open".

    The stats table is recreated on every run.  For each file with a valid
    book extension, author/title (and ISBN and language where available)
    are taken from embedded epub/mobi metadata, then from an .opf file in
    the same directory (which overrides embedded data), and finally from a
    filename pattern built from lazylibrarian.EBOOK_DEST_FILE.  Afterwards
    the per-author book counts are refreshed.

    :param dir: directory to scan; lazylibrarian.DOWNLOAD_DIR is used when
        this is empty or None.  (The name shadows the builtin but is part
        of the public signature.)
    :return: None
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" %
                    dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
        return

    myDB = database.DBConnection()

    # start every scan with an empty statistics table
    myDB.action("drop table if exists stats")
    myDB.action(
        "create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, "
        "GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )"
    )

    logger.info("Scanning ebook directory: %s" %
                dir.decode(lazylibrarian.SYS_ENCODING, "replace"))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info("Missing books will be marked as %s" % status)
        for book in books:
            bookName = book["BookName"]
            bookAuthor = book["AuthorName"]
            bookID = book["BookID"]
            bookfile = book["BookFile"]

            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn("Book %s - %s updated as not found on disk" % (bookAuthor, bookName))

    # saves repeat-scans of the same directory when it contains
    # multiple formats of the same book
    processed_subdirectories = []

    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching: every character is backslash-escaped,
    # then the escaped $Author / $Title tokens are swapped for named groups
    matchString = "".join("\\" + char for char in lazylibrarian.EBOOK_DEST_FILE)
    booktypes = "|".join(formatter.getList(lazylibrarian.EBOOK_TYPE))
    # The extensions must be an alternation group.  A character class such as
    # [epub|mobi] only matches ONE character, which cut titles short at the
    # first ".e", ".m" etc.
    matchString = (
        matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)")
        + "\\.(?:" + booktypes + ")"
    )
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            # skip hidden directories, and "_" ones to prevent magazine being scanned
            if directory.startswith(".") or directory.startswith("_"):
                d.remove(directory)

        for files in f:
            file_count += 1

            subdirectory = r.replace(dir, "")
            # Skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
                continue

            # If this is a book, try to get author/title/isbn/language
            # if epub or mobi, read metadata from the book
            # If metadata.opf exists, use that allowing it to override
            # embedded metadata. User may have edited metadata.opf
            # to merge author aliases together
            # If all else fails, try pattern match for author/title
            # and look up isbn/lang from LT or GR later
            if not formatter.is_valid_booktype(files):
                continue

            match = 0
            logger.debug(
                "[%s] Now scanning subdirectory %s"
                % (
                    dir.decode(lazylibrarian.SYS_ENCODING, "replace"),
                    subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"),
                )
            )

            language = "Unknown"
            isbn = ""
            book = ""
            author = ""
            extn = files.split(".")[-1]

            # if it's an epub or a mobi we can try to read metadata from it
            if (extn == "epub") or (extn == "mobi"):
                book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                try:
                    res = get_book_info(book_filename)
                except Exception:
                    # unreadable metadata is treated as "no metadata"
                    res = {}
                if "title" in res and "creator" in res:  # this is the minimum we need
                    match = 1
                    book = res["title"]
                    author = res["creator"]
                    if "language" in res:
                        language = res["language"]
                    if "identifier" in res:
                        isbn = res["identifier"]
                    if "type" in res:
                        extn = res["type"]
                    logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                 (isbn, language, author, book, extn))
                else:
                    logger.debug("Book meta incomplete in %s" % book_filename)

            # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
            # just look for any .opf file in the current directory since we don't know
            # LL preferred authorname/bookname at this point.
            # Allow metadata in file to override book contents as may be users pref
            metafile = opf_file(r)
            try:
                res = get_book_info(metafile)
            except Exception:
                res = {}
            if "title" in res and "creator" in res:  # this is the minimum we need
                match = 1
                book = res["title"]
                author = res["creator"]
                if "language" in res:
                    language = res["language"]
                if "identifier" in res:
                    isbn = res["identifier"]
                logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
            else:
                logger.debug("File meta incomplete in %s" % metafile)

            if not match:  # no author/book from metadata file, and not embedded either
                match = pattern.match(files)
                if match:
                    author = match.group("author")
                    book = match.group("book")
                else:
                    logger.debug("Pattern match failed [%s]" % files)

            if not match:
                continue

            # flag that we found a book in this subdirectory
            processed_subdirectories.append(subdirectory)

            # If we have a valid looking isbn, and language != "Unknown", add it to cache
            if language != "Unknown" and formatter.is_valid_isbn(isbn):
                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                # we need to add it to language cache if not already
                # there, is_valid_isbn has checked length is 10 or 13
                if len(isbn) == 10:
                    isbnhead = isbn[0:3]
                else:
                    isbnhead = isbn[3:6]
                cached = myDB.action(
                    'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                if not cached:
                    myDB.action(
                        'insert into languages values ("%s", "%s")' % (isbnhead, language))
                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                else:
                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

            # get authors name in a consistent format
            if "," in author:  # "surname, forename"
                words = author.split(",")
                author = words[1].strip() + " " + words[0].strip()  # "forename surname"
            # length guard: an empty or one-character name must not raise IndexError
            if len(author) > 1 and author[1] == " ":
                author = author.replace(" ", ".")
                author = author.replace("..", ".")

            # Check if the author exists, and import the author if not,
            # before starting any complicated book-name matching to save repeating the search
            check_exist_author = myDB.action(
                'SELECT * FROM authors where AuthorName="%s"' % author
            ).fetchone()
            if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                # no match for supplied author, but we're allowed to
                # add new ones
                GR = GoodReads(author)
                try:
                    author_gr = GR.find_author_id()
                except Exception:
                    logger.warn("Error finding author id for [%s]" % author)
                    continue

                # only try to add if GR data matches found author data
                if author_gr:
                    authorname = author_gr["authorname"]

                    # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                    match_auth = author.replace(".", "_")
                    match_auth = match_auth.replace(" ", "_")
                    match_auth = match_auth.replace("__", "_")
                    match_name = authorname.replace(".", "_")
                    match_name = match_name.replace(" ", "_")
                    match_name = match_name.replace("__", "_")
                    match_name = common.remove_accents(match_name)
                    match_auth = common.remove_accents(match_auth)
                    # allow a degree of fuzziness to cater for different accented character handling.
                    # some author names have accents,
                    # filename may have the accented or un-accented version of the character
                    # The currently non-configurable value of fuzziness might need to go in config
                    # We stored GoodReads unmodified author name in
                    # author_gr, so store in LL db under that
                    # fuzz.ratio doesn't lowercase for us
                    match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                    if match_fuzz < 90:
                        logger.debug(
                            "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                        logger.debug(
                            "Failed to match author [%s] to authorname [%s]"
                            % (match_auth, match_name)
                        )

                    # To save loading hundreds of books by unknown
                    # authors at GR or GB, ignore if author "Unknown"
                    if (author != "Unknown") and (match_fuzz >= 90):
                        # use "intact" name for author that we stored in
                        # GR author_dict, not one of the various mangled versions
                        # otherwise the books appear to be by a different author!
                        author = author_gr["authorname"]
                        # this new authorname may already be in the
                        # database, so check again
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' % author
                        ).fetchone()
                        if not check_exist_author:
                            logger.debug("Adding new author [%s]" % author)
                            try:
                                importer.addAuthorToDB(author)
                                check_exist_author = myDB.action(
                                    'SELECT * FROM authors where AuthorName="%s"' % author
                                ).fetchone()
                            except Exception:
                                continue

            # check author exists in db, either newly loaded or already there
            if not check_exist_author:
                logger.debug("Failed to match author [%s] in database" % author)
                continue

            # author exists, check if this book by this author is in our database
            # metadata might have quotes in book name
            book = book.replace('"', "").replace("'", "")
            bookid = find_book_in_db(myDB, author, book)
            if not bookid:
                continue

            # check if book is already marked as "Open" (if so,
            # we already had it)
            check_status = myDB.action(
                'SELECT Status from books where BookID="%s"' % bookid
            ).fetchone()
            if check_status["Status"] != "Open":
                # update status as we've got this book
                myDB.action('UPDATE books set Status="Open" where BookID="%s"' % bookid)
                book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                # update book location so we can check if it
                # gets removed, or allow click-to-open
                myDB.action(
                    'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)
                )
                new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), "
        "sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats"
    ).fetchone()
    # NOTE(review): the extent of this block was reconstructed from
    # whitespace-collapsed source - confirm which lines belong inside it.
    if stats["sum(GR_book_hits)"] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug(
                "GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"])
        logger.debug("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"])
        logger.debug("Language cache was hit %s times" % stats["sum(cache_hits)"])
        logger.debug("Unwanted language removed %s books" % stats["sum(bad_lang)"])
        logger.debug("Unwanted characters removed %s books" % stats["sum(bad_char)"])
        logger.debug("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"])
        logger.debug("Cache %s hits, %s miss" %
                     (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
        logger.debug("ISBN Language cache holds %s entries" % cachesize["counter"])
        unknown_lang = len(myDB.select(
            'select BookID from Books where status="Open" and BookLang="Unknown"'))
        if unknown_lang:
            logger.warn(
                "There are %s books in your library with unknown language" % unknown_lang)

    authors = myDB.select("select AuthorName from authors")
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug("Updating bookcounts for %i authors" % len(authors))
    for author_row in authors:
        name = author_row["AuthorName"]
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")'
            % name
        ).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' %
                    (havebooks["counter"], name))
        unignoredbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"'
            % name
        ).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' %
                    (unignoredbooks["counter"], name))
    logger.info("Library scan complete")
def addAuthorNameToDB(author=None, refresh=False, addbooks=True):
    """Look an author up by name, importing them if they are not yet known.

    The name is put into a consistent format and looked up in the database.
    If it is missing and lazylibrarian.CONFIG['ADD_AUTHOR'] is set, GoodReads
    is queried, and the author is added when the GoodReads name is a close
    enough fuzzy match for the supplied one.

    :param author: author name as found, e.g. in file metadata.
    :param refresh: passed through to addAuthorToDB.
    :param addbooks: passed through to addAuthorToDB.
    :return: tuple (authorname, authorid, new).  authorname is our preferred
        name, and new is True only if the author was added by this call.
        ("", "", False) is returned when the author is not found or could
        not be added.
    """
    myDB = database.DBConnection()
    new = False
    # "not author" also covers the default None, for which len() would raise
    if not author or len(author) < 2:
        logger.debug('Invalid Author Name [%s]' % author)
        return "", "", False

    author = formatAuthorName(author)
    # Check if the author exists, and import the author if not,
    check_exist_author = myDB.match(
        'SELECT AuthorID FROM authors where AuthorName="%s"' % author.replace('"', '""'))

    if not check_exist_author and lazylibrarian.CONFIG['ADD_AUTHOR']:
        logger.debug('Author %s not found in database, trying to add' % author)
        # no match for supplied author, but we're allowed to add new ones
        GR = GoodReads(author)
        try:
            author_gr = GR.find_author_id()
        except Exception as e:
            logger.warn("Error finding author id for [%s] %s" % (author, str(e)))
            return "", "", False

        # only try to add if GR data matches found author data
        if author_gr:
            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
            match_auth = author.replace('.', ' ')
            match_auth = ' '.join(match_auth.split())

            match_name = author_gr['authorname'].replace('.', ' ')
            match_name = ' '.join(match_name.split())

            match_name = unaccented(match_name)
            match_auth = unaccented(match_auth)

            # allow a degree of fuzziness to cater for different accented character handling.
            # some author names have accents,
            # filename may have the accented or un-accented version of the character
            # The currently non-configurable value of fuzziness might need to go in config
            # We stored GoodReads unmodified author name in
            # author_gr, so store in LL db under that
            # fuzz.ratio doesn't lowercase for us
            match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
            if match_fuzz < 90:
                logger.debug(
                    "Failed to match author [%s] to authorname [%s] fuzz [%d]" %
                    (author, match_name, match_fuzz))

            # To save loading hundreds of books by unknown authors at GR or GB, ignore unknown
            if (author != "Unknown") and (match_fuzz >= 90):
                # use "intact" name for author that we stored in
                # GR author_dict, not one of the various mangled versions
                # otherwise the books appear to be by a different author!
                author = author_gr['authorname']
                authorid = author_gr['authorid']
                # this new authorname may already be in the
                # database, so check again
                check_exist_author = myDB.match(
                    'SELECT AuthorID FROM authors where AuthorID="%s"' % authorid)
                if check_exist_author:
                    logger.debug('Found goodreads authorname %s in database' % author)
                else:
                    logger.info("Adding new author [%s]" % author)
                    try:
                        addAuthorToDB(authorname=author, refresh=refresh,
                                      authorid=authorid, addbooks=addbooks)
                        check_exist_author = myDB.match(
                            'SELECT AuthorID FROM authors where AuthorID="%s"' % authorid)
                        if check_exist_author:
                            new = True
                    except Exception:
                        logger.debug('Failed to add author [%s] to db' % author)

    # check author exists in db, either newly loaded or already there
    if not check_exist_author:
        logger.debug("Failed to match author [%s] in database" % author)
        return "", "", False
    return author, check_exist_author['AuthorID'], new
def addAuthorToDB(authorname=None, refresh=False):
    """
    Add an author to the database, and get list of all their books
    If author already exists in database, refresh their details and booklist

    authorname: name of the author to add or refresh
    refresh:    passed through to the GoodReads/GoogleBooks lookups

    Returns None. Any exception is caught and logged with a traceback.
    """
    try:
        myDB = database.DBConnection()
        GR = GoodReads(authorname)

        query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
        dbauthor = myDB.match(query)
        controlValueDict = {"AuthorName": authorname}

        if not dbauthor:
            # placeholder id until GoodReads gives us the real one
            newValueDict = {
                "AuthorID": "0: %s" % (authorname),
                "Status": "Loading"
            }
            logger.debug("Now adding new author: %s to database" % authorname)
        else:
            newValueDict = {"Status": "Loading"}
            logger.debug("Now updating author: %s" % authorname)
        myDB.upsert("authors", newValueDict, controlValueDict)

        author = GR.find_author_id(refresh=refresh)
        if not author:
            logger.warn(u"Nothing found for %s" % authorname)
            # double any embedded double quotes so the name cannot terminate
            # the SQL string literal early (the SELECT above does the same
            # for single quotes)
            myDB.action('DELETE from authors WHERE AuthorName="%s"' %
                        authorname.replace('"', '""'))
            return

        authorid = author['authorid']
        authorlink = author['authorlink']
        authorimg = author['authorimg']
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": authorid,
            "AuthorLink": authorlink,
            "AuthorImg": authorimg,
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # replace a "nophoto" placeholder, then cache any remote image
        new_img = False
        if authorimg and 'nophoto' in authorimg:
            authorimg = getAuthorImage(authorid)
            new_img = True
        if authorimg and authorimg.startswith('http'):
            newimg = cache_cover(authorid, authorimg)
            if newimg:
                authorimg = newimg
                new_img = True

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        # process books
        if lazylibrarian.BOOK_API == "GoogleBooks":
            book_api = GoogleBooks()
            book_api.get_author_books(authorid, authorname, refresh=refresh)
        elif lazylibrarian.BOOK_API == "GoodReads":
            GR.get_author_books(authorid, authorname, refresh=refresh)

        # update totals works for existing authors only.
        # New authors need their totals updating after libraryscan or import of books.
        if dbauthor:
            update_totals(authorid)
        logger.debug("[%s] Author update complete" % authorname)
    except Exception:
        logger.error('Unhandled exception in addAuthorToDB: %s' % traceback.format_exc())
def find_book(self, bookid=None, queue=None):
    """
    Look up a single volume at GoogleBooks and add it to the books table
    with status "Wanted", then try to improve its cover, series and
    work-page details via bookwork.

    bookid: GoogleBooks volume id
    queue:  unused here

    Returns None. Nothing is added if the request returns no data, the
    volume has no author, or GoodReads cannot identify the author.
    """
    threading.currentThread().name = "GB-ADD-BOOK"
    myDB = database.DBConnection()
    if not lazylibrarian.GB_API:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.GB_API
    jsonresults, _ = self.get_request(URL)

    if jsonresults is None:
        # the book name is not known yet, so report the id we asked for
        logger.debug('No results found for %s' % bookid)
        return

    volume = jsonresults['volumeInfo']

    bookname = volume['title']
    bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
    bookname = unidecode(u'%s' % bookname)
    bookname = bookname.strip()  # strip whitespace

    try:
        authorname = volume['authors'][0]
    except (KeyError, IndexError):  # field missing, or present but empty
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return

    try:
        # warn if language is in ignore list, but user said they wanted
        # this book
        booklang = volume['language']
        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if booklang not in valid_langs:
            logger.debug('Book %s language does not match preference' % bookname)
    except KeyError:
        logger.debug('Book does not have language field')
        booklang = "Unknown"

    try:
        bookpub = volume['publisher']
    except KeyError:
        bookpub = None

    # series details are parsed out of a subtitle shaped like
    # "(<series> Series #<num>)"; both stay None when there is no subtitle
    series = None
    seriesNum = None
    try:
        booksub = volume['subtitle']
    except KeyError:
        booksub = None
    if booksub:
        try:
            series = booksub.split('(')[1].split(' Series ')[0]
        except IndexError:
            series = None
        try:
            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
            if seriesNum[0] == '#':
                seriesNum = seriesNum[1:]
        except IndexError:
            seriesNum = None

    try:
        bookdate = volume['publishedDate']
    except KeyError:
        bookdate = '0000-00-00'

    try:
        bookimg = volume['imageLinks']['thumbnail']
    except KeyError:
        bookimg = 'images/nocover.png'

    try:
        bookrate = volume['averageRating']
    except KeyError:
        bookrate = 0

    try:
        bookpages = volume['pageCount']
    except KeyError:
        bookpages = 0

    try:
        bookgenre = volume['categories'][0]
    except (KeyError, IndexError):
        bookgenre = None

    try:
        bookdesc = volume['description']
    except KeyError:
        bookdesc = None

    try:
        # only the first identifier is considered, and only if it is ISBN-10
        identifier = volume['industryIdentifiers'][0]
        if identifier['type'] == 'ISBN_10':
            bookisbn = identifier['identifier']
        else:
            bookisbn = None
    except (KeyError, IndexError):
        bookisbn = None

    booklink = volume['canonicalVolumeLink']
    bookrate = float(bookrate)

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        # without an AuthorID the book row cannot be written
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return
    AuthorID = author['authorid']

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": "",
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": formatter.today(),
        "Series": series,
        "SeriesNum": seriesNum
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = bookwork.getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif bookimg.startswith('http'):
        link = bookwork.cache_cover(bookid, bookimg)
        if link is not None:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)

    if seriesNum is None:
        # try to get series info from librarything
        series, seriesNum = bookwork.getWorkSeries(bookid)
        if seriesNum:
            logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
            controlValueDict = {"BookID": bookid}
            newValueDict = {
                "Series": series,
                "SeriesNum": seriesNum
            }
            myDB.upsert("books", newValueDict, controlValueDict)

    worklink = bookwork.getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def find_book(self, bookid=None, queue=None):
    """
    Look up a single volume at GoogleBooks and add it to the books table
    with status "Wanted". German umlauts in the title are transliterated
    and ":" is removed.

    bookid: GoogleBooks volume id
    queue:  unused here

    Returns None. Nothing is added if the volume has no author or
    GoodReads cannot identify the author.
    """
    threading.currentThread().name = "GB-ADD-BOOK"
    myDB = database.DBConnection()

    URL = 'https://www.googleapis.com/books/v1/volumes/' + str(bookid) + "?key=" + lazylibrarian.GB_API
    response = urllib2.urlopen(URL, timeout=30)
    try:
        jsonresults = json.JSONDecoder().decode(response.read())
    finally:
        # always release the connection, even if decoding fails
        response.close()

    volume = jsonresults['volumeInfo']

    # Darkie67:
    # replacing German Umlauts and filtering out ":"
    bookname = volume['title']
    for original, replacement in (
            (u'\xf6', u'oe'),
            (u'\xe4', u'ae'),
            (u'\xdf', u'ss'),
            (u'\xc4', u'Ae'),
            (u'\xdc', u'Ue'),
            (u'\xd6', u'Oe'),
            (':', ''),
            (u'\xfc', u'ue')):
        bookname = bookname.replace(original, replacement)
    # Darkie67 end

    try:
        authorname = volume['authors'][0]
    except (KeyError, IndexError):
        # every later step needs the author, so stop here
        logger.debug('Book %s does not contain author field' % bookname)
        return

    try:
        # a language outside the preferred list is only logged,
        # the book is still added
        booklang = volume['language']
        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if booklang not in valid_langs:
            logger.debug('Book %s language does not match preference' % bookname)
    except KeyError:
        logger.debug('Book does not have language field')
        booklang = "Unknown"  # still needed for the database row below

    try:
        bookpub = volume['publisher']
    except KeyError:
        bookpub = None

    try:
        booksub = volume['subtitle']
    except KeyError:
        booksub = None

    try:
        bookdate = volume['publishedDate']
    except KeyError:
        bookdate = '0000-00-00'

    try:
        bookimg = volume['imageLinks']['thumbnail']
    except KeyError:
        bookimg = 'images/nocover.png'

    try:
        bookrate = volume['averageRating']
    except KeyError:
        bookrate = 0

    try:
        bookpages = volume['pageCount']
    except KeyError:
        bookpages = 0

    try:
        bookgenre = volume['categories'][0]
    except (KeyError, IndexError):
        bookgenre = None

    try:
        bookdesc = volume['description']
    except KeyError:
        bookdesc = None

    try:
        # only the first identifier is considered, and only if it is ISBN-10
        identifier = volume['industryIdentifiers'][0]
        if identifier['type'] == 'ISBN_10':
            bookisbn = identifier['identifier']
        else:
            bookisbn = None
    except (KeyError, IndexError):
        bookisbn = None

    booklink = volume['canonicalVolumeLink']
    bookrate = float(bookrate)

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        # without an AuthorID the book row cannot be written
        logger.debug('No AuthorID for %s, unable to add book %s' % (authorname, bookname))
        return
    AuthorID = author['authorid']

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": "",
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": formatter.today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)
def find_book(self, bookid=None, queue=None):
    """
    Look up a single volume at GoogleBooks and add it to the books table
    with status "Wanted".

    bookid: GoogleBooks volume id
    queue:  unused here

    Returns None. Nothing is added if the volume has no author or
    GoodReads cannot identify the author.
    """
    threading.currentThread().name = "GB-ADD-BOOK"
    myDB = database.DBConnection()

    URL = 'https://www.googleapis.com/books/v1/volumes/' + str(bookid) + "?key=" + lazylibrarian.GB_API
    response = urllib2.urlopen(URL, timeout=30)
    try:
        jsonresults = json.JSONDecoder().decode(response.read())
    finally:
        # always release the connection, even if decoding fails
        response.close()

    volume = jsonresults['volumeInfo']
    bookname = volume['title']

    try:
        authorname = volume['authors'][0]
    except (KeyError, IndexError):
        # every later step needs the author, so stop here
        logger.debug('Book %s does not contain author field' % bookname)
        return

    try:
        # a language outside the preferred list is only logged,
        # the book is still added
        booklang = volume['language']
        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if booklang not in valid_langs:
            logger.debug('Book %s language does not match preference' % bookname)
    except KeyError:
        logger.debug('Book does not have language field')
        booklang = "Unknown"  # still needed for the database row below

    try:
        bookpub = volume['publisher']
    except KeyError:
        bookpub = None

    try:
        booksub = volume['subtitle']
    except KeyError:
        booksub = None

    try:
        bookdate = volume['publishedDate']
    except KeyError:
        bookdate = '0000-00-00'

    try:
        bookimg = volume['imageLinks']['thumbnail']
    except KeyError:
        bookimg = 'images/nocover.png'

    try:
        bookrate = volume['averageRating']
    except KeyError:
        bookrate = 0

    try:
        bookpages = volume['pageCount']
    except KeyError:
        bookpages = 0

    try:
        bookgenre = volume['categories'][0]
    except (KeyError, IndexError):
        bookgenre = None

    try:
        bookdesc = volume['description']
    except KeyError:
        bookdesc = None

    try:
        # only the first identifier is considered, and only if it is ISBN-10
        identifier = volume['industryIdentifiers'][0]
        if identifier['type'] == 'ISBN_10':
            bookisbn = identifier['identifier']
        else:
            bookisbn = None
    except (KeyError, IndexError):
        bookisbn = None

    booklink = volume['canonicalVolumeLink']
    bookrate = float(bookrate)

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        # without an AuthorID the book row cannot be written
        logger.debug('No AuthorID for %s, unable to add book %s' % (authorname, bookname))
        return
    AuthorID = author['authorid']

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": "",
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": formatter.today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)
def find_book(self, bookid=None, bookstatus="None"):
    """
    Look up a single volume at GoogleBooks and add it to the books table.
    If the book's author is not yet in the authors table, the author is
    added too, optionally followed by all of their books.
    After the book is stored, its cover, series and work-page details are
    filled in where possible.

    bookid:     GoogleBooks volume id
    bookstatus: status for the new book row; when unset, the value of
                lazylibrarian.CONFIG['NEWBOOK_STATUS'] is used

    Returns None. Nothing is added if the request returns no data, the
    volume has no author, or GoodReads cannot identify the author.
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, _ = gb_json_request(URL)

    if jsonresults is None:
        logger.debug('No results found for %s' % bookid)
        return

    # The declared default is the *string* "None", which is truthy, so it is
    # treated as "not supplied" here as well; otherwise the configured
    # default status could never be applied.
    # NOTE(review): presumably the default was meant to be None - confirm
    if not bookstatus or bookstatus == "None":
        bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

    book = bookdict(jsonresults)
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(book['name'], dic)
    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace

    if not book['author']:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return

    # warn if language is in ignore list, but user said they wanted this book
    valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
    if book['lang'] not in valid_langs and 'All' not in valid_langs:
        logger.debug('Book %s googlebooks language does not match preference, %s' %
                     (bookname, book['lang']))

    # the date preferences only produce warnings, the book is still added
    if lazylibrarian.CONFIG['NO_PUBDATE']:
        if not book['date'] or book['date'] == '0000':
            logger.warn('Book %s Publication date does not match preference, %s' %
                        (bookname, book['date']))

    if lazylibrarian.CONFIG['NO_FUTURE']:
        # an empty date cannot be in the future; skipping it also avoids
        # comparing None with a string
        if book['date'] and book['date'] > today()[:4]:
            logger.warn('Book %s Future publication date does not match preference, %s' %
                        (bookname, book['date']))

    authorname = book['author']
    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
        return

    AuthorID = author['authorid']
    match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
    if not match:
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?',
                           (author['authorname'],))
        if match:
            logger.debug('%s: Changing authorid from %s to %s' %
                         (author['authorname'], AuthorID, match['AuthorID']))
            AuthorID = match['AuthorID']  # we have a different authorid for that authorname
        else:
            # no author but request to add book, add author with newauthor status
            # User hit "add book" button from a search or a wishlist import
            newauthor_status = 'Active'
            if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                newauthor_status = 'Paused'
            controlValueDict = {"AuthorID": AuthorID}
            newValueDict = {
                "AuthorName": author['authorname'],
                "AuthorImg": author['authorimg'],
                "AuthorLink": author['authorlink'],
                "AuthorBorn": author['authorborn'],
                "AuthorDeath": author['authordeath'],
                "DateAdded": today(),
                "Status": newauthor_status
            }
            authorname = author['authorname']
            myDB.upsert("authors", newValueDict, controlValueDict)
            if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                self.get_author_books(AuthorID,
                                      entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": book['sub'],
        "BookDesc": book['desc'],
        "BookIsbn": book['isbn'],
        "BookPub": book['pub'],
        "BookGenre": book['genre'],
        "BookImg": book['img'],
        "BookLink": book['link'],
        "BookRate": float(book['rate']),
        "BookPages": book['pages'],
        "BookDate": book['date'],
        "BookLang": book['lang'],
        "Status": bookstatus,
        "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
        "BookAdded": today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s by %s added to the books database" % (bookname, authorname))

    bookimg = book['img']
    # the image may be empty, so test for that before searching inside it
    if bookimg and ('nocover' in bookimg or 'nophoto' in bookimg):
        # try to get a cover from another source
        workcover, source = getBookCover(bookid)
        if workcover:
            logger.debug('Updated cover for %s using %s' % (bookname, source))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif bookimg and bookimg.startswith('http'):
        link, success, _ = cache_img("book", bookid, bookimg)
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % bookimg)

    serieslist = []
    if book['series']:
        serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
    if lazylibrarian.CONFIG['ADD_SERIES']:
        newserieslist = getWorkSeries(bookid)
        if newserieslist:
            serieslist = newserieslist
            logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
        setSeries(serieslist, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def _sql_quote(value):
    """Double any double quotes so value is safe inside a "..." SQL string literal."""
    return value.replace('"', '""')


def _author_row(myDB, authorname):
    """Return the authors-table row for authorname, or None if there is none."""
    return myDB.action(
        'SELECT * FROM authors where AuthorName="%s"' % _sql_quote(authorname)).fetchone()


def _read_book_meta(source):
    """
    Read metadata from source (an .opf file or an ebook) using get_book_info.
    Returns (author, book, language, isbn), or None if the metadata could not
    be read or lacks title/creator, which is the minimum we need.
    """
    try:
        res = get_book_info(source)
    except Exception:
        res = {}
    if 'title' in res and 'creator' in res:
        language = res['language'] if 'language' in res else ""
        isbn = res['identifier'] if 'identifier' in res else ""
        return res['creator'], res['title'], language, isbn
    return None


def _build_filename_pattern():
    """
    Massage the EBOOK_DEST_FILE config parameter into a compiled regular
    expression with named groups "author" and "book", followed by a dot and
    one of the configured ebook extensions.
    """
    # re.escape keeps literal text literal; escaping each character by hand
    # would turn letters into regex escapes such as \b or \A
    matchString = re.escape(lazylibrarian.EBOOK_DEST_FILE)
    matchString = matchString.replace(re.escape('$Author'), '(?P<author>.*?)')
    matchString = matchString.replace(re.escape('$Title'), '(?P<book>.*?)')
    # an alternation, not a character class: "[epub|mobi]" only matches one character
    booktypes = '|'.join(re.escape(book_type)
                         for book_type in formatter.getList(lazylibrarian.EBOOK_TYPE))
    return re.compile(matchString + r'\.(?:' + booktypes + ')')


def LibraryScan(dir=None):
    """
    Scan a directory tree for ebooks and mark the ones we know about as "Open".

    For every file with a valid book type, author/title (and isbn/language
    where available) are taken from an .opf file in the same directory, else
    from the epub/mobi itself, else from the filename via EBOOK_DEST_FILE.
    Unknown authors are imported if lazylibrarian.ADD_AUTHOR is set.
    With lazylibrarian.FULL_SCAN, books marked "Open" whose file has gone are
    given NOTFOUND_STATUS first. Finally HaveBooks/UnignoredBooks are
    recalculated for every author touched.

    dir: directory to scan, defaults to lazylibrarian.DOWNLOAD_DIR

    Returns None.
    """
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' %
                    dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    new_authors = []

    logger.info('Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))
                # this author's totals need recalculating at the end
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

    # subdirectories in which a book has already been found, used to save
    # repeat-scans of a directory holding multiple formats of the same book
    processed_subdirectories = []

    pattern = _build_filename_pattern()

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            # skip hidden directories, and "_" directories to
            # prevent magazines being scanned
            if directory.startswith(".") or directory.startswith("_"):
                d.remove(directory)

        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, '')

            # Skip if we've done this directory before. This is conditional on
            # a switch in config.ini in case user keeps multiple different
            # books in the same subdirectory
            if lazylibrarian.IMP_SINGLEBOOK and subdirectory in processed_subdirectories:
                logger.debug("[%s] already scanned" % subdirectory)
                continue

            if not formatter.is_valid_booktype(files):
                continue

            # If this is a book, try to get author/title/isbn/language
            # If an .opf file exists, use that
            # else if epub or mobi, read metadata from the book
            # else have to try pattern match for author/title
            extn = files.rsplit('.', 1)[-1] if '.' in files else ""
            # reset for every file so a filename-only match can never pick up
            # the isbn/language of a previously scanned book
            language = ""
            isbn = ""

            logger.debug("[%s] Now scanning subdirectory %s" %
                         (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'),
                          subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))

            # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
            # just look for any .opf file in the current directory since we don't know
            # LL preferred authorname/bookname at this point
            metafile = opf_file(r)
            meta = _read_book_meta(metafile)
            if meta:
                author, book, language, isbn = meta
                logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
            else:
                logger.debug("File meta incomplete in %s" % metafile)
                # it's a book, but no external metadata found
                # if it's an epub or a mobi we can try to read metadata from it
                if extn in ("epub", "mobi"):
                    book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                    meta = _read_book_meta(book_filename)
                    if meta:
                        author, book, language, isbn = meta
                        logger.debug("book meta [%s] [%s] [%s] [%s]" %
                                     (isbn, language, author, book))
                    else:
                        logger.debug("Book meta incomplete in %s" % book_filename)

            if not meta:
                name_match = pattern.match(files)
                if not name_match:
                    logger.debug("Pattern match failed [%s]" % files)
                    continue
                author = name_match.group("author")
                book = name_match.group("book")

            if not author or not book:
                # nothing usable to look up
                logger.debug("Pattern match failed [%s]" % files)
                continue

            # flag that we found a book in this subdirectory
            processed_subdirectories.append(subdirectory)

            # If we have a valid looking isbn, and language != "Unknown", add it to cache
            if not language:
                language = "Unknown"
            if not formatter.is_valid_isbn(isbn):
                isbn = ""
            if isbn != "" and language != "Unknown":
                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                # we need to add it to language cache if not already
                # there, is_valid_isbn has checked length is 10 or 13
                if len(isbn) == 10:
                    isbnhead = isbn[0:3]
                else:
                    isbnhead = isbn[3:6]
                cached = myDB.action(
                    'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                if not cached:
                    myDB.action('insert into languages values ("%s", "%s")' %
                                (isbnhead, _sql_quote(language)))
                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                else:
                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

            # get authors name in a consistent format
            if "," in author:  # "surname, forename"
                words = author.split(',')
                author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
            if len(author) > 1 and author[1] == ' ':
                # leading initial: "J R R Tolkien" becomes "J.R.R.Tolkien"
                author = author.replace(' ', '.')
                author = author.replace('..', '.')

            # Check if the author exists, and import the author if not,
            # before starting any complicated book-name matching to save repeating the search
            check_exist_author = _author_row(myDB, author)
            if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                # no match for supplied author, but we're allowed to add new ones
                GR = GoodReads(author)
                try:
                    author_gr = GR.find_author_id()
                except Exception:
                    logger.warn("Error finding author id for [%s]" % author)
                    continue

                # only try to add if GR data matches found author data
                if author_gr:
                    authorname = author_gr['authorname']

                    # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                    match_auth = author.replace('.', '_')
                    match_auth = match_auth.replace(' ', '_')
                    match_auth = match_auth.replace('__', '_')
                    match_name = authorname.replace('.', '_')
                    match_name = match_name.replace(' ', '_')
                    match_name = match_name.replace('__', '_')
                    match_name = common.remove_accents(match_name)
                    match_auth = common.remove_accents(match_auth)
                    # allow a degree of fuzziness to cater for different accented character handling.
                    # some author names have accents,
                    # filename may have the accented or un-accented version of the character
                    # The currently non-configurable value of fuzziness might need to go in config
                    match_fuzz = fuzz.ratio(match_auth, match_name)
                    if match_fuzz < 90:
                        logger.debug("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                        logger.debug("Failed to match author [%s] to authorname [%s]" %
                                     (match_auth, match_name))

                    # To save loading hundreds of books by unknown
                    # authors at GR or GB, ignore if author "Unknown"
                    if (author != "Unknown") and (match_fuzz >= 90):
                        # use "intact" name for author that GoodReads gave us,
                        # not one of the various mangled versions
                        # otherwise the books appear to be by a different author!
                        author = author_gr['authorname']
                        # this new authorname may already be in the
                        # database, so check again
                        check_exist_author = _author_row(myDB, author)
                        if not check_exist_author:
                            logger.debug("Adding new author [%s]" % author)
                            if author not in new_authors:
                                new_authors.append(author)
                            try:
                                importer.addAuthorToDB(author)
                                check_exist_author = _author_row(myDB, author)
                            except Exception:
                                continue

            # check author exists in db, either newly loaded or already there
            if not check_exist_author:
                logger.debug("Failed to match author [%s] in database" % author)
                continue

            # author exists, check if this book by this author is in our database
            # metadata might have quotes in book name
            book = book.replace('"', '').replace("'", "")
            bookid = find_book_in_db(myDB, author, book)
            if bookid:
                # check if book is already marked as "Open" (if so, we already had it)
                check_status = myDB.action(
                    'SELECT Status from books where BookID="%s"' % bookid).fetchone()
                if check_status['Status'] != 'Open':
                    # update status as we've got this book
                    myDB.action('UPDATE books set Status="Open" where BookID="%s"' % bookid)
                    book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                    # update book location so we can check if it
                    # gets removed, or allow click-to-open
                    myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                (_sql_quote(book_filename), bookid))
                    new_book_count += 1

    cachesize = myDB.action("select count(*) from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)

    if new_book_count:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug("GoogleBooks was hit %s times for books" % stats['sum(GR_book_hits)'])
            logger.debug("GoogleBooks language was changed %s times" % stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug("GoodReads was hit %s times for books" % stats['sum(GR_book_hits)'])
            logger.debug("GoodReads was hit %s times for languages" % stats['sum(GR_lang_hits)'])
        logger.debug("LibraryThing was hit %s times for languages" % stats['sum(LT_lang_hits)'])
        logger.debug("Language cache was hit %s times" % stats['sum(cache_hits)'])
        logger.debug("Unwanted language removed %s books" % stats['sum(bad_lang)'])
        logger.debug("Unwanted characters removed %s books" % stats['sum(bad_char)'])
        logger.debug("Unable to cache %s books with missing ISBN" % stats['sum(uncached)'])

    logger.debug("ISBN Language cache holds %s entries" % cachesize['count(*)'])
    unknown_lang = len(
        myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if unknown_lang:
        logger.warn("There are %s books in your library with unknown language" % unknown_lang)

    logger.debug('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        safe_auth = _sql_quote(auth)
        havebooks = len(
            myDB.select('select BookName from Books where status="%s" and AuthorName="%s"' %
                        ('Open', safe_auth)))
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' %
                    (havebooks, safe_auth))
        totalbooks = len(
            myDB.select('select BookName from Books where status!="%s" and AuthorName="%s"' %
                        ('Ignored', safe_auth)))
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' %
                    (totalbooks, safe_auth))

    logger.info('Library scan complete')
def addAuthorToDB(authorname=None):
    """
    Add an author to the database together with all their books from GoodReads.
    If the author already exists their details and book list are refreshed.
    The author is marked "Loading" while this runs and "Active" when done.

    authorname: name of the author to add or refresh

    Returns None. If GoodReads finds nothing for the name, an error is
    logged and no books are processed.
    """
    threading.currentThread().name = "DBIMPORT"

    myDB = database.DBConnection()
    GR = GoodReads(authorname, 'author')

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until GoodReads gives us the real one
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # nothing below can work without an author id
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    books = GR.get_author_books(authorid)
    for book in books:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "AuthorID": authorid,
            # NOTE(review): this column used to be filled with the author's
            # image URL; the link looks like the intended value - confirm
            # nothing displays this column as an image
            "AuthorLink": authorlink,
            "BookName": book['bookname'],
            "BookSub": book['booksub'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookPub": book['bookpub'],
            "BookGenre": book['bookgenre'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
        myDB.upsert("books", newValueDict, controlValueDict)

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorID='%s' order by BookDate DESC" %
        authorid).fetchone()
    bookCount = myDB.select(
        "SELECT COUNT(BookName) as counter FROM books WHERE AuthorID='%s'" % authorid)
    for count in bookCount:
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "TotalBooks": count['counter']
        }
        if lastbook:
            # an author with no books has no "last book" row to read from
            newValueDict["LastBook"] = lastbook['BookName']
            newValueDict["LastLink"] = lastbook['BookLink']
            newValueDict["LastDate"] = lastbook['BookDate']
        myDB.upsert("authors", newValueDict, controlValueDict)
        logger.info("Processing complete: Added %s books to the database" % str(count['counter']))