def import_book(bookid, wait=False): """ search goodreads or googlebooks for a bookid and import the book """ if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks": GB = GoogleBooks(bookid) if not wait: _ = threading.Thread(target=GB.find_book, name='GB-IMPORT', args=[bookid, "Wanted"]).start() else: GB.find_book(bookid, "Wanted") else: # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads": GR = GoodReads(bookid) if not wait: _ = threading.Thread(target=GR.find_book, name='GR-IMPORT', args=[bookid, "Wanted"]).start() else: GR.find_book(bookid, "Wanted")
def import_book(bookid, ebook=None, audio=None, wait=False): """ search goodreads or googlebooks for a bookid and import the book ebook/audio=None makes find_book use configured default """ if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks": GB = GoogleBooks(bookid) if not wait: _ = threading.Thread(target=GB.find_book, name='GB-IMPORT', args=[bookid, ebook, audio]).start() else: GB.find_book(bookid, ebook, audio) else: # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads": GR = GoodReads(bookid) if not wait: _ = threading.Thread(target=GR.find_book, name='GR-IMPORT', args=[bookid, ebook, audio]).start() else: GR.find_book(bookid, ebook, audio)
def LibraryScan(startdir=None): """ Scan a directory tree adding new books into database Return how many books you added """ try: destdir = lazylibrarian.DIRECTORY('Destination') if not startdir: if not destdir: logger.warn('Cannot find destination directory: %s. Not scanning' % destdir) return 0 startdir = destdir if not os.path.isdir(startdir): logger.warn('Cannot find directory: %s. Not scanning' % startdir) return 0 if not internet(): logger.warn('Libraryscan: No internet connection') return 0 myDB = database.DBConnection() # keep statistics of full library scans if startdir == destdir: myDB.action('DELETE from stats') try: # remove any extra whitespace in authornames authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "% %"') if authors: logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors)))) for author in authors: authorid = author["AuthorID"] authorname = ' '.join(author['AuthorName'].split()) # Have we got author name both with-and-without extra spaces? If so, merge them duplicate = myDB.match( 'Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' % authorname) if duplicate: myDB.action('DELETE from authors where authorname="%s"' % author['AuthorName']) if author['AuthorID'] != duplicate['AuthorID']: myDB.action('UPDATE books set AuthorID="%s" WHERE AuthorID="%s"' % (duplicate['AuthorID'], author['AuthorID'])) else: myDB.action( 'UPDATE authors set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid)) except Exception as e: logger.info('Error: ' + str(e)) logger.info('Scanning ebook directory: %s' % startdir) new_book_count = 0 modified_count = 0 rescan_count = 0 rescan_hits = 0 file_count = 0 author = "" if lazylibrarian.CONFIG['FULL_SCAN']: cmd = 'select AuthorName, BookName, BookFile, BookID from books,authors' cmd += ' where books.AuthorID = authors.AuthorID and books.Status="Open"' if not startdir == destdir: cmd += ' and BookFile like "' + startdir + '%"' books = myDB.select(cmd) status = lazylibrarian.CONFIG['NOTFOUND_STATUS'] logger.info('Missing books will be marked as %s' % status) for book in books: bookID = book['BookID'] bookfile = book['BookFile'] if not (bookfile and os.path.isfile(bookfile)): myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID)) myDB.action('update books set BookFile="" where BookID="%s"' % bookID) logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName'])) # to save repeat-scans of the same directory if it contains multiple formats of the same book, # keep track of which directories we've already looked at processed_subdirectories = [] warned = False # have we warned about no new authors setting matchString = '' for char in lazylibrarian.CONFIG['EBOOK_DEST_FILE']: matchString = matchString + '\\' + char # massage the EBOOK_DEST_FILE config parameter into something we can use # with regular expression matching booktypes = '' count = -1 booktype_list = getList(lazylibrarian.CONFIG['EBOOK_TYPE']) for book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|' + book_type matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace( "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']' pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(startdir): for directory in d[:]: # prevent magazine being scanned if directory.startswith("_") or directory.startswith("."): d.remove(directory) for files in f: file_count += 1 if isinstance(r, str): r = r.decode(lazylibrarian.SYS_ENCODING) subdirectory = r.replace(startdir, '') # Added new code to skip if we've done this directory before. # Made this conditional with a switch in config.ini # in case user keeps multiple different books in the same subdirectory if lazylibrarian.CONFIG['IMP_SINGLEBOOK'] and (subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: # If this is a book, try to get author/title/isbn/language # if epub or mobi, read metadata from the book # If metadata.opf exists, use that allowing it to override # embedded metadata. User may have edited metadata.opf # to merge author aliases together # If all else fails, try pattern match for author/title # and look up isbn/lang from LT or GR later match = 0 if is_valid_booktype(files): logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory)) language = "Unknown" isbn = "" book = "" author = "" gr_id = "" gb_id = "" extn = os.path.splitext(files)[1] # if it's an epub or a mobi we can try to read metadata from it if (extn == ".epub") or (extn == ".mobi"): book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING) try: res = get_book_info(book_filename) except Exception as e: logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e))) res = {} # title and creator are the minimum we need if 'title' in res and 'creator' in res: book = res['title'] author = res['creator'] if book and len(book) > 2 and author and len(author) > 2: match = 1 if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'type' in res: extn = res['type'] logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn)) if not match: logger.debug("Book meta incomplete in %s" % book_filename) # calibre uses "metadata.opf", LL uses "bookname - authorname.opf" # just look for any .opf file in the current directory since we don't know # LL preferred authorname/bookname at this point. # Allow metadata in file to override book contents as may be users pref metafile = opf_file(r) try: res = get_book_info(metafile) except Exception as e: logger.debug('get_book_info failed for %s, %s' % (metafile, str(e))) res = {} # title and creator are the minimum we need if 'title' in res and 'creator' in res: book = res['title'] author = res['creator'] if book and len(book) > 2 and author and len(author) > 2: match = 1 if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'gr_id' in res: gr_id = res['gr_id'] logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, gr_id)) if not match: logger.debug("File meta incomplete in %s" % metafile) if not match: # no author/book from metadata file, and not embedded either match = pattern.match(files) if match: author = match.group("author") book = match.group("book") if len(book) <= 2 or len(author) <= 2: match = 0 if not match: logger.debug("Pattern match failed [%s]" % files) if match: # flag that we found a book in this subdirectory processed_subdirectories.append(subdirectory) # If we have a valid looking isbn, and language != "Unknown", add it to cache if language != "Unknown" and is_valid_isbn(isbn): logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already # there, is_valid_isbn has checked length is 10 or 13 if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead) if not match: myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) author, authorid, new = addAuthorNameToDB(author) # get the author name as we know it... if author: # author exists, check if this book by this author is in our database # metadata might have quotes in book name # some books might be stored under a different author name # eg books by multiple authors, books where author is "writing as" # or books we moved to "merge" authors book = book.replace("'", "") # First try and find it under author and bookname # as we may have it under a different bookid or isbn to goodreads/googlebooks # which might have several bookid/isbn for the same book bookid = find_book_in_db(myDB, author, book) if not bookid: # Title or author name might not match or multiple authors # See if the gr_id, gb_id is already in our database if gr_id: bookid = gr_id elif gb_id: bookid = gb_id else: bookid = "" if bookid: match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid) if not match: msg = 'Unable to find book %s by %s in database, trying to add it using ' if bookid == gr_id: msg += "GoodReads ID " + gr_id if bookid == gb_id: msg += "GoogleBooks ID " + gb_id logger.debug(msg % (book, author)) if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and gr_id: GR_ID = GoodReads(gr_id) GR_ID.find_book(gr_id, None) elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks" and gb_id: GB_ID = GoogleBooks(gb_id) GB_ID.find_book(gb_id, None) # see if it's there now... match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid) if not match: logger.debug("Unable to add bookid %s to database" % bookid) bookid = "" if not bookid and isbn: # See if the isbn is in our database match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn) if match: bookid = match['BookID'] if not bookid: # get author name from parent directory of this book directory newauthor = os.path.basename(os.path.dirname(r)) # calibre replaces trailing periods with _ eg Smith Jr. -> Smith Jr_ if newauthor.endswith('_'): newauthor = newauthor[:-1] + '.' if author.lower() != newauthor.lower(): logger.debug("Trying authorname [%s]" % newauthor) bookid = find_book_in_db(myDB, newauthor, book) if bookid: logger.warn("%s not found under [%s], found under [%s]" % (book, author, newauthor)) # at this point if we still have no bookid, it looks like we # have author and book title but no database entry for it if not bookid: if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads": # Either goodreads doesn't have the book or it didn't match language prefs # Since we have the book anyway, try and reload it ignoring language prefs rescan_count += 1 base_url = 'http://www.goodreads.com/search.xml?q=' params = {"key": lazylibrarian.CONFIG['GR_API']} if author[1] in '. ': surname = author forename = '' while surname[1] in '. ': forename = forename + surname[0] + '.' surname = surname[2:].strip() if author != forename + ' ' + surname: logger.debug('Stripped authorname [%s] to [%s %s]' % (author, forename, surname)) author = forename + ' ' + surname author = ' '.join(author.split()) # ensure no extra whitespace searchname = author + ' ' + book searchname = cleanName(unaccented(searchname)) searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING)) set_url = base_url + searchterm + '&' + urllib.urlencode(params) try: rootxml, in_cache = get_xml_request(set_url) if not len(rootxml): logger.debug("Error requesting results from GoodReads") else: resultxml = rootxml.getiterator('work') for item in resultxml: booktitle = item.find('./best_book/title').text book_fuzz = fuzz.token_set_ratio(booktitle, book) if book_fuzz >= 98: logger.debug("Rescan found %s : %s" % (booktitle, language)) rescan_hits += 1 bookid = item.find('./best_book/id').text GR_ID = GoodReads(bookid) GR_ID.find_book(bookid, None) if language and language != "Unknown": # set language from book metadata logger.debug("Setting language from metadata %s : %s" % (booktitle, language)) myDB.action('UPDATE books SET BookLang="%s" WHERE BookID="%s"' % (language, bookid)) break if not bookid: logger.warn("GoodReads doesn't know about %s" % book) except Exception as e: logger.error("Error finding rescan results: %s" % str(e)) elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks": # if we get here using googlebooks it's because googlebooks # doesn't have the book. No point in looking for it again. logger.warn("GoogleBooks doesn't know about %s" % book) # see if it's there now... if bookid: cmd = 'SELECT books.Status, BookFile, AuthorName, BookName from books,authors ' cmd += 'where books.AuthorID = authors.AuthorID and BookID="%s"' % bookid check_status = myDB.match(cmd) if not check_status: logger.debug('Unable to find bookid %s in database' % bookid) else: if check_status['Status'] != 'Open': # we found a new book new_book_count += 1 myDB.action( 'UPDATE books set Status="Open" where BookID="%s"' % bookid) # store book location so we can check if it gets removed book_filename = os.path.join(r, files) if not check_status['BookFile']: # no previous location myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # location may have changed since last scan elif book_filename != check_status['BookFile']: modified_count += 1 logger.warn("Updating book location for %s %s from %s to %s" % (author, book, check_status['BookFile'], book_filename)) logger.debug("%s %s matched %s BookID %s, [%s][%s]" % (author, book, check_status['Status'], bookid, check_status['AuthorName'], check_status['BookName'])) myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # update cover file to cover.jpg in book folder (if exists) bookdir = os.path.dirname(book_filename) coverimg = os.path.join(bookdir, 'cover.jpg') if os.path.isfile(coverimg): cachedir = lazylibrarian.CACHEDIR cacheimg = os.path.join(cachedir, 'book', bookid + '.jpg') copyfile(coverimg, cacheimg) else: logger.warn( "Failed to match book [%s] by [%s] in database" % (book, author)) else: if not warned and not lazylibrarian.CONFIG['ADD_AUTHOR']: logger.warn("Add authors to database is disabled") warned = True logger.info("%s/%s new/modified book%s found and added to the database" % (new_book_count, modified_count, plural(new_book_count + modified_count))) logger.info("%s file%s processed" % (file_count, plural(file_count))) if startdir == destdir: # On full library scans, check for missing workpages setWorkPages() # and books with unknown language nolang = myDB.match( "select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'") nolang = nolang['counter'] if nolang: logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang))) # show stats if new books were added stats = myDB.match( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \ sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats") st= {'GR_book_hits': stats['sum(GR_book_hits)'], 'GB_book_hits': stats['sum(GR_book_hits)'], 'GR_lang_hits': stats['sum(GR_lang_hits)'], 'LT_lang_hits': stats['sum(LT_lang_hits)'], 'GB_lang_change': stats['sum(GB_lang_change)'], 'cache_hits': stats['sum(cache_hits)'], 'bad_lang': stats['sum(bad_lang)'], 'bad_char': stats['sum(bad_char)'], 'uncached': stats['sum(uncached)'], 'duplicates': stats['sum(duplicates)']} for item in st.keys(): if st[item] is None: st[item] = 0 if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks": logger.debug("GoogleBooks was hit %s time%s for books" % (st['GR_book_hits'], plural(st['GR_book_hits']))) logger.debug("GoogleBooks language was changed %s time%s" % (st['GB_lang_change'], plural(st['GB_lang_change']))) if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads": logger.debug("GoodReads was hit %s time%s for books" % (st['GR_book_hits'], plural(st['GR_book_hits']))) logger.debug("GoodReads was hit %s time%s for languages" % (st['GR_lang_hits'], plural(st['GR_lang_hits']))) logger.debug("LibraryThing was hit %s time%s for languages" % (st['LT_lang_hits'], plural(st['LT_lang_hits']))) logger.debug("Language cache was hit %s time%s" % (st['cache_hits'], plural(st['cache_hits']))) logger.debug("Unwanted language removed %s book%s" % (st['bad_lang'], plural(st['bad_lang']))) logger.debug("Unwanted characters removed %s book%s" % (st['bad_char'], plural(st['bad_char']))) logger.debug("Unable to cache language for %s book%s with missing ISBN" % (st['uncached'], plural(st['uncached']))) logger.debug("Found %s duplicate book%s" % (st['duplicates'], plural(st['duplicates']))) logger.debug("Rescan %s hit%s, %s miss" % (rescan_hits, plural(rescan_hits), rescan_count - rescan_hits)) logger.debug("Cache %s hit%s, %s miss" % (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS)) cachesize = myDB.match("select count('ISBN') as counter from languages") logger.debug("ISBN Language cache holds %s entries" % cachesize['counter']) # Cache any covers and images images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"') if len(images): logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: bookid = item['bookid'] bookimg = item['bookimg'] # bookname = item['bookname'] newimg, success = cache_img("book", bookid, bookimg) if success: myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid)) images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"') if len(images): logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: authorid = item['authorid'] authorimg = item['authorimg'] # authorname = item['authorname'] newimg, success = cache_img("author", authorid, authorimg) if success: myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid)) # On full scan, update bookcounts for all authors, not just new ones - refresh may have located # new books for existing authors especially if switched provider gb/gr or changed wanted languages authors = myDB.select('select AuthorID from authors') else: # On single author/book import, just update bookcount for that author authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author.replace('"', '""')) logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors)))) for author in authors: update_totals(author['AuthorID']) logger.info('Library scan complete') return new_book_count except Exception: logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())