def _addAuthor(self, **kwargs): if 'name' not in kwargs: self.data = 'Missing parameter: name' return else: self.id = kwargs['name'] try: importer.addAuthorToDB(self.id, refresh=False) except Exception as e: self.data = e
def _addAuthor(self, **kwargs): if 'name' not in kwargs: self.data = 'Missing parameter: name' return else: self.id = kwargs['name'] try: addAuthorToDB(self.id, refresh=False) except Exception as e: self.data = str(e)
def _refreshAuthor(self, **kwargs): if 'name' not in kwargs: self.data = 'Missing parameter: name' return else: self.id = kwargs['name'] try: importer.addAuthorToDB(self.id, refresh=True) except Exception as e: self.data = e
def processAlternate(source_dir=None): # import a book from an alternate directory if not source_dir or os.path.isdir(source_dir) is False: logger.warn('Alternate directory must not be empty') return if source_dir == lazylibrarian.DESTINATION_DIR: logger.warn('Alternate directory must not be the same as destination') return new_book = book_file(source_dir, booktype='book') if new_book: # see if there is a metadata file in this folder with the info we need metafile = librarysync.opf_file(source_dir) try: metadata = librarysync.get_book_info(metafile) except: metadata = {} if 'title' in metadata and 'creator' in metadata: authorname = metadata['creator'] bookname = metadata['title'] # if not, try to get metadata from the book file else: try: metadata = librarysync.get_book_info(new_book) except: metadata = {} if 'title' in metadata and 'creator' in metadata: authorname = metadata['creator'] bookname = metadata['title'] myDB = database.DBConnection() authmatch = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone() if authmatch: logger.debug("ALT: Author %s found in database" % (authorname)) else: logger.debug("ALT: Author %s not found, adding to database" % (authorname)) importer.addAuthorToDB(authorname) bookid = librarysync.find_book_in_db(myDB, authorname, bookname) if bookid: import_book(source_dir, bookid) else: logger.warn("Book %s by %s not found in database" % (bookname, authorname)) else: logger.warn('Book %s has no metadata, unable to import' % new_book) else: logger.warn("No book file found in %s" % source_dir)
def dbUpdate(forcefull=False): myDB = database.DBConnection() activeauthors = myDB.select('SELECT AuthorID, AuthorName from authors WHERE Status="Active" \ or Status="Loading" order by DateAdded ASC') logger.info('Starting update for %i active author%s' % (len(activeauthors), plural(len(activeauthors)))) for author in activeauthors: # authorid = author[0] authorname = author[1] importer.addAuthorToDB(authorname, refresh=True) logger.info('Active author update complete')
def _refreshAuthor(self, **kwargs): refresh = False if 'refresh' in kwargs: refresh = True if 'name' not in kwargs: self.data = 'Missing parameter: name' return else: self.id = kwargs['name'] try: addAuthorToDB(self.id, refresh=refresh) except Exception as e: self.data = "%s %s" % (type(e).__name__, str(e))
def dbUpdate(forcefull=False): myDB = database.DBConnection() activeauthors = myDB.select('SELECT AuthorID, AuthorName from authors WHERE Status="Active" or Status="Loading" order by DateAdded ASC') logger.info('Starting update for %i active authors' % len(activeauthors)) for author in activeauthors: authorid = author[0] authorname = author[1] importer.addAuthorToDB(authorname, refresh=True) logger.info('Active author update complete')
def dbUpdate(refresh=False): try: myDB = database.DBConnection() activeauthors = myDB.select('SELECT AuthorName from authors WHERE Status="Active" \ or Status="Loading" order by DateAdded ASC') logger.info('Starting update for %i active author%s' % (len(activeauthors), plural(len(activeauthors)))) for author in activeauthors: authorname = author[0] importer.addAuthorToDB(authorname, refresh=refresh) logger.info('Active author update complete') except Exception as e: logger.error('Unhandled exception in dbUpdate: %s' % traceback.format_exc())
def db_v29(myDB, upgradelog): if not has_column(myDB, "books", "WorkID"): lazylibrarian.UPDATE_MSG = 'Adding WorkID to member and books tables' upgradelog.write("%s v29: %s\n" % (time.ctime(), lazylibrarian.UPDATE_MSG)) myDB.action('ALTER TABLE books ADD COLUMN WorkID TEXT') if not has_column(myDB, "member", "WorkID"): myDB.action('ALTER TABLE member ADD COLUMN WorkID TEXT') myDB.action('DROP TABLE IF EXISTS temp_table') myDB.action('ALTER TABLE series RENAME TO temp_table') myDB.action('CREATE TABLE series (SeriesID INTEGER UNIQUE, SeriesName TEXT, Status TEXT)') myDB.action('INSERT INTO series SELECT SeriesID,SeriesName,Status FROM temp_table') myDB.action('DROP TABLE temp_table') if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads': authors = myDB.select('SELECT AuthorID,AuthorName,TotalBooks from authors WHERE Status != "Ignored"') books = myDB.match('SELECT sum(totalbooks) as total from authors WHERE Status != "Ignored"') tot = len(authors) if tot: upgradelog.write("%s v29: Upgrading %s authors, %s books\n" % (time.ctime(), tot, books['total'])) start_count = int(books['total']) + tot start_time = time.time() entries_done = 0 myDB.action('DELETE FROM seriesauthors') cnt = 0 for author in authors: cnt += 1 expected_books = author['TotalBooks'] if not expected_books: expected_books = '0' lazylibrarian.UPDATE_MSG = "Updating %s (%s books): %s" % (author['AuthorName'], expected_books, calc_eta(start_time, start_count, entries_done)) addAuthorToDB(authorname=None, refresh=True, authorid=author['AuthorID'], addbooks=True) entries_done += int(expected_books) # may have extra books now, don't overcount entries_done += 1 # one less author members = myDB.select('SELECT BookID from member') tot = len(members) if tot: upgradelog.write("%s v29: Upgrading %s series members\n" % (time.ctime(), tot)) cnt = 0 for member in members: cnt += 1 lazylibrarian.UPDATE_MSG = "Updating series members %s of %s" % (cnt, tot) res = myDB.match('SELECT WorkID from books WHERE BookID=?', (member['BookID'],)) if res: myDB.action('UPDATE member SET WorkID=? WHERE BookID=?', (res['WorkID'], member['BookID'])) upgradelog.write("%s v29: complete\n" % time.ctime())
def processAlternate(source_dir=None): # import a book from an alternate directory if source_dir == None or source_dir == "": logger.warn('Alternate directory must not be empty') return if source_dir == lazylibrarian.DESTINATION_DIR: logger.warn('Alternate directory must not be the same as destination') return new_book = book_file(source_dir) if new_book: # see if there is a metadata file in this folder with the info we need metafile = librarysync.opf_file(source_dir) try: metadata = librarysync.get_book_info(metafile) except: metadata = {} if 'title' in metadata and 'creator' in metadata: authorname = metadata['creator'] bookname = metadata['title'] # if not, try to get metadata from the book file else: try: metadata = librarysync.get_book_info(new_book) except: metadata = {} if 'title' in metadata and 'creator' in metadata: authorname = metadata['creator'] bookname = metadata['title'] myDB = database.DBConnection() authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone() if authmatch: logger.debug("ALT: Author %s found in database" % (authorname)) else: logger.debug("ALT: Author %s not found, adding to database" % (authorname)) importer.addAuthorToDB(authorname) bookid = librarysync.find_book_in_db(myDB, authorname, bookname) if bookid: import_book(source_dir, bookid) else: logger.warn("Book %s by %s not found in database" % (bookname, authorname)) else: logger.warn('Book %s has no metadata, unable to import' % new_book) else: logger.warn("No book file found in %s" % source_dir)
def _addAuthorID(self, **kwargs): if 'id' not in kwargs: self.data = 'Missing parameter: id' return else: self.id = kwargs['id'] try: self.data = addAuthorToDB(refresh=False, authorid=self.id) except Exception as e: self.data = "%s %s" % (type(e).__name__, str(e))
def processAlternate(source_dir=None): # import a book from an alternate directory try: if not source_dir: logger.warn("Alternate Directory not configured") return False elif not os.path.isdir(source_dir): logger.warn("Alternate Directory [%s] not found" % source_dir) return False if source_dir == lazylibrarian.DIRECTORY('Destination'): logger.warn('Alternate directory must not be the same as Destination') return False logger.debug('Processing alternate directory %s' % source_dir) # first, recursively process any books in subdirectories for fname in os.listdir(source_dir): subdir = os.path.join(source_dir, fname) if os.path.isdir(subdir): processAlternate(subdir) # only import one book from each alternate (sub)directory, this is because # the importer may delete the directory after importing a book, # depending on lazylibrarian.DESTINATION_COPY setting # also if multiple books in a folder and only a "metadata.opf" # which book is it for? new_book = book_file(source_dir, booktype='book') if new_book: metadata = {} # see if there is a metadata file in this folder with the info we need # try book_name.opf first, or fall back to any filename.opf metafile = os.path.splitext(new_book)[0] + '.opf' if not os.path.isfile(metafile): metafile = opf_file(source_dir) if metafile and os.path.isfile(metafile): try: metadata = get_book_info(metafile) except Exception as e: logger.debug('Failed to read metadata from %s, %s' % (metafile, str(e))) else: logger.debug('No metadata file found for %s' % new_book) if 'title' not in metadata or 'creator' not in metadata: # if not got both, try to get metadata from the book file try: metadata = get_book_info(new_book) except Exception as e: logger.debug('No metadata found in %s, %s' % (new_book, str(e))) if 'title' in metadata and 'creator' in metadata: authorname = metadata['creator'] bookname = metadata['title'] myDB = database.DBConnection() authmatch = myDB.match('SELECT * FROM authors where AuthorName="%s"' % (authorname)) if not authmatch: # try goodreads preferred authorname logger.debug("Checking GoodReads for [%s]" % authorname) GR = GoodReads(authorname) try: author_gr = GR.find_author_id() except Exception: logger.debug("No author id for [%s]" % authorname) if author_gr: grauthorname = author_gr['authorname'] logger.debug("GoodReads reports [%s] for [%s]" % (grauthorname, authorname)) authorname = grauthorname authmatch = myDB.match('SELECT * FROM authors where AuthorName="%s"' % (authorname)) if authmatch: logger.debug("ALT: Author %s found in database" % (authorname)) else: logger.debug("ALT: Author %s not found, adding to database" % (authorname)) addAuthorToDB(authorname) bookid = find_book_in_db(myDB, authorname, bookname) if bookid: return import_book(source_dir, bookid) else: logger.warn("Book %s by %s not found in database" % (bookname, authorname)) else: logger.warn('Book %s has no metadata, unable to import' % new_book) else: logger.warn("No book file found in %s" % source_dir) return False except Exception as e: logger.error('Unhandled exception in processAlternate: %s' % traceback.format_exc())
def refreshAuthor(self, AuthorID, AuthorName): importer.addAuthorToDB(AuthorName) raise cherrypy.HTTPRedirect("authorPage?AuthorName=%s" % AuthorName)
def processAlternate(source_dir=None): # import a book from an alternate directory if not source_dir or os.path.isdir(source_dir) is False: logger.warn('Alternate directory not found') return if source_dir == lazylibrarian.DESTINATION_DIR: logger.warn('Alternate directory must not be the same as destination') return logger.debug('Processing alternate directory %s' % source_dir) # first, recursively process any books in subdirectories for fname in os.listdir(source_dir): subdir = os.path.join(source_dir, fname) if os.path.isdir(subdir): processAlternate(subdir) # only import one book from each alternate (sub)directory, this is because # the importer may delete the directory after importing a book, # depending on lazylibrarian.DESTINATION_COPY setting # also if multiple books in a folder and only a "metadata.opf" # which book is it for? new_book = book_file(source_dir, booktype='book') if new_book: metadata = {} # see if there is a metadata file in this folder with the info we need # try book_name.opf first, or fall back to any filename.opf metafile = os.path.splitext(new_book)[0] + '.opf' if not os.path.isfile(metafile): metafile = librarysync.opf_file(source_dir) if os.path.isfile(metafile): try: metadata = librarysync.get_book_info(metafile) except: logger.debug('Failed to read metadata from %s' % metafile) else: logger.debug('No metadata file found for %s' % new_book) if not 'title' in metadata and 'creator' in metadata: # try to get metadata from the book file try: metadata = librarysync.get_book_info(new_book) except: logger.debug('No metadata found in %s' % new_book) if 'title' in metadata and 'creator' in metadata: authorname = metadata['creator'] bookname = metadata['title'] myDB = database.DBConnection() authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone() if authmatch: logger.debug("ALT: Author %s found in database" % (authorname)) else: logger.debug("ALT: Author %s not found, adding to database" % (authorname)) importer.addAuthorToDB(authorname) bookid = librarysync.find_book_in_db(myDB, authorname, bookname) if bookid: import_book(source_dir, bookid) else: logger.warn("Book %s by %s not found in database" % (bookname, authorname)) else: logger.warn('Book %s has no metadata, unable to import' % new_book) else: logger.warn("No book file found in %s" % source_dir)
def processCSV(search_dir=None): """ Find a csv file in the search_dir and process all the books in it, adding authors to the database if not found, and marking the books as "Wanted" """ if not search_dir or os.path.isdir(search_dir) is False: logger.warn(u"Alternate Directory must not be empty") return False csvFile = csv_file(search_dir) headers = None content = {} if not csvFile: logger.warn(u"No CSV file found in %s" % search_dir) else: logger.debug(u'Reading file %s' % csvFile) reader = csv.reader(open(csvFile)) for row in reader: if reader.line_num == 1: # If we are on the first line, create the headers list from the first row # by taking a slice from item 1 as we don't need the very first header. headers = row[1:] else: # Otherwise, the key in the content dictionary is the first item in the # row and we can create the sub-dictionary by using the zip() function. content[row[0]] = dict(zip(headers, row[1:])) # We can now get to the content by using the resulting dictionary, so to see # the list of lines, we can do: # print content.keys() # to get a list of bookIDs # To see the list of fields available for each book # print headers if 'Author' not in headers or 'Title' not in headers: logger.warn(u'Invalid CSV file found %s' % csvFile) return myDB = database.DBConnection() bookcount = 0 authcount = 0 skipcount = 0 logger.debug(u"CSV: Found %s entries in csv file" % len(content.keys())) for bookid in content.keys(): authorname = formatter.latinToAscii(content[bookid]['Author']) authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone() if authmatch: logger.debug(u"CSV: Author %s found in database" % (authorname)) else: logger.debug(u"CSV: Author %s not found, adding to database" % (authorname)) importer.addAuthorToDB(authorname) authcount = authcount + 1 bookmatch = 0 isbn10 = "" isbn13 = "" bookname = formatter.latinToAscii(content[bookid]['Title']) if 'ISBN' in headers: isbn10 = content[bookid]['ISBN'] if 'ISBN13' in headers: isbn13 = content[bookid]['ISBN13'] # try to find book in our database using isbn, or if that fails, name matching if formatter.is_valid_isbn(isbn10): bookmatch = myDB.action('SELECT * FROM books where Bookisbn=%s' % (isbn10)).fetchone() if not bookmatch: if formatter.is_valid_isbn(isbn13): bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' % (isbn13)).fetchone() if not bookmatch: bookid = librarysync.find_book_in_db(myDB, authorname, bookname) if bookid: bookmatch = myDB.action('SELECT * FROM books where BookID="%s"' % (bookid)).fetchone() if bookmatch: authorname = bookmatch['AuthorName'] bookname = bookmatch['BookName'] bookid = bookmatch['BookID'] bookstatus = bookmatch['Status'] if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have': logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus)) else: # skipped/ignored logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname)) controlValueDict = {"BookID": bookid} newValueDict = {"Status": "Wanted"} myDB.upsert("books", newValueDict, controlValueDict) bookcount = bookcount + 1 else: logger.warn(u"Skipping book %s by %s, not found in database" % (bookname, authorname)) skipcount = skipcount + 1 logger.info(u"Added %i new authors, marked %i books as 'Wanted', %i books not found" % (authcount, bookcount, skipcount))
def import_CSV(search_dir=None): """ Find a csv file in the search_dir and process all the books in it, adding authors to the database if not found and marking the books as "Wanted" """ if not search_dir or os.path.isdir(search_dir) is False: logger.warn(u"Please check Alternate Directory setting") return False csvFile = csv_file(search_dir) headers = None content = {} if not csvFile: logger.warn(u"No CSV file found in %s" % search_dir) else: logger.debug(u'Reading file %s' % csvFile) reader = csv.reader(open(csvFile)) for row in reader: if reader.line_num == 1: # If we are on the first line, create the headers list from the first row headers = row else: # Otherwise, the key in the content dictionary is the first item in the # row and we can create the sub-dictionary by using the zip() function. # we include the key in the dictionary as our exported csv files use # bookid as the key content[row[0]] = dict(zip(headers, row)) # We can now get to the content by using the resulting dictionary, so to see # the list of lines, we can do: # print content.keys() # to get a list of keys # To see the list of fields available for each book # print headers if 'Author' not in headers or 'Title' not in headers: logger.warn(u'Invalid CSV file found %s' % csvFile) return myDB = database.DBConnection() bookcount = 0 authcount = 0 skipcount = 0 logger.debug(u"CSV: Found %s book%s in csv file" % (len(content.keys()), plural(len(content.keys())))) for item in content.keys(): authorname = content[item]['Author'] if hasattr(authorname, 'decode'): authorname = authorname.decode(lazylibrarian.SYS_ENCODING) authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone() if authmatch: newauthor = False logger.debug(u"CSV: Author %s found in database" % (authorname)) else: newauthor = True logger.debug(u"CSV: Author %s not found, adding to database" % (authorname)) addAuthorToDB(authorname) authcount = authcount + 1 bookmatch = finditem(content[item], headers) # if we didn't find it, maybe author info is stale if not bookmatch and not newauthor: addAuthorToDB(authorname, refresh=True) bookmatch = finditem(content[item], headers) if bookmatch: authorname = bookmatch['AuthorName'] bookname = bookmatch['BookName'] bookid = bookmatch['BookID'] bookstatus = bookmatch['Status'] if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have': logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus)) else: # skipped/ignored logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname)) controlValueDict = {"BookID": bookid} newValueDict = {"Status": "Wanted"} myDB.upsert("books", newValueDict, controlValueDict) bookcount = bookcount + 1 else: logger.warn(u"Skipping book %s by %s, not found in database" % (bookname, authorname)) skipcount = skipcount + 1 logger.info(u"Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount)))
def LibraryScan(dir=None): if not dir: if not lazylibrarian.DOWNLOAD_DIR: return else: dir = lazylibrarian.DOWNLOAD_DIR if not os.path.isdir(dir): logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace")) return myDB = database.DBConnection() myDB.action("drop table if exists stats") myDB.action( "create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )" ) new_authors = [] logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace")) book_list = [] new_book_count = 0 file_count = 0 book_exists = False if lazylibrarian.FULL_SCAN: books = myDB.select("select AuthorName, BookName, BookFile, BookID from books where Status=?", [u"Open"]) status = lazylibrarian.NOTFOUND_STATUS logger.info("Missing books will be marked as %s" % status) for book in books: bookName = book["BookName"] bookAuthor = book["AuthorName"] bookID = book["BookID"] bookfile = book["BookFile"] if os.path.isfile(bookfile): book_exists = True else: myDB.action("update books set Status=? where BookID=?", [status, bookID]) myDB.action('update books set BookFile="" where BookID=?', [bookID]) logger.info("Book %s updated as not found on disk" % bookfile) # for book_type in getList(lazylibrarian.EBOOK_TYPE): # bookName = book['BookName'] # bookAuthor = book['AuthorName'] # #Default destination path, should be allowed change per config file. # dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName) # #dest_path = authorname+'/'+bookname # global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName) # # encoded_book_path = os.path.join(dir,dest_path,global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING) # if os.path.isfile(encoded_book_path): # book_exists = True # if not book_exists: # myDB.action('update books set Status=? where AuthorName=? and BookName=?',[status,bookAuthor,bookName]) # logger.info('Book %s updated as not found on disk' % encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace') ) if bookAuthor not in new_authors: new_authors.append(bookAuthor) # guess this was meant to save repeat-scans of the same directory # if it contains multiple formats of the same book, but there was no code # that looked at the array. renamed from latest to processed to make purpose clearer processed_subdirectories = [] matchString = "" for char in lazylibrarian.EBOOK_DEST_FILE: matchString = matchString + "\\" + char # massage the EBOOK_DEST_FILE config parameter into something we can use with regular expression matching booktypes = "" count = -1 booktype_list = getList(lazylibrarian.EBOOK_TYPE) for book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + "|" + book_type matchString = ( matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + "\.[" + booktypes + "]" ) pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(dir): for directory in d[:]: if directory.startswith("."): d.remove(directory) # prevent magazine being scanned if directory.startswith("_"): d.remove(directory) for files in f: file_count += 1 subdirectory = r.replace(dir, "") # Added new code to skip if we've done this directory before. Made this conditional with a switch in config.ini # in case user keeps multiple different books in the same subdirectory if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: logger.info( "[%s] Now scanning subdirectory %s" % ( dir.decode(lazylibrarian.SYS_ENCODING, "replace"), subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"), ) ) # If this is a book, try to get author/title/isbn/language # If metadata.opf exists, use that # else if epub or mobi, read metadata from the book # else have to try pattern match for author/title and look up isbn/lang from LT or GR later # # Is it a book (extension found in booktypes) match = 0 words = files.split(".") extn = words[len(words) - 1] if extn in booktypes: # see if there is a metadata file in this folder with the info we need try: metafile = os.path.join(r, "metadata.opf").encode(lazylibrarian.SYS_ENCODING) res = get_book_info(metafile) if res: book = res["title"] author = res["creator"] language = res["language"] isbn = res["identifier"] match = 1 logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book)) except: logger.debug("No metadata file in %s" % r) if not match: # it's a book, but no external metadata found # if it's an epub or a mobi we can try to read metadata from it if (extn == "epub") or (extn == "mobi"): book_file = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING) res = get_book_info(book_file) if res: book = res["title"] author = res["creator"] language = res["language"] isbn = res["identifier"] match = 1 logger.debug("book meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book)) if not match: match = pattern.match(files) if match: author = match.group("author") book = match.group("book") else: logger.debug("Pattern match failed [%s]" % files) else: processed_subdirectories.append(subdirectory) # flag that we found a book in this subdirectory # # If we have a valid looking isbn, and language != "Unknown", add it to cache # if not language: language = "Unknown" # strip any formatting from the isbn isbn = re.sub("[- ]", "", isbn) if len(isbn) != 10 and len(isbn) != 13: isbn = "" if not isbn.isdigit(): isbn = "" if isbn != "" and language != "Unknown": logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already there if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone() if not match: myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) # get authors name in a consistent format if "," in author: # "surname, forename" words = author.split(",") author = words[1].strip() + " " + words[0].strip() # "forename surname" author = author.replace(". ", " ") author = author.replace(".", " ") author = author.replace(" ", " ") # Check if the author exists, and import the author if not, # before starting any complicated book-name matching to save repeating the search # check_exist_author = myDB.action("SELECT * FROM authors where AuthorName=?", [author]).fetchone() if not check_exist_author and lazylibrarian.ADD_AUTHOR: # no match for supplied author, but we're allowed to add new ones GR = GoodReads(author) try: author_gr = GR.find_author_id() except: logger.error("Error finding author id for [%s]" % author) continue # only try to add if GR data matches found author data # not sure what this is for, never seems to fail?? if author_gr: authorname = author_gr["authorname"] # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien" match_auth = author.replace(".", "_") match_auth = match_auth.replace(" ", "_") match_auth = match_auth.replace("__", "_") match_name = authorname.replace(".", "_") match_name = match_name.replace(" ", "_") match_name = match_name.replace("__", "_") # allow a degree of fuzziness to cater for different accented character handling. # some author names have accents, # filename may have the accented or un-accented version of the character # The (currently non-configurable) value of fuzziness works for one accented character # We stored GoodReads unmodified author name in author_gr, so store in LL db under that match_fuzz = fuzz.ratio(match_auth, match_name) if match_fuzz < 90: logger.info("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz)) logger.info("match author [%s] authorname [%s]" % (match_auth, match_name)) # To save loading hundreds of books by unknown authors at GR or GB, ignore if author "Unknown" if (author != "Unknown") and (match_fuzz >= 90): # use "intact" name for author that we stored in # GR author_dict, not one of the various mangled versions # otherwise the books appear to be by a different author! author = author_gr["authorname"] # this new authorname may already be in the database, so check again check_exist_author = myDB.action( "SELECT * FROM authors where AuthorName=?", [author] ).fetchone() if not check_exist_author: logger.info("Adding new author [%s]" % author) if author not in new_authors: new_authors.append(author) try: importer.addAuthorToDB(author) check_exist_author = myDB.action( "SELECT * FROM authors where AuthorName=?", [author] ).fetchone() except: continue # check author exists in db, either newly loaded or already there if not check_exist_author: logger.info("Failed to match author [%s] in database" % author) else: # author exists, check if this book by this author is in our database bookid = find_book_in_db(myDB, author, book) if bookid: # check if book is already marked as "Open" (if so, we already had it) check_status = myDB.action("SELECT Status from books where BookID=?", [bookid]).fetchone() if check_status["Status"] != "Open": # update status as we've got this book myDB.action("UPDATE books set Status=? where BookID=?", [u"Open", bookid]) book_file = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING) # update book location so we can check if it gets removed, or maybe allow click-to-open? myDB.action("UPDATE books set BookFile=? where BookID=?", [book_file, bookid]) new_book_count += 1 cachesize = myDB.action("select count(*) from languages").fetchone() logger.info("%s new/modified books found and added to the database" % new_book_count) logger.info("%s files processed" % file_count) stats = myDB.action( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats" ).fetchone() if lazylibrarian.BOOK_API == "GoogleBooks": logger.info("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"]) logger.info("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"]) if lazylibrarian.BOOK_API == "GoodReads": logger.info("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"]) logger.info("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"]) logger.info("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"]) logger.info("Language cache was hit %s times" % stats["sum(cache_hits)"]) logger.info("Unwanted language removed %s books" % stats["sum(bad_lang)"]) logger.info("Unwanted characters removed %s books" % stats["sum(bad_char)"]) logger.info("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"]) logger.info("ISBN Language cache holds %s entries" % cachesize["count(*)"]) stats = len(myDB.select("select BookID from Books where status=? and BookLang=?", ["Open", "Unknown"])) logger.info("There are %s books in your library with unknown language" % stats) logger.info("Updating %i authors" % len(new_authors)) for auth in new_authors: havebooks = len(myDB.select("select BookName from Books where status=? and AuthorName=?", ["Open", auth])) myDB.action("UPDATE authors set HaveBooks=? where AuthorName=?", [havebooks, auth]) totalbooks = len(myDB.select("select BookName from Books where status!=? and AuthorName=?", ["Ignored", auth])) myDB.action("UPDATE authors set UnignoredBooks=? where AuthorName=?", [totalbooks, auth]) logger.info("Library scan complete")
def LibraryScan(dir=None): if not dir: if not lazylibrarian.DOWNLOAD_DIR: return else: dir = lazylibrarian.DOWNLOAD_DIR if not os.path.isdir(dir): logger.warn( 'Cannot find directory: %s. Not scanning' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace')) return myDB = database.DBConnection() myDB.action('drop table if exists stats') myDB.action( 'create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \ GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )') logger.info( 'Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace')) new_book_count = 0 file_count = 0 if lazylibrarian.FULL_SCAN: books = myDB.select( 'select AuthorName, BookName, BookFile, BookID from books where Status="Open"') status = lazylibrarian.NOTFOUND_STATUS logger.info('Missing books will be marked as %s' % status) for book in books: bookName = book['BookName'] bookAuthor = book['AuthorName'] bookID = book['BookID'] bookfile = book['BookFile'] if not(bookfile and os.path.isfile(bookfile)): myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID)) myDB.action('update books set BookFile="" where BookID="%s"' % bookID) logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName)) # to save repeat-scans of the same directory if it contains multiple formats of the same book, # keep track of which directories we've already looked at processed_subdirectories = [] matchString = '' for char in lazylibrarian.EBOOK_DEST_FILE: matchString = matchString + '\\' + char # massage the EBOOK_DEST_FILE config parameter into something we can use # with regular expression matching booktypes = '' count = -1 booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE) for book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|' + book_type matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace( "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']' pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(dir): for directory in d[:]: if directory.startswith("."): d.remove(directory) # prevent magazine being scanned if directory.startswith("_"): d.remove(directory) for files in f: file_count += 1 if isinstance(r, str): r = r.decode('utf-8') subdirectory = r.replace(dir, '') # Added new code to skip if we've done this directory before. # Made this conditional with a switch in config.ini # in case user keeps multiple different books in the same subdirectory if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: # If this is a book, try to get author/title/isbn/language # if epub or mobi, read metadata from the book # If metadata.opf exists, use that allowing it to override # embedded metadata. User may have edited metadata.opf # to merge author aliases together # If all else fails, try pattern match for author/title # and look up isbn/lang from LT or GR later match = 0 if formatter.is_valid_booktype(files): logger.debug("[%s] Now scanning subdirectory %s" % (dir, subdirectory)) language = "Unknown" isbn = "" book = "" author = "" words = files.split('.') extn = words[len(words) - 1] # if it's an epub or a mobi we can try to read metadata from it if (extn == "epub") or (extn == "mobi"): book_filename = os.path.join( r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING)) try: res = get_book_info(book_filename) except: res = {} if 'title' in res and 'creator' in res: # this is the minimum we need match = 1 book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'type' in res: extn = res['type'] logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn)) else: logger.debug("Book meta incomplete in %s" % book_filename) # calibre uses "metadata.opf", LL uses "bookname - authorname.opf" # just look for any .opf file in the current directory since we don't know # LL preferred authorname/bookname at this point. # Allow metadata in file to override book contents as may be users pref metafile = opf_file(r) try: res = get_book_info(metafile) except: res = {} if 'title' in res and 'creator' in res: # this is the minimum we need match = 1 book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] logger.debug( "file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book)) else: logger.debug("File meta incomplete in %s" % metafile) if not match: # no author/book from metadata file, and not embedded either match = pattern.match(files) if match: author = match.group("author") book = match.group("book") else: logger.debug("Pattern match failed [%s]" % files) if match: # flag that we found a book in this subdirectory processed_subdirectories.append(subdirectory) # If we have a valid looking isbn, and language != "Unknown", add it to cache if language != "Unknown" and formatter.is_valid_isbn(isbn): logger.debug( "Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already # there, is_valid_isbn has checked length is 10 or 13 if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.action( 'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone() if not match: myDB.action( 'insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug( "Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug( "Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) # get authors name in a consistent format if "," in author: # "surname, forename" words = author.split(',') author = words[1].strip() + ' ' + words[0].strip() # "forename surname" if author[1] == ' ': author = author.replace(' ', '.') author = author.replace('..', '.') # Check if the author exists, and import the author if not, # before starting any complicated book-name matching to save repeating the search # check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() if not check_exist_author and lazylibrarian.ADD_AUTHOR: # no match for supplied author, but we're allowed to # add new ones GR = GoodReads(author) try: author_gr = GR.find_author_id() except: logger.warn( "Error finding author id for [%s]" % author) continue # only try to add if GR data matches found author data if author_gr: authorname = author_gr['authorname'] # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien" match_auth = author.replace('.', '_') match_auth = match_auth.replace(' ', '_') match_auth = match_auth.replace('__', '_') match_name = authorname.replace('.', '_') match_name = match_name.replace(' ', '_') match_name = match_name.replace('__', '_') match_name = common.remove_accents(match_name) match_auth = common.remove_accents(match_auth) # allow a degree of fuzziness to cater for different accented character handling. # some author names have accents, # filename may have the accented or un-accented version of the character # The currently non-configurable value of fuzziness might need to go in config # We stored GoodReads unmodified author name in # author_gr, so store in LL db under that # fuzz.ratio doesn't lowercase for us match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower()) if match_fuzz < 90: logger.debug( "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz)) logger.debug( "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)) # To save loading hundreds of books by unknown # authors at GR or GB, ignore if author "Unknown" if (author != "Unknown") and (match_fuzz >= 90): # use "intact" name for author that we stored in # GR author_dict, not one of the various mangled versions # otherwise the books appear to be by a different author! author = author_gr['authorname'] # this new authorname may already be in the # database, so check again check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() if not check_exist_author: logger.debug( "Adding new author [%s]" % author) try: importer.addAuthorToDB(author) check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() except: continue # check author exists in db, either newly loaded or already there if not check_exist_author: logger.debug( "Failed to match author [%s] in database" % author) else: # author exists, check if this book by this author is in our database # metadata might have quotes in book name book = book.replace('"', '').replace("'", "") bookid = find_book_in_db(myDB, author, book) if bookid: # check if book is already marked as "Open" (if so, # we already had it) check_status = myDB.action( 'SELECT Status from books where BookID="%s"' % bookid).fetchone() if check_status['Status'] != 'Open': # update status as we've got this book myDB.action( 'UPDATE books set Status="Open" where BookID="%s"' % bookid) book_filename = os.path.join(r, files) # update book location so we can check if it # gets removed, or allow click-to-open myDB.action( 'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) new_book_count += 1 cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone() logger.info( "%s new/modified books found and added to the database" % new_book_count) logger.info("%s files processed" % file_count) stats = myDB.action( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \ sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone() if stats['sum(GR_book_hits)'] is not None: # only show stats if new books added if lazylibrarian.BOOK_API == "GoogleBooks": logger.debug( "GoogleBooks was hit %s times for books" % stats['sum(GR_book_hits)']) logger.debug( "GoogleBooks language was changed %s times" % stats['sum(GB_lang_change)']) if lazylibrarian.BOOK_API == "GoodReads": logger.debug( "GoodReads was hit %s times for books" % stats['sum(GR_book_hits)']) logger.debug( "GoodReads was hit %s times for languages" % stats['sum(GR_lang_hits)']) logger.debug( "LibraryThing was hit %s times for languages" % stats['sum(LT_lang_hits)']) logger.debug( "Language cache was hit %s times" % stats['sum(cache_hits)']) logger.debug( "Unwanted language removed %s books" % stats['sum(bad_lang)']) logger.debug( "Unwanted characters removed %s books" % stats['sum(bad_char)']) logger.debug( "Unable to cache %s books with missing ISBN" % stats['sum(uncached)']) logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS)) logger.debug("ISBN Language cache holds %s entries" % cachesize['counter']) stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"')) if stats: logger.warn("There are %s books in your library with unknown language" % stats) authors = myDB.select('select AuthorName from authors') # Update bookcounts for all authors, not just new ones - refresh may have located # new books for existing authors especially if switched provider gb/gr logger.debug('Updating bookcounts for %i authors' % len(authors)) for author in authors: name = author['AuthorName'] havebooks = myDB.action( 'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' % name).fetchone() myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks['counter'], name)) totalbooks = myDB.action( 'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s"' % name).fetchone() myDB.action('UPDATE authors set TotalBooks="%s" where AuthorName="%s"' % (totalbooks['counter'], name)) unignoredbooks = myDB.action( 'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' % name).fetchone() myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (unignoredbooks['counter'], name)) covers = myDB.action("select count('bookimg') as counter from books where bookimg like 'http%'").fetchone() logger.info("Caching covers for %s books" % covers['counter']) images = myDB.action('select bookid, bookimg, bookname from books where bookimg like "http%"') for item in images: bookid = item['bookid'] bookimg = item['bookimg'] bookname = item['bookname'] newimg = bookwork.cache_cover(bookid, bookimg) if newimg != bookimg: myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid)) logger.info('Library scan complete')
def processCSV(search_dir=None): """ Find a csv file in the search_dir and process all the books in it, adding authors to the database if not found, and marking the books as "Wanted" """ if not search_dir: logger.warn("Alternate Directory must not be empty") return False csvFile = csv_file(search_dir) headers = None content = {} if not csvFile: logger.warn("No CSV file found in %s" % search_dir) else: logger.debug('Reading file %s' % csvFile) reader=csv.reader(open(csvFile)) for row in reader: if reader.line_num == 1: # If we are on the first line, create the headers list from the first row # by taking a slice from item 1 as we don't need the very first header. headers = row[1:] else: # Otherwise, the key in the content dictionary is the first item in the # row and we can create the sub-dictionary by using the zip() function. content[row[0]] = dict(zip(headers, row[1:])) # We can now get to the content by using the resulting dictionary, so to see # the list of lines, we can do: #print content.keys() # to get a list of bookIDs # To see the list of fields available for each book #print headers if 'Author' not in headers or 'Title' not in headers: logger.warn('Invalid CSV file found %s' % csvFile) return myDB = database.DBConnection() bookcount = 0 authcount = 0 skipcount = 0 logger.debug("CSV: Found %s entries in csv file" % len(content.keys())) for bookid in content.keys(): authorname = content[bookid]['Author'] authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone() if authmatch: logger.debug("CSV: Author %s found in database" % (authorname)) else: logger.debug("CSV: Author %s not found, adding to database" % (authorname)) importer.addAuthorToDB(authorname) authcount = authcount + 1 bookmatch = 0 isbn10="" isbn13="" bookname = content[bookid]['Title'] if 'ISBN' in headers: isbn10 = content[bookid]['ISBN'] if 'ISBN13' in headers: isbn13 = content[bookid]['ISBN13'] # try to find book in our database using isbn, or if that fails, fuzzy name matching if formatter.is_valid_isbn(isbn10): bookmatch = myDB.action('SELECT * FROM books where Bookisbn=%s' % (isbn10)).fetchone() if not bookmatch: if formatter.is_valid_isbn(isbn13): bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' % (isbn13)).fetchone() if not bookmatch: bookid = librarysync.find_book_in_db(myDB, authorname, bookname) if bookid: bookmatch = myDB.action('SELECT * FROM books where BookID="%s"' % (bookid)).fetchone() if bookmatch: authorname = bookmatch['AuthorName'] bookname = bookmatch['BookName'] bookid = bookmatch['BookID'] bookstatus = bookmatch['Status'] if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have': logger.info('Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus)) else: # skipped/ignored logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname)) controlValueDict = {"BookID": bookid} newValueDict = {"Status": "Wanted"} myDB.upsert("books", newValueDict, controlValueDict) bookcount = bookcount + 1 else: logger.warn("Skipping book %s by %s, not found in database" % (bookname, authorname)) skipcount = skipcount + 1 logger.info("Added %i new authors, marked %i books as 'Wanted', %i books not found" % (authcount, bookcount, skipcount))
def refreshAuthor(self, AuthorID): importer.addAuthorToDB(AuthorID) raise cherrypy.HTTPRedirect("authorPage?AuthorID=%s" % AuthorID)
def refreshAuthor(self, AuthorID): importer.addAuthorToDB(AuthorID) logger.debug('Refresh Author page for Author %s ' % AuthorID) raise cherrypy.HTTPRedirect("authorPage?AuthorID=%s" % AuthorID)
def LibraryScan(dir=None): if not dir: if not lazylibrarian.DOWNLOAD_DIR: return else: dir = lazylibrarian.DOWNLOAD_DIR if not os.path.isdir(dir): logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace')) return myDB = database.DBConnection() new_authors = [] logger.info('Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace')) book_list = [] new_book_count = 0 file_count = 0 book_exists = False if (lazylibrarian.FULL_SCAN): books = myDB.select( 'select AuthorName, BookName from books where Status=?', [u'Open']) status = lazylibrarian.NOTFOUND_STATUS logger.info('Missing books will be marked as %s' % status) for book in books: for book_type in getList(lazylibrarian.EBOOK_TYPE): bookName = book['BookName'] bookAuthor = book['AuthorName'] #Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace( '$Author', bookAuthor).replace('$Title', bookName) #dest_path = authorname+'/'+bookname global_name = lazylibrarian.EBOOK_DEST_FILE.replace( '$Author', bookAuthor).replace('$Title', bookName) encoded_book_path = os.path.join( dir, dest_path, global_name + "." + book_type).encode( lazylibrarian.SYS_ENCODING) if os.path.isfile(encoded_book_path): book_exists = True if not book_exists: myDB.action( 'update books set Status=? where AuthorName=? and BookName=?', [status, bookAuthor, bookName]) logger.info('Book %s updated as not found on disk' % encoded_book_path.decode( lazylibrarian.SYS_ENCODING, 'replace')) if bookAuthor not in new_authors: new_authors.append(bookAuthor) latest_subdirectory = [] for r, d, f in os.walk(dir): for directory in d[:]: if directory.startswith("."): d.remove(directory) #prevent magazine being scanned if directory.startswith("_"): d.remove(directory) for files in f: subdirectory = r.replace(dir, '') latest_subdirectory.append(subdirectory) logger.info( "[%s] Now scanning subdirectory %s" % (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'), subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace'))) matchString = '' for char in lazylibrarian.EBOOK_DEST_FILE: matchString = matchString + '\\' + char #massage the EBOOK_DEST_FILE config parameter into something we can use with regular expression matching booktypes = '' count = -1 booktype_list = getList(lazylibrarian.EBOOK_TYPE) for book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|' + book_type matchString = matchString.replace( "\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace( "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']' #pattern = re.compile(r'(?P<author>.*?)\s\-\s(?P<book>.*?)\.(?P<format>.*?)', re.VERBOSE) pattern = re.compile(matchString, re.VERBOSE) match = pattern.match(files) if match: author = match.group("author") book = match.group("book") #check if book is in database, and not marked as in library check_exist_book = myDB.action( "SELECT * FROM books where AuthorName=? and BookName=? and Status!=?", [author, book, 'Open']).fetchone() if not check_exist_book: check_exist_author = myDB.action( "SELECT * FROM authors where AuthorName=?", [author]).fetchone() if not check_exist_author and lazylibrarian.ADD_AUTHOR: GR = GoodReads(author) try: author_gr = GR.find_author_id() except: continue #only try to add if GR data matches found author data if author_gr: authorid = author_gr['authorid'] authorlink = author_gr['authorlink'] pageIdx = authorlink.rfind('/') authorlink = authorlink[pageIdx + 1:] #match_auth = authorid+"."+author.replace('. ','_') #Original Line does not allow author match. match_auth = author.replace('.', '_') match_auth = match_auth.replace(' ', '_') match_auth = match_auth.replace('__', '_') match_auth = authorid + "." + match_auth # Hopefully someone can come up with a more efficient way of doing this. logger.debug(match_auth) logger.debug(authorlink) if match_auth == authorlink: logger.info("Adding %s" % author) try: importer.addAuthorToDB(author) except: continue check_exist_book = myDB.action( "SELECT * FROM books where AuthorName=? and BookName=?", [author, book]).fetchone() if check_exist_book: if author not in new_authors: new_authors.append(author) myDB.action( 'UPDATE books set Status=? where AuthorName=? and BookName=?', ['Open', author, book]) new_book_count += 1 else: logger.info( "Unable to match %s in GoodReads database" % author) else: if author not in new_authors: new_authors.append(author) myDB.action( 'UPDATE books set Status=? where AuthorName=? and BookName=?', ['Open', author, book]) new_book_count += 1 file_count += 1 logger.info("%s new/modified books found and added to the database" % new_book_count) logger.info('Updating %i authors' % len(new_authors)) for auth in new_authors: havebooks = len( myDB.select( 'select BookName from Books where status=? and AuthorName=?', ['Open', auth])) myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?', [havebooks, auth]) totalbooks = len( myDB.select( 'select BookName from Books where status!=? and AuthorName=?', ['Ignored', auth])) myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?', [totalbooks, auth]) logger.info('Library scan complete')
def LibraryScan(startdir=None): """ Scan a directory tree adding new books into database Return how many books you added """ if not startdir: if not lazylibrarian.DESTINATION_DIR: return 0 else: startdir = lazylibrarian.DESTINATION_DIR if not os.path.isdir(startdir): logger.warn( 'Cannot find directory: %s. Not scanning' % startdir) return 0 myDB = database.DBConnection() # keep statistics of full library scans if startdir == lazylibrarian.DESTINATION_DIR: myDB.action('DELETE from stats') logger.info('Scanning ebook directory: %s' % startdir) new_book_count = 0 file_count = 0 author = "" if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR: books = myDB.select( 'select AuthorName, BookName, BookFile, BookID from books where Status="Open"') status = lazylibrarian.NOTFOUND_STATUS logger.info('Missing books will be marked as %s' % status) for book in books: bookName = book['BookName'] bookAuthor = book['AuthorName'] bookID = book['BookID'] bookfile = book['BookFile'] if not(bookfile and os.path.isfile(bookfile)): myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID)) myDB.action('update books set BookFile="" where BookID="%s"' % bookID) logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName)) # to save repeat-scans of the same directory if it contains multiple formats of the same book, # keep track of which directories we've already looked at processed_subdirectories = [] matchString = '' for char in lazylibrarian.EBOOK_DEST_FILE: matchString = matchString + '\\' + char # massage the EBOOK_DEST_FILE config parameter into something we can use # with regular expression matching booktypes = '' count = -1 booktype_list = getList(lazylibrarian.EBOOK_TYPE) for book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|' + book_type matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace( "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']' pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(startdir): for directory in d[:]: # prevent magazine being scanned if directory.startswith("_") or directory.startswith("."): d.remove(directory) for files in f: file_count += 1 if isinstance(r, str): r = r.decode(lazylibrarian.SYS_ENCODING) subdirectory = r.replace(startdir, '') # Added new code to skip if we've done this directory before. # Made this conditional with a switch in config.ini # in case user keeps multiple different books in the same subdirectory if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: # If this is a book, try to get author/title/isbn/language # if epub or mobi, read metadata from the book # If metadata.opf exists, use that allowing it to override # embedded metadata. User may have edited metadata.opf # to merge author aliases together # If all else fails, try pattern match for author/title # and look up isbn/lang from LT or GR later match = 0 if is_valid_booktype(files): logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory)) language = "Unknown" isbn = "" book = "" author = "" extn = os.path.splitext(files)[1] # if it's an epub or a mobi we can try to read metadata from it if (extn == ".epub") or (extn == ".mobi"): book_filename = os.path.join( r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING)) try: res = get_book_info(book_filename) except: res = {} if 'title' in res and 'creator' in res: # this is the minimum we need match = 1 book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'type' in res: extn = res['type'] logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn)) else: logger.debug("Book meta incomplete in %s" % book_filename) # calibre uses "metadata.opf", LL uses "bookname - authorname.opf" # just look for any .opf file in the current directory since we don't know # LL preferred authorname/bookname at this point. # Allow metadata in file to override book contents as may be users pref metafile = opf_file(r) try: res = get_book_info(metafile) except: res = {} if 'title' in res and 'creator' in res: # this is the minimum we need match = 1 book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] logger.debug( "file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book)) else: logger.debug("File meta incomplete in %s" % metafile) if not match: # no author/book from metadata file, and not embedded either match = pattern.match(files) if match: author = match.group("author") book = match.group("book") else: logger.debug("Pattern match failed [%s]" % files) if match: # flag that we found a book in this subdirectory processed_subdirectories.append(subdirectory) # If we have a valid looking isbn, and language != "Unknown", add it to cache if language != "Unknown" and is_valid_isbn(isbn): logger.debug( "Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already # there, is_valid_isbn has checked length is 10 or 13 if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.action( 'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone() if not match: myDB.action( 'insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug( "Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug( "Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) # get authors name in a consistent format if "," in author: # "surname, forename" words = author.split(',') author = words[1].strip() + ' ' + words[0].strip() # "forename surname" if author[1] == ' ': author = author.replace(' ', '.') author = author.replace('..', '.') # Check if the author exists, and import the author if not, # before starting any complicated book-name matching to save repeating the search # check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() if not check_exist_author and lazylibrarian.ADD_AUTHOR: # no match for supplied author, but we're allowed to # add new ones GR = GoodReads(author) try: author_gr = GR.find_author_id() except: logger.warn( "Error finding author id for [%s]" % author) continue # only try to add if GR data matches found author data if author_gr: authorname = author_gr['authorname'] # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien" match_auth = author.replace('.', '_') match_auth = match_auth.replace(' ', '_') match_auth = match_auth.replace('__', '_') match_name = authorname.replace('.', '_') match_name = match_name.replace(' ', '_') match_name = match_name.replace('__', '_') match_name = unaccented(match_name) match_auth = unaccented(match_auth) # allow a degree of fuzziness to cater for different accented character handling. # some author names have accents, # filename may have the accented or un-accented version of the character # The currently non-configurable value of fuzziness might need to go in config # We stored GoodReads unmodified author name in # author_gr, so store in LL db under that # fuzz.ratio doesn't lowercase for us match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower()) if match_fuzz < 90: logger.debug( "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz)) logger.debug( "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)) # To save loading hundreds of books by unknown # authors at GR or GB, ignore if author "Unknown" if (author != "Unknown") and (match_fuzz >= 90): # use "intact" name for author that we stored in # GR author_dict, not one of the various mangled versions # otherwise the books appear to be by a different author! author = author_gr['authorname'] # this new authorname may already be in the # database, so check again check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() if not check_exist_author: logger.info( "Adding new author [%s]" % author) try: addAuthorToDB(author) check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() except: continue # check author exists in db, either newly loaded or already there if not check_exist_author: logger.debug( "Failed to match author [%s] in database" % author) else: # author exists, check if this book by this author is in our database # metadata might have quotes in book name book = book.replace('"', '').replace("'", "") bookid = find_book_in_db(myDB, author, book) if bookid: # check if book is already marked as "Open" (if so, # we already had it) check_status = myDB.action( 'SELECT Status from books where BookID="%s"' % bookid).fetchone() if check_status['Status'] != 'Open': # update status as we've got this book myDB.action( 'UPDATE books set Status="Open" where BookID="%s"' % bookid) book_filename = os.path.join(r, files) # update book location so we can check if it # gets removed, or allow click-to-open myDB.action( 'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # update cover file to cover.jpg in book folder (if exists) bookdir = book_filename.rsplit(os.sep, 1)[0] coverimg = os.path.join(bookdir, 'cover.jpg') cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache') cacheimg = os.path.join(cachedir, bookid + '.jpg') if os.path.isfile(coverimg): copyfile(coverimg, cacheimg) new_book_count += 1 else: logger.debug( "Failed to match book [%s] by [%s] in database" % (book, author)) logger.info("%s new/modified book%s found and added to the database" % (new_book_count, plural(new_book_count))) logger.info("%s file%s processed" % (file_count, plural(file_count))) # show statistics of full library scans if startdir == lazylibrarian.DESTINATION_DIR: stats = myDB.action( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \ sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats").fetchone() if stats['sum(GR_book_hits)'] is not None: # only show stats if new books added if lazylibrarian.BOOK_API == "GoogleBooks": logger.debug("GoogleBooks was hit %s time%s for books" % (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)']))) logger.debug("GoogleBooks language was changed %s time%s" % (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)']))) if lazylibrarian.BOOK_API == "GoodReads": logger.debug("GoodReads was hit %s time%s for books" % (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)']))) logger.debug("GoodReads was hit %s time%s for languages" % (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)']))) logger.debug("LibraryThing was hit %s time%s for languages" % (stats['sum(LT_lang_hits)'], plural (stats['sum(LT_lang_hits)']))) logger.debug("Language cache was hit %s time%s" % (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)']))) logger.debug("Unwanted language removed %s book%s" % (stats['sum(bad_lang)'], plural (stats['sum(bad_lang)']))) logger.debug("Unwanted characters removed %s book%s" % (stats['sum(bad_char)'], plural(stats['sum(bad_char)']))) logger.debug("Unable to cache %s book%s with missing ISBN" % (stats['sum(uncached)'], plural(stats['sum(uncached)']))) logger.debug("Found %s duplicate book%s" % (stats['sum(duplicates)'], plural(stats['sum(duplicates)']))) logger.debug("Cache %s hit%s, %s miss" % (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS)) cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone() logger.debug("ISBN Language cache holds %s entries" % cachesize['counter']) nolang = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"')) if nolang: logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang))) authors = myDB.select('select AuthorID from authors') # Update bookcounts for all authors, not just new ones - refresh may have located # new books for existing authors especially if switched provider gb/gr else: # single author/book import authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author) logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors)))) for author in authors: update_totals(author['AuthorID']) images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"') if len(images): logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: bookid = item['bookid'] bookimg = item['bookimg'] bookname = item['bookname'] newimg = cache_cover(bookid, bookimg) if newimg is not None: myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid)) images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"') if len(images): logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: authorid = item['authorid'] authorimg = item['authorimg'] authorname = item['authorname'] newimg = cache_cover(authorid, authorimg) if newimg is not None: myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid)) setWorkPages() logger.info('Library scan complete') return new_book_count
def LibraryScan(dir=None): if not dir: if not lazylibrarian.DOWNLOAD_DIR: return else: dir = lazylibrarian.DOWNLOAD_DIR if not os.path.isdir(dir): logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace")) return myDB = database.DBConnection() myDB.action("drop table if exists stats") myDB.action( "create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \ GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )" ) logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace")) new_book_count = 0 file_count = 0 if lazylibrarian.FULL_SCAN: books = myDB.select('select AuthorName, BookName, BookFile, BookID from books where Status="Open"') status = lazylibrarian.NOTFOUND_STATUS logger.info("Missing books will be marked as %s" % status) for book in books: bookName = book["BookName"] bookAuthor = book["AuthorName"] bookID = book["BookID"] bookfile = book["BookFile"] if not (bookfile and os.path.isfile(bookfile)): myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID)) myDB.action('update books set BookFile="" where BookID="%s"' % bookID) logger.warn("Book %s - %s updated as not found on disk" % (bookAuthor, bookName)) # guess this was meant to save repeat-scans of the same directory # if it contains multiple formats of the same book, but there was no code # that looked at the array. renamed from latest to processed to make # purpose clearer processed_subdirectories = [] matchString = "" for char in lazylibrarian.EBOOK_DEST_FILE: matchString = matchString + "\\" + char # massage the EBOOK_DEST_FILE config parameter into something we can use # with regular expression matching booktypes = "" count = -1 booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE) for book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + "|" + book_type matchString = ( matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + "\.[" + booktypes + "]" ) pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(dir): for directory in d[:]: if directory.startswith("."): d.remove(directory) # prevent magazine being scanned if directory.startswith("_"): d.remove(directory) for files in f: file_count += 1 subdirectory = r.replace(dir, "") # Added new code to skip if we've done this directory before. # Made this conditional with a switch in config.ini # in case user keeps multiple different books in the same subdirectory if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: # If this is a book, try to get author/title/isbn/language # if epub or mobi, read metadata from the book # If metadata.opf exists, use that allowing it to override # embedded metadata. User may have edited metadata.opf # to merge author aliases together # If all else fails, try pattern match for author/title # and look up isbn/lang from LT or GR later match = 0 if formatter.is_valid_booktype(files): logger.debug( "[%s] Now scanning subdirectory %s" % ( dir.decode(lazylibrarian.SYS_ENCODING, "replace"), subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"), ) ) language = "Unknown" isbn = "" book = "" author = "" words = files.split(".") extn = words[len(words) - 1] # if it's an epub or a mobi we can try to read metadata from it if (extn == "epub") or (extn == "mobi"): book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING) try: res = get_book_info(book_filename) except: res = {} if "title" in res and "creator" in res: # this is the minimum we need match = 1 book = res["title"] author = res["creator"] if "language" in res: language = res["language"] if "identifier" in res: isbn = res["identifier"] if "type" in res: extn = res["type"] logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn)) else: logger.debug("Book meta incomplete in %s" % book_filename) # calibre uses "metadata.opf", LL uses "bookname - authorname.opf" # just look for any .opf file in the current directory since we don't know # LL preferred authorname/bookname at this point. # Allow metadata in file to override book contents as may be users pref metafile = opf_file(r) try: res = get_book_info(metafile) except: res = {} if "title" in res and "creator" in res: # this is the minimum we need match = 1 book = res["title"] author = res["creator"] if "language" in res: language = res["language"] if "identifier" in res: isbn = res["identifier"] logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book)) else: logger.debug("File meta incomplete in %s" % metafile) if not match: # no author/book from metadata file, and not embedded either match = pattern.match(files) if match: author = match.group("author") book = match.group("book") else: logger.debug("Pattern match failed [%s]" % files) if match: # flag that we found a book in this subdirectory processed_subdirectories.append(subdirectory) # If we have a valid looking isbn, and language != "Unknown", add it to cache if language != "Unknown" and formatter.is_valid_isbn(isbn): logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already # there, is_valid_isbn has checked length is 10 or 13 if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone() if not match: myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) # get authors name in a consistent format if "," in author: # "surname, forename" words = author.split(",") author = words[1].strip() + " " + words[0].strip() # "forename surname" if author[1] == " ": author = author.replace(" ", ".") author = author.replace("..", ".") # Check if the author exists, and import the author if not, # before starting any complicated book-name matching to save repeating the search # check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author ).fetchone() if not check_exist_author and lazylibrarian.ADD_AUTHOR: # no match for supplied author, but we're allowed to # add new ones GR = GoodReads(author) try: author_gr = GR.find_author_id() except: logger.warn("Error finding author id for [%s]" % author) continue # only try to add if GR data matches found author data if author_gr: authorname = author_gr["authorname"] # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien" match_auth = author.replace(".", "_") match_auth = match_auth.replace(" ", "_") match_auth = match_auth.replace("__", "_") match_name = authorname.replace(".", "_") match_name = match_name.replace(" ", "_") match_name = match_name.replace("__", "_") match_name = common.remove_accents(match_name) match_auth = common.remove_accents(match_auth) # allow a degree of fuzziness to cater for different accented character handling. # some author names have accents, # filename may have the accented or un-accented version of the character # The currently non-configurable value of fuzziness might need to go in config # We stored GoodReads unmodified author name in # author_gr, so store in LL db under that # fuzz.ratio doesn't lowercase for us match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower()) if match_fuzz < 90: logger.debug("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz)) logger.debug( "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name) ) # To save loading hundreds of books by unknown # authors at GR or GB, ignore if author "Unknown" if (author != "Unknown") and (match_fuzz >= 90): # use "intact" name for author that we stored in # GR author_dict, not one of the various mangled versions # otherwise the books appear to be by a different author! author = author_gr["authorname"] # this new authorname may already be in the # database, so check again check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author ).fetchone() if not check_exist_author: logger.debug("Adding new author [%s]" % author) try: importer.addAuthorToDB(author) check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author ).fetchone() except: continue # check author exists in db, either newly loaded or already there if not check_exist_author: logger.debug("Failed to match author [%s] in database" % author) else: # author exists, check if this book by this author is in our database # metadata might have quotes in book name book = book.replace('"', "").replace("'", "") bookid = find_book_in_db(myDB, author, book) if bookid: # check if book is already marked as "Open" (if so, # we already had it) check_status = myDB.action( 'SELECT Status from books where BookID="%s"' % bookid ).fetchone() if check_status["Status"] != "Open": # update status as we've got this book myDB.action('UPDATE books set Status="Open" where BookID="%s"' % bookid) book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING) # update book location so we can check if it # gets removed, or allow click-to-open myDB.action( 'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid) ) new_book_count += 1 cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone() logger.info("%s new/modified books found and added to the database" % new_book_count) logger.info("%s files processed" % file_count) stats = myDB.action( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \ sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats" ).fetchone() if stats["sum(GR_book_hits)"] is not None: # only show stats if new books added if lazylibrarian.BOOK_API == "GoogleBooks": logger.debug("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"]) logger.debug("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"]) if lazylibrarian.BOOK_API == "GoodReads": logger.debug("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"]) logger.debug("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"]) logger.debug("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"]) logger.debug("Language cache was hit %s times" % stats["sum(cache_hits)"]) logger.debug("Unwanted language removed %s books" % stats["sum(bad_lang)"]) logger.debug("Unwanted characters removed %s books" % stats["sum(bad_char)"]) logger.debug("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"]) logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS)) logger.debug("ISBN Language cache holds %s entries" % cachesize["counter"]) stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"')) if stats: logger.warn("There are %s books in your library with unknown language" % stats) authors = myDB.select("select AuthorName from authors") # Update bookcounts for all authors, not just new ones - refresh may have located # new books for existing authors especially if switched provider gb/gr logger.debug("Updating bookcounts for %i authors" % len(authors)) for author in authors: name = author["AuthorName"] havebooks = myDB.action( 'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' % name ).fetchone() myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks["counter"], name)) totalbooks = myDB.action( 'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' % name ).fetchone() myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (totalbooks["counter"], name)) logger.info("Library scan complete")
def LibraryScan(dir=None): if not dir: if not lazylibrarian.DOWNLOAD_DIR: return else: dir = lazylibrarian.DOWNLOAD_DIR if not os.path.isdir(dir): logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace')) return myDB = database.DBConnection() new_authors = [] logger.info('Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace')) book_list = [] new_book_count = 0 file_count = 0 book_exists = False if (lazylibrarian.FULL_SCAN): books = myDB.select('select AuthorName, BookName from books where Status=?',[u'Open']) status = lazylibrarian.NOTFOUND_STATUS logger.info('Missing books will be marked as %s' % status) for book in books: for book_type in getList(lazylibrarian.EBOOK_TYPE): bookName = book['BookName'] bookAuthor = book['AuthorName'] #Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName) #dest_path = authorname+'/'+bookname global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName) encoded_book_path = os.path.join(dir,dest_path,global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING) if os.path.isfile(encoded_book_path): book_exists = True if not book_exists: myDB.action('update books set Status=? where AuthorName=? and BookName=?',[status,bookAuthor,bookName]) logger.info('Book %s updated as not found on disk' % encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace') ) if bookAuthor not in new_authors: new_authors.append(bookAuthor) latest_subdirectory = [] for r,d,f in os.walk(dir): for directory in d[:]: if directory.startswith("."): d.remove(directory) #prevent magazine being scanned if directory.startswith("_"): d.remove(directory) for files in f: subdirectory = r.replace(dir,'') latest_subdirectory.append(subdirectory) logger.info("[%s] Now scanning subdirectory %s" % (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'), subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace'))) matchString = '' for char in lazylibrarian.EBOOK_DEST_FILE: matchString = matchString + '\\' + char #massage the EBOOK_DEST_FILE config parameter into something we can use with regular expression matching booktypes = '' count=-1; booktype_list = getList(lazylibrarian.EBOOK_TYPE) for book_type in booktype_list: count+=1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|'+book_type matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e","(?P<book>.*?)")+'\.['+booktypes+']' #pattern = re.compile(r'(?P<author>.*?)\s\-\s(?P<book>.*?)\.(?P<format>.*?)', re.VERBOSE) pattern = re.compile(matchString, re.VERBOSE) match = pattern.match(files) if match: author = match.group("author") book = match.group("book") #check if book is in database, and not marked as in library check_exist_book = myDB.action("SELECT * FROM books where AuthorName=? and BookName=? and Status!=?",[author,book,'Open']).fetchone() if not check_exist_book: check_exist_author = myDB.action("SELECT * FROM authors where AuthorName=?",[author]).fetchone() if not check_exist_author and lazylibrarian.ADD_AUTHOR: GR = GoodReads(author) try: author_gr = GR.find_author_id() except: continue #only try to add if GR data matches found author data if author_gr: authorid = author_gr['authorid'] authorlink = author_gr['authorlink'] pageIdx = authorlink.rfind('/') authorlink = authorlink[pageIdx+1:] match_auth = authorid+"."+author.replace('. ','_') logger.debug(match_auth) logger.debug(authorlink) if match_auth == authorlink: logger.info("Adding %s" % author) try: importer.addAuthorToDB(author) except: continue check_exist_book = myDB.action("SELECT * FROM books where AuthorName=? and BookName=?",[author,book]).fetchone() if check_exist_book: if author not in new_authors: new_authors.append(author) myDB.action('UPDATE books set Status=? where AuthorName=? and BookName=?',['Open',author,book]) new_book_count += 1 else: logger.info("Unable to match %s in GoodReads database" % author) else: if author not in new_authors: new_authors.append(author) myDB.action('UPDATE books set Status=? where AuthorName=? and BookName=?',['Open',author,book]) new_book_count += 1 file_count += 1 logger.info("%s new/modified books found and added to the database" % new_book_count) logger.info('Updating %i authors' % len(new_authors)) for auth in new_authors: havebooks = len(myDB.select('select BookName from Books where status=? and AuthorName=?',['Open',auth])) myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?',[havebooks,auth]) totalbooks = len(myDB.select('select BookName from Books where status!=? and AuthorName=?',['Ignored',auth])) myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?',[totalbooks,auth]) logger.info('Library scan complete')
def import_CSV(search_dir=None): """ Find a csv file in the search_dir and process all the books in it, adding authors to the database if not found and marking the books as "Wanted" """ if not search_dir or os.path.isdir(search_dir) is False: logger.warn(u"Please check Alternate Directory setting") return False csvFile = csv_file(search_dir) headers = None content = {} if not csvFile: logger.warn(u"No CSV file found in %s" % search_dir) else: logger.debug(u'Reading file %s' % csvFile) reader = csv.reader(open(csvFile)) for row in reader: if reader.line_num == 1: # If we are on the first line, create the headers list from the first row headers = row else: # Otherwise, the key in the content dictionary is the first item in the # row and we can create the sub-dictionary by using the zip() function. # we include the key in the dictionary as our exported csv files use # bookid as the key content[row[0]] = dict(zip(headers, row)) # We can now get to the content by using the resulting dictionary, so to see # the list of lines, we can do: # print content.keys() # to get a list of keys # To see the list of fields available for each book # print headers if 'Author' not in headers or 'Title' not in headers: logger.warn(u'Invalid CSV file found %s' % csvFile) return myDB = database.DBConnection() bookcount = 0 authcount = 0 skipcount = 0 logger.debug(u"CSV: Found %s book%s in csv file" % (len(content.keys()), plural(len(content.keys())))) for item in content.keys(): authorname = content[item]['Author'] if hasattr(authorname, 'decode'): authorname = authorname.decode(lazylibrarian.SYS_ENCODING) authmatch = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone() if authmatch: newauthor = False logger.debug(u"CSV: Author %s found in database" % (authorname)) else: newauthor = True logger.debug(u"CSV: Author %s not found, adding to database" % (authorname)) addAuthorToDB(authorname) authcount = authcount + 1 bookmatch = finditem(content[item], headers) # if we didn't find it, maybe author info is stale if not bookmatch and not newauthor: addAuthorToDB(authorname, refresh=True) bookmatch = finditem(content[item], headers) if bookmatch: authorname = bookmatch['AuthorName'] bookname = bookmatch['BookName'] bookid = bookmatch['BookID'] bookstatus = bookmatch['Status'] if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have': logger.info( u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus)) else: # skipped/ignored logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname)) controlValueDict = {"BookID": bookid} newValueDict = {"Status": "Wanted"} myDB.upsert("books", newValueDict, controlValueDict) bookcount = bookcount + 1 else: logger.warn(u"Skipping book %s by %s, not found in database" % (bookname, authorname)) skipcount = skipcount + 1 logger.info( u"Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount)))
def LibraryScan(dir=None): if not dir: if not lazylibrarian.DOWNLOAD_DIR: return else: dir = lazylibrarian.DOWNLOAD_DIR if not os.path.isdir(dir): logger.warn( 'Cannot find directory: %s. Not scanning' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace')) return myDB = database.DBConnection() myDB.action('drop table if exists stats') myDB.action( 'create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )') new_authors = [] logger.info( 'Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace')) new_book_count = 0 file_count = 0 if lazylibrarian.FULL_SCAN: books = myDB.select( 'select AuthorName, BookName, BookFile, BookID from books where Status="Open"') status = lazylibrarian.NOTFOUND_STATUS logger.info('Missing books will be marked as %s' % status) for book in books: bookName = book['BookName'] bookAuthor = book['AuthorName'] bookID = book['BookID'] bookfile = book['BookFile'] if not(bookfile and os.path.isfile(bookfile)): myDB.action( 'update books set Status="%s" where BookID="%s"' % (status, bookID)) myDB.action( 'update books set BookFile="" where BookID="%s"' % bookID) logger.warn( 'Book %s - %s updated as not found on disk' % (bookAuthor, bookName)) # for book_type in getList(lazylibrarian.EBOOK_TYPE): # bookName = book['BookName'] # bookAuthor = book['AuthorName'] # Default destination path, should be allowed change per config file. # dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName) # dest_path = authorname+'/'+bookname # global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName) # # encoded_book_path = os.path.join(dir,dest_path,global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING) # if os.path.isfile(encoded_book_path): # book_exists = True # if not book_exists: # myDB.action('update books set Status=? where AuthorName=? and BookName=?',[status,bookAuthor,bookName]) # logger.info('Book %s updated as not found on disk' % # encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace') ) if bookAuthor not in new_authors: new_authors.append(bookAuthor) # guess this was meant to save repeat-scans of the same directory # if it contains multiple formats of the same book, but there was no code # that looked at the array. renamed from latest to processed to make # purpose clearer processed_subdirectories = [] matchString = '' for char in lazylibrarian.EBOOK_DEST_FILE: matchString = matchString + '\\' + char # massage the EBOOK_DEST_FILE config parameter into something we can use # with regular expression matching booktypes = '' count = -1 booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE) for book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|' + book_type matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace( "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']' pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(dir): for directory in d[:]: if directory.startswith("."): d.remove(directory) # prevent magazine being scanned if directory.startswith("_"): d.remove(directory) for files in f: file_count += 1 subdirectory = r.replace(dir, '') # Added new code to skip if we've done this directory before. Made this conditional with a switch in config.ini # in case user keeps multiple different books in the same # subdirectory if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: # If this is a book, try to get author/title/isbn/language # If metadata.opf exists, use that # else if epub or mobi, read metadata from the book # else have to try pattern match for author/title and look up isbn/lang from LT or GR late match = 0 extn = "" if '.' in files: words = files.split('.') extn = words[len(words) - 1] if formatter.is_valid_booktype(files): logger.debug( "[%s] Now scanning subdirectory %s" % (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'), subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace'))) # calibre uses "metadata.opf", LL uses "bookname - authorname.opf" # just look for any .opf file in the current directory since we don't know # LL preferred authorname/bookname at this point metafile = opf_file(r) try: res = get_book_info(metafile) except: res = {} if 'title' in res and 'creator' in res: # this is the minimum we need book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] else: language = "" if 'identifier' in res: isbn = res['identifier'] else: isbn = "" match = 1 logger.debug( "file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book)) else: logger.debug("File meta incomplete in %s" % metafile) if not match: # it's a book, but no external metadata found # if it's an epub or a mobi we can try to read metadata # from it if (extn == "epub") or (extn == "mobi"): book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING) try: res = get_book_info(book_filename) except: res = {} if 'title' in res and 'creator' in res: # this is the minimum we need book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] else: language = "" if 'identifier' in res: isbn = res['identifier'] else: isbn = "" logger.debug("book meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book)) match = 1 else: logger.debug("Book meta incomplete in %s" % book_filename) if not match: match = pattern.match(files) if match: author = match.group("author") book = match.group("book") else: logger.debug("Pattern match failed [%s]" % files) if match: processed_subdirectories.append( subdirectory) # flag that we found a book in this subdirectory # # If we have a valid looking isbn, and language != "Unknown", add it to cache # if not language: language = "Unknown" if not formatter.is_valid_isbn(isbn): isbn = "" if isbn != "" and language != "Unknown": logger.debug( "Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already # there, is_valid_isbn has checked length is 10 or 13 if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.action( 'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone() if not match: myDB.action( 'insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug( "Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug( "Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) # get authors name in a consistent format if "," in author: # "surname, forename" words = author.split(',') author = words[1].strip() + ' ' + words[0].strip() # "forename surname" if author[1] == ' ': author = author.replace(' ', '.') author = author.replace('..', '.') # Check if the author exists, and import the author if not, # before starting any complicated book-name matching to save repeating the search # check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() if not check_exist_author and lazylibrarian.ADD_AUTHOR: # no match for supplied author, but we're allowed to # add new ones GR = GoodReads(author) try: author_gr = GR.find_author_id() except: logger.warn( "Error finding author id for [%s]" % author) continue # only try to add if GR data matches found author data # not sure what this is for, never seems to fail?? if author_gr: authorname = author_gr['authorname'] # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien" match_auth = author.replace('.', '_') match_auth = match_auth.replace(' ', '_') match_auth = match_auth.replace('__', '_') match_name = authorname.replace('.', '_') match_name = match_name.replace(' ', '_') match_name = match_name.replace('__', '_') match_name = common.remove_accents(match_name) match_auth = common.remove_accents(match_auth) # allow a degree of fuzziness to cater for different accented character handling. # some author names have accents, # filename may have the accented or un-accented version of the character # The currently non-configurable value of fuzziness might need to go in config # We stored GoodReads unmodified author name in # author_gr, so store in LL db under that match_fuzz = fuzz.ratio(match_auth, match_name) if match_fuzz < 90: logger.debug( "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz)) logger.debug( "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)) # To save loading hundreds of books by unknown # authors at GR or GB, ignore if author "Unknown" if (author != "Unknown") and (match_fuzz >= 90): # use "intact" name for author that we stored in # GR author_dict, not one of the various mangled versions # otherwise the books appear to be by a # different author! author = author_gr['authorname'] # this new authorname may already be in the # database, so check again check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() if not check_exist_author: logger.debug( "Adding new author [%s]" % author) if author not in new_authors: new_authors.append(author) try: importer.addAuthorToDB(author) check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() except: continue # check author exists in db, either newly loaded or already # there if not check_exist_author: logger.debug( "Failed to match author [%s] in database" % author) else: # author exists, check if this book by this author is in our database # metadata might have quotes in book name book = book.replace('"', '').replace("'", "") bookid = find_book_in_db(myDB, author, book) if bookid: # check if book is already marked as "Open" (if so, # we already had it) check_status = myDB.action( 'SELECT Status from books where BookID="%s"' % bookid).fetchone() if check_status['Status'] != 'Open': # update status as we've got this book myDB.action( 'UPDATE books set Status="Open" where BookID="%s"' % bookid) book_filename = os.path.join( r, files).encode( lazylibrarian.SYS_ENCODING) # update book location so we can check if it # gets removed, or allow click-to-open myDB.action( 'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) new_book_count += 1 cachesize = myDB.action("select count(*) from languages").fetchone() logger.info( "%s new/modified books found and added to the database" % new_book_count) logger.info("%s files processed" % file_count) if new_book_count: stats = myDB.action( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone() if lazylibrarian.BOOK_API == "GoogleBooks": logger.debug( "GoogleBooks was hit %s times for books" % stats['sum(GR_book_hits)']) logger.debug( "GoogleBooks language was changed %s times" % stats['sum(GB_lang_change)']) if lazylibrarian.BOOK_API == "GoodReads": logger.debug( "GoodReads was hit %s times for books" % stats['sum(GR_book_hits)']) logger.debug( "GoodReads was hit %s times for languages" % stats['sum(GR_lang_hits)']) logger.debug( "LibraryThing was hit %s times for languages" % stats['sum(LT_lang_hits)']) logger.debug( "Language cache was hit %s times" % stats['sum(cache_hits)']) logger.debug( "Unwanted language removed %s books" % stats['sum(bad_lang)']) logger.debug( "Unwanted characters removed %s books" % stats['sum(bad_char)']) logger.debug( "Unable to cache %s books with missing ISBN" % stats['sum(uncached)']) logger.debug("ISBN Language cache holds %s entries" % cachesize['count(*)']) stats = len( myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"')) if stats: logger.warn( "There are %s books in your library with unknown language" % stats) logger.debug('Updating %i authors' % len(new_authors)) for auth in new_authors: havebooks = len( myDB.select('select BookName from Books where status="%s" and AuthorName="%s"' % ('Open', auth))) myDB.action( 'UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks, auth)) totalbooks = len( myDB.select('select BookName from Books where status!="%s" and AuthorName="%s"' % ('Ignored', auth))) myDB.action( 'UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (totalbooks, auth)) logger.info('Library scan complete')
def refreshAuthor(self, AuthorID): importer.addAuthorToDB(AuthorID) logger.debug('Refresh Author page for Author %s '% AuthorID) raise cherrypy.HTTPRedirect("authorPage?AuthorID=%s" % AuthorID)