Ejemplo n.º 1
0
 def _addAuthor(self, **kwargs):
     if 'name' not in kwargs:
         self.data = 'Missing parameter: name'
         return
     else:
         self.id = kwargs['name']
     try:
         importer.addAuthorToDB(self.id, refresh=False)
     except Exception as e:
         self.data = e
Ejemplo n.º 2
0
 def _addAuthor(self, **kwargs):
     if 'name' not in kwargs:
         self.data = 'Missing parameter: name'
         return
     else:
         self.id = kwargs['name']
     try:
         addAuthorToDB(self.id, refresh=False)
     except Exception as e:
         self.data = str(e)
Ejemplo n.º 3
0
    def _refreshAuthor(self, **kwargs):
        if 'name' not in kwargs:
            self.data = 'Missing parameter: name'
            return
        else:
            self.id = kwargs['name']

        try:
            importer.addAuthorToDB(self.id, refresh=True)
        except Exception as e:
            self.data = e
Ejemplo n.º 4
0
def processAlternate(source_dir=None):
    # import a book from an alternate directory
    if not source_dir or os.path.isdir(source_dir) is False:
        logger.warn('Alternate directory must not be empty')
        return
    if source_dir == lazylibrarian.DESTINATION_DIR:
        logger.warn('Alternate directory must not be the same as destination')
        return
    new_book = book_file(source_dir, booktype='book')
    if new_book:
        # see if there is a metadata file in this folder with the info we need
        metafile = librarysync.opf_file(source_dir)
        try:
            metadata = librarysync.get_book_info(metafile)
        except:
            metadata = {}
        if 'title' in metadata and 'creator' in metadata:
            authorname = metadata['creator']
            bookname = metadata['title']
        # if not, try to get metadata from the book file
        else:
            try:
                metadata = librarysync.get_book_info(new_book)
            except:
                metadata = {}
        if 'title' in metadata and 'creator' in metadata:
            authorname = metadata['creator']
            bookname = metadata['title']
            myDB = database.DBConnection()

            authmatch = myDB.action(
                'SELECT * FROM authors where AuthorName="%s"' %
                (authorname)).fetchone()

            if authmatch:
                logger.debug("ALT: Author %s found in database" % (authorname))
            else:
                logger.debug("ALT: Author %s not found, adding to database" %
                             (authorname))
                importer.addAuthorToDB(authorname)

            bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
            if bookid:
                import_book(source_dir, bookid)
            else:
                logger.warn("Book %s by %s not found in database" %
                            (bookname, authorname))
        else:
            logger.warn('Book %s has no metadata, unable to import' % new_book)
    else:
        logger.warn("No book file found in %s" % source_dir)
Ejemplo n.º 5
0
def dbUpdate(forcefull=False):

    myDB = database.DBConnection()

    activeauthors = myDB.select('SELECT AuthorID, AuthorName from authors WHERE Status="Active" \
                                or Status="Loading" order by DateAdded ASC')
    logger.info('Starting update for %i active author%s' % (len(activeauthors), plural(len(activeauthors))))

    for author in activeauthors:
        # authorid = author[0]
        authorname = author[1]
        importer.addAuthorToDB(authorname, refresh=True)

    logger.info('Active author update complete')
Ejemplo n.º 6
0
    def _refreshAuthor(self, **kwargs):
        refresh = False
        if 'refresh' in kwargs:
            refresh = True
        if 'name' not in kwargs:
            self.data = 'Missing parameter: name'
            return
        else:
            self.id = kwargs['name']

        try:
            addAuthorToDB(self.id, refresh=refresh)
        except Exception as e:
            self.data = "%s %s" % (type(e).__name__, str(e))
Ejemplo n.º 7
0
def dbUpdate(forcefull=False):

    myDB = database.DBConnection()

    activeauthors = myDB.select('SELECT AuthorID, AuthorName from authors WHERE Status="Active" or Status="Loading" order by DateAdded ASC')
    logger.info('Starting update for %i active authors' % len(activeauthors))
    
    for author in activeauthors:
    
        authorid = author[0]
        authorname = author[1]
        importer.addAuthorToDB(authorname, refresh=True)
        
    logger.info('Active author update complete')
Ejemplo n.º 8
0
def dbUpdate(refresh=False):
    try:
        myDB = database.DBConnection()

        activeauthors = myDB.select('SELECT AuthorName from authors WHERE Status="Active" \
                                    or Status="Loading" order by DateAdded ASC')
        logger.info('Starting update for %i active author%s' % (len(activeauthors), plural(len(activeauthors))))

        for author in activeauthors:
            authorname = author[0]
            importer.addAuthorToDB(authorname, refresh=refresh)

        logger.info('Active author update complete')
    except Exception as e:
        logger.error('Unhandled exception in dbUpdate: %s' % traceback.format_exc())
Ejemplo n.º 9
0
def db_v29(myDB, upgradelog):
    if not has_column(myDB, "books", "WorkID"):
        lazylibrarian.UPDATE_MSG = 'Adding WorkID to member and books tables'
        upgradelog.write("%s v29: %s\n" % (time.ctime(), lazylibrarian.UPDATE_MSG))
        myDB.action('ALTER TABLE books ADD COLUMN WorkID TEXT')
    if not has_column(myDB, "member", "WorkID"):
        myDB.action('ALTER TABLE member ADD COLUMN WorkID TEXT')
        myDB.action('DROP TABLE IF EXISTS temp_table')
        myDB.action('ALTER TABLE series RENAME TO temp_table')
        myDB.action('CREATE TABLE series (SeriesID INTEGER UNIQUE, SeriesName TEXT, Status TEXT)')
        myDB.action('INSERT INTO series SELECT SeriesID,SeriesName,Status FROM temp_table')
        myDB.action('DROP TABLE temp_table')
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        authors = myDB.select('SELECT AuthorID,AuthorName,TotalBooks from authors WHERE Status != "Ignored"')
        books = myDB.match('SELECT sum(totalbooks) as total from authors  WHERE Status != "Ignored"')
        tot = len(authors)
        if tot:
            upgradelog.write("%s v29: Upgrading %s authors, %s books\n" % (time.ctime(), tot, books['total']))
            start_count = int(books['total']) + tot
            start_time = time.time()
            entries_done = 0
            myDB.action('DELETE FROM seriesauthors')
            cnt = 0
            for author in authors:
                cnt += 1
                expected_books = author['TotalBooks']
                if not expected_books:
                    expected_books = '0'
                lazylibrarian.UPDATE_MSG = "Updating %s (%s books): %s" % (author['AuthorName'], expected_books,
                                                                           calc_eta(start_time, start_count,
                                                                                    entries_done))
                addAuthorToDB(authorname=None, refresh=True, authorid=author['AuthorID'], addbooks=True)
                entries_done += int(expected_books)  # may have extra books now, don't overcount
                entries_done += 1   # one less author

        members = myDB.select('SELECT BookID from member')
        tot = len(members)
        if tot:
            upgradelog.write("%s v29: Upgrading %s series members\n" % (time.ctime(), tot))
            cnt = 0
            for member in members:
                cnt += 1
                lazylibrarian.UPDATE_MSG = "Updating series members %s of %s" % (cnt, tot)
                res = myDB.match('SELECT WorkID from books WHERE BookID=?', (member['BookID'],))
                if res:
                    myDB.action('UPDATE member SET WorkID=? WHERE BookID=?', (res['WorkID'], member['BookID']))
    upgradelog.write("%s v29: complete\n" % time.ctime())
Ejemplo n.º 10
0
def processAlternate(source_dir=None):
# import a book from an alternate directory
    if source_dir == None or source_dir == "":
        logger.warn('Alternate directory must not be empty')
        return
    if source_dir == lazylibrarian.DESTINATION_DIR:
        logger.warn('Alternate directory must not be the same as destination')
        return
    new_book = book_file(source_dir)
    if new_book:
        # see if there is a metadata file in this folder with the info we need
        metafile = librarysync.opf_file(source_dir)
        try:
            metadata = librarysync.get_book_info(metafile)
        except:
            metadata = {}
        if 'title' in metadata and 'creator' in metadata:
            authorname = metadata['creator']
            bookname = metadata['title']
        # if not, try to get metadata from the book file
        else:            
            try:
                metadata = librarysync.get_book_info(new_book)
            except:
                metadata = {}
        if 'title' in metadata and 'creator' in metadata:
            authorname = metadata['creator']
            bookname = metadata['title']
            myDB = database.DBConnection()
            
            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()
        
            if authmatch:
                logger.debug("ALT: Author %s found in database" % (authorname))
            else:
                logger.debug("ALT: Author %s not found, adding to database" % (authorname))
                importer.addAuthorToDB(authorname)
                
            bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
            if bookid:
                import_book(source_dir, bookid)
            else:
                logger.warn("Book %s by %s not found in database" % (bookname, authorname))
        else:
            logger.warn('Book %s has no metadata, unable to import' % new_book)
    else:
        logger.warn("No book file found in %s" % source_dir)
Ejemplo n.º 11
0
 def _addAuthorID(self, **kwargs):
     if 'id' not in kwargs:
         self.data = 'Missing parameter: id'
         return
     else:
         self.id = kwargs['id']
     try:
         self.data = addAuthorToDB(refresh=False, authorid=self.id)
     except Exception as e:
         self.data = "%s %s" % (type(e).__name__, str(e))
Ejemplo n.º 12
0
def processAlternate(source_dir=None):
    # import a book from an alternate directory
    try:
        if not source_dir:
            logger.warn("Alternate Directory not configured")
            return False
        elif not os.path.isdir(source_dir):
            logger.warn("Alternate Directory [%s] not found" % source_dir)
            return False
        if source_dir == lazylibrarian.DIRECTORY('Destination'):
            logger.warn('Alternate directory must not be the same as Destination')
            return False

        logger.debug('Processing alternate directory %s' % source_dir)
        # first, recursively process any books in subdirectories
        for fname in os.listdir(source_dir):
            subdir = os.path.join(source_dir, fname)
            if os.path.isdir(subdir):
                processAlternate(subdir)
        # only import one book from each alternate (sub)directory, this is because
        # the importer may delete the directory after importing a book,
        # depending on lazylibrarian.DESTINATION_COPY setting
        # also if multiple books in a folder and only a "metadata.opf"
        # which book is it for?
        new_book = book_file(source_dir, booktype='book')
        if new_book:
            metadata = {}
            # see if there is a metadata file in this folder with the info we need
            # try book_name.opf first, or fall back to any filename.opf
            metafile = os.path.splitext(new_book)[0] + '.opf'
            if not os.path.isfile(metafile):
                metafile = opf_file(source_dir)
            if metafile and os.path.isfile(metafile):
                try:
                    metadata = get_book_info(metafile)
                except Exception as e:
                    logger.debug('Failed to read metadata from %s, %s' % (metafile, str(e)))
            else:
                logger.debug('No metadata file found for %s' % new_book)
            if 'title' not in metadata or 'creator' not in metadata:
                # if not got both, try to get metadata from the book file
                try:
                    metadata = get_book_info(new_book)
                except Exception as e:
                    logger.debug('No metadata found in %s, %s' % (new_book, str(e)))
            if 'title' in metadata and 'creator' in metadata:
                authorname = metadata['creator']
                bookname = metadata['title']
                myDB = database.DBConnection()

                authmatch = myDB.match('SELECT * FROM authors where AuthorName="%s"' % (authorname))

                if not authmatch:
                    # try goodreads preferred authorname
                    logger.debug("Checking GoodReads for [%s]" % authorname)
                    GR = GoodReads(authorname)
                    try:
                        author_gr = GR.find_author_id()
                    except Exception:
                        logger.debug("No author id for [%s]" % authorname)
                    if author_gr:
                        grauthorname = author_gr['authorname']
                        logger.debug("GoodReads reports [%s] for [%s]" % (grauthorname, authorname))
                        authorname = grauthorname
                        authmatch = myDB.match('SELECT * FROM authors where AuthorName="%s"' % (authorname))

                if authmatch:
                    logger.debug("ALT: Author %s found in database" % (authorname))
                else:
                    logger.debug("ALT: Author %s not found, adding to database" % (authorname))
                    addAuthorToDB(authorname)

                bookid = find_book_in_db(myDB, authorname, bookname)
                if bookid:
                    return import_book(source_dir, bookid)
                else:
                    logger.warn("Book %s by %s not found in database" % (bookname, authorname))
            else:
                logger.warn('Book %s has no metadata, unable to import' % new_book)
        else:
            logger.warn("No book file found in %s" % source_dir)
        return False
    except Exception as e:
        logger.error('Unhandled exception in processAlternate: %s' % traceback.format_exc())
Ejemplo n.º 13
0
 def refreshAuthor(self, AuthorID, AuthorName):
     importer.addAuthorToDB(AuthorName)
     raise cherrypy.HTTPRedirect("authorPage?AuthorName=%s" % AuthorName)
Ejemplo n.º 14
0
def processAlternate(source_dir=None):
    # import a book from an alternate directory
    if not source_dir or os.path.isdir(source_dir) is False:
        logger.warn('Alternate directory not found')
        return
    if source_dir == lazylibrarian.DESTINATION_DIR:
        logger.warn('Alternate directory must not be the same as destination')
        return

    logger.debug('Processing alternate directory %s' % source_dir)
    # first, recursively process any books in subdirectories
    for fname in os.listdir(source_dir):
        subdir = os.path.join(source_dir, fname)
        if os.path.isdir(subdir):
            processAlternate(subdir)
    # only import one book from each alternate (sub)directory, this is because
    # the importer may delete the directory after importing a book,
    # depending on lazylibrarian.DESTINATION_COPY setting
    # also if multiple books in a folder and only a "metadata.opf"
    # which book is it for?
    new_book = book_file(source_dir, booktype='book')
    if new_book:
        metadata = {}
        # see if there is a metadata file in this folder with the info we need
        # try book_name.opf first, or fall back to any filename.opf
        metafile = os.path.splitext(new_book)[0] + '.opf'
        if not os.path.isfile(metafile):
            metafile = librarysync.opf_file(source_dir)
        if os.path.isfile(metafile):
            try:
                metadata = librarysync.get_book_info(metafile)
            except:
                logger.debug('Failed to read metadata from %s' % metafile)
        else:
            logger.debug('No metadata file found for %s' % new_book)
        if not 'title' in metadata and 'creator' in metadata:
            # try to get metadata from the book file
            try:
                metadata = librarysync.get_book_info(new_book)
            except:
                logger.debug('No metadata found in %s' % new_book)
        if 'title' in metadata and 'creator' in metadata:
            authorname = metadata['creator']
            bookname = metadata['title']
            myDB = database.DBConnection()

            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()

            if authmatch:
                logger.debug("ALT: Author %s found in database" % (authorname))
            else:
                logger.debug("ALT: Author %s not found, adding to database" % (authorname))
                importer.addAuthorToDB(authorname)

            bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
            if bookid:
                import_book(source_dir, bookid)
            else:
                logger.warn("Book %s by %s not found in database" % (bookname, authorname))
        else:
            logger.warn('Book %s has no metadata, unable to import' % new_book)
    else:
        logger.warn("No book file found in %s" % source_dir)
Ejemplo n.º 15
0
def processCSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
    adding authors to the database if not found, and marking the books as "Wanted" """

    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Alternate Directory must not be empty")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
    else:
        logger.debug(u'Reading file %s' % csvFile)
        reader = csv.reader(open(csvFile))
        for row in reader:
            if reader.line_num == 1:
                # If we are on the first line, create the headers list from the first row
                # by taking a slice from item 1  as we don't need the very first header.
                headers = row[1:]
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                content[row[0]] = dict(zip(headers, row[1:]))

        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do:
        # print content.keys() # to get a list of bookIDs
        # To see the list of fields available for each book
        # print headers

        if 'Author' not in headers or 'Title' not in headers:
            logger.warn(u'Invalid CSV file found %s' % csvFile)
            return

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        logger.debug(u"CSV: Found %s entries in csv file" % len(content.keys()))
        for bookid in content.keys():

            authorname = formatter.latinToAscii(content[bookid]['Author'])
            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()

            if authmatch:
                logger.debug(u"CSV: Author %s found in database" % (authorname))
            else:
                logger.debug(u"CSV: Author %s not found, adding to database" % (authorname))
                importer.addAuthorToDB(authorname)
                authcount = authcount + 1

            bookmatch = 0
            isbn10 = ""
            isbn13 = ""
            bookname = formatter.latinToAscii(content[bookid]['Title'])
            if 'ISBN' in headers:
                isbn10 = content[bookid]['ISBN']
            if 'ISBN13' in headers:
                isbn13 = content[bookid]['ISBN13']

            # try to find book in our database using isbn, or if that fails, name matching
            if formatter.is_valid_isbn(isbn10):
                bookmatch = myDB.action('SELECT * FROM books where Bookisbn=%s' % (isbn10)).fetchone()
            if not bookmatch:
                if formatter.is_valid_isbn(isbn13):
                    bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' % (isbn13)).fetchone()
            if not bookmatch:
                bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
                if bookid:
                    bookmatch = myDB.action('SELECT * FROM books where BookID="%s"' % (bookid)).fetchone()
            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                    logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                else:  # skipped/ignored
                    logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount = bookcount + 1
            else:
                logger.warn(u"Skipping book %s by %s, not found in database" % (bookname, authorname))
                skipcount = skipcount + 1
        logger.info(u"Added %i new authors, marked %i books as 'Wanted', %i books not found" %
                    (authcount, bookcount, skipcount))
Ejemplo n.º 16
0
def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
    """

    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Please check Alternate Directory setting")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
    else:
        logger.debug(u'Reading file %s' % csvFile)
        reader = csv.reader(open(csvFile))
        for row in reader:
            if reader.line_num == 1:
                # If we are on the first line, create the headers list from the first row
                headers = row
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                # we include the key in the dictionary as our exported csv files use
                # bookid as the key
                content[row[0]] = dict(zip(headers, row))

        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do:
        # print content.keys() # to get a list of keys
        # To see the list of fields available for each book
        # print headers

        if 'Author' not in headers or 'Title' not in headers:
            logger.warn(u'Invalid CSV file found %s' % csvFile)
            return

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        logger.debug(u"CSV: Found %s book%s in csv file" % (len(content.keys()), plural(len(content.keys()))))
        for item in content.keys():
            authorname = content[item]['Author']
            if hasattr(authorname, 'decode'):
                authorname = authorname.decode(lazylibrarian.SYS_ENCODING)

            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()

            if authmatch:
                newauthor = False
                logger.debug(u"CSV: Author %s found in database" % (authorname))
            else:
                newauthor = True
                logger.debug(u"CSV: Author %s not found, adding to database" % (authorname))
                addAuthorToDB(authorname)
                authcount = authcount + 1

            bookmatch = finditem(content[item], headers)

            # if we didn't find it, maybe author info is stale
            if not bookmatch and not newauthor:
                addAuthorToDB(authorname, refresh=True)
                bookmatch = finditem(content[item], headers)

            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                    logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                else:  # skipped/ignored
                    logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount = bookcount + 1
            else:
                logger.warn(u"Skipping book %s by %s, not found in database" % (bookname, authorname))
                skipcount = skipcount + 1
        logger.info(u"Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" %
                    (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount)))
Ejemplo n.º 17
0
def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
        return

    myDB = database.DBConnection()

    myDB.action("drop table if exists stats")
    myDB.action(
        "create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )"
    )

    new_authors = []

    logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))

    book_list = []
    new_book_count = 0
    file_count = 0
    book_exists = False

    if lazylibrarian.FULL_SCAN:
        books = myDB.select("select AuthorName, BookName, BookFile, BookID from books where Status=?", [u"Open"])
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info("Missing books will be marked as %s" % status)
        for book in books:
            bookName = book["BookName"]
            bookAuthor = book["AuthorName"]
            bookID = book["BookID"]
            bookfile = book["BookFile"]

            if os.path.isfile(bookfile):
                book_exists = True
            else:
                myDB.action("update books set Status=? where BookID=?", [status, bookID])
                myDB.action('update books set BookFile="" where BookID=?', [bookID])
                logger.info("Book %s updated as not found on disk" % bookfile)
                # for book_type in getList(lazylibrarian.EBOOK_TYPE):
                # 	bookName = book['BookName']
                # 	bookAuthor = book['AuthorName']
                # 	#Default destination path, should be allowed change per config file.
                # 	dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName)
                # 	#dest_path = authorname+'/'+bookname
                # 	global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName)
                #
                # 	encoded_book_path = os.path.join(dir,dest_path,global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING)
                # 	if os.path.isfile(encoded_book_path):
                # 		book_exists = True
                # if not book_exists:
                # 	myDB.action('update books set Status=? where AuthorName=? and BookName=?',[status,bookAuthor,bookName])
                # 	logger.info('Book %s updated as not found on disk' % encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace') )
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

                # guess this was meant to save repeat-scans of the same directory
                # if it contains multiple formats of the same book, but there was no code
                # that looked at the array. renamed from latest to processed to make purpose clearer
    processed_subdirectories = []

    matchString = ""
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + "\\" + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use with regular expression matching
    booktypes = ""
    count = -1
    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + "|" + book_type
    matchString = (
        matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)")
        + "\.["
        + booktypes
        + "]"
    )
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
                # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, "")
            # Added new code to skip if we've done this directory before. Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                logger.info(
                    "[%s] Now scanning subdirectory %s"
                    % (
                        dir.decode(lazylibrarian.SYS_ENCODING, "replace"),
                        subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"),
                    )
                )

                # 			If this is a book, try to get author/title/isbn/language
                # 			If metadata.opf exists, use that
                # 			else if epub or mobi, read metadata from the book
                # 			else have to try pattern match for author/title	and look up isbn/lang from LT or GR later
                #
                # 			Is it a book (extension found in booktypes)
                match = 0
                words = files.split(".")
                extn = words[len(words) - 1]
                if extn in booktypes:
                    # see if there is a metadata file in this folder with the info we need
                    try:
                        metafile = os.path.join(r, "metadata.opf").encode(lazylibrarian.SYS_ENCODING)
                        res = get_book_info(metafile)
                        if res:
                            book = res["title"]
                            author = res["creator"]
                            language = res["language"]
                            isbn = res["identifier"]
                            match = 1
                            logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))

                    except:
                        logger.debug("No metadata file in %s" % r)

                    if not match:
                        # it's a book, but no external metadata found
                        # if it's an epub or a mobi we can try to read metadata from it
                        if (extn == "epub") or (extn == "mobi"):
                            book_file = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                            res = get_book_info(book_file)
                            if res:
                                book = res["title"]
                                author = res["creator"]
                                language = res["language"]
                                isbn = res["identifier"]
                                match = 1
                                logger.debug("book meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))

                if not match:
                    match = pattern.match(files)
                    if match:
                        author = match.group("author")
                        book = match.group("book")
                    else:
                        logger.debug("Pattern match failed [%s]" % files)

                else:
                    processed_subdirectories.append(subdirectory)  # flag that we found a book in this subdirectory
                    #
                    # If we have a valid looking isbn, and language != "Unknown", add it to cache
                    #
                    if not language:
                        language = "Unknown"

                        # strip any formatting from the isbn
                    isbn = re.sub("[- ]", "", isbn)
                    if len(isbn) != 10 and len(isbn) != 13:
                        isbn = ""
                    if not isbn.isdigit():
                        isbn = ""
                    if isbn != "" and language != "Unknown":
                        logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                        # we need to add it to language cache if not already there
                        if len(isbn) == 10:
                            isbnhead = isbn[0:3]
                        else:
                            isbnhead = isbn[3:6]
                        match = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                        if not match:
                            myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                            logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                        else:
                            logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                            # get authors name in a consistent format
                    if "," in author:  # "surname, forename"
                        words = author.split(",")
                        author = words[1].strip() + " " + words[0].strip()  # "forename surname"
                    author = author.replace(". ", " ")
                    author = author.replace(".", " ")
                    author = author.replace("  ", " ")

                    # Check if the author exists, and import the author if not,
                    # before starting any complicated book-name matching to save repeating the search
                    #
                    check_exist_author = myDB.action("SELECT * FROM authors where AuthorName=?", [author]).fetchone()
                    if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                        # no match for supplied author, but we're allowed to add new ones

                        GR = GoodReads(author)
                        try:
                            author_gr = GR.find_author_id()
                        except:
                            logger.error("Error finding author id for [%s]" % author)
                            continue

                            # only try to add if GR data matches found author data
                            # not sure what this is for, never seems to fail??
                        if author_gr:
                            authorname = author_gr["authorname"]

                            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                            match_auth = author.replace(".", "_")
                            match_auth = match_auth.replace(" ", "_")
                            match_auth = match_auth.replace("__", "_")
                            match_name = authorname.replace(".", "_")
                            match_name = match_name.replace(" ", "_")
                            match_name = match_name.replace("__", "_")

                            # allow a degree of fuzziness to cater for different accented character handling.
                            # some author names have accents,
                            # filename may have the accented or un-accented version of the character
                            # The (currently non-configurable) value of fuzziness works for one accented character
                            # We stored GoodReads unmodified author name in author_gr, so store in LL db under that
                            match_fuzz = fuzz.ratio(match_auth, match_name)
                            if match_fuzz < 90:
                                logger.info("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                                logger.info("match author [%s] authorname [%s]" % (match_auth, match_name))

                                # To save loading hundreds of books by unknown authors at GR or GB, ignore if author "Unknown"
                            if (author != "Unknown") and (match_fuzz >= 90):
                                # use "intact" name for author that we stored in
                                # GR author_dict, not one of the various mangled versions
                                # otherwise the books appear to be by a different author!
                                author = author_gr["authorname"]
                                # this new authorname may already be in the database, so check again
                                check_exist_author = myDB.action(
                                    "SELECT * FROM authors where AuthorName=?", [author]
                                ).fetchone()
                                if not check_exist_author:
                                    logger.info("Adding new author [%s]" % author)
                                    if author not in new_authors:
                                        new_authors.append(author)
                                    try:
                                        importer.addAuthorToDB(author)
                                        check_exist_author = myDB.action(
                                            "SELECT * FROM authors where AuthorName=?", [author]
                                        ).fetchone()
                                    except:
                                        continue

                                        # check author exists in db, either newly loaded or already there
                    if not check_exist_author:
                        logger.info("Failed to match author [%s] in database" % author)
                    else:
                        # author exists, check if this book by this author is in our database
                        bookid = find_book_in_db(myDB, author, book)
                        if bookid:
                            # check if book is already marked as "Open" (if so, we already had it)
                            check_status = myDB.action("SELECT Status from books where BookID=?", [bookid]).fetchone()
                            if check_status["Status"] != "Open":
                                # update status as we've got this book
                                myDB.action("UPDATE books set Status=? where BookID=?", [u"Open", bookid])
                                book_file = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                                # update book location so we can check if it gets removed, or maybe allow click-to-open?
                                myDB.action("UPDATE books set BookFile=? where BookID=?", [book_file, bookid])
                                new_book_count += 1

    cachesize = myDB.action("select count(*) from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats"
    ).fetchone()
    if lazylibrarian.BOOK_API == "GoogleBooks":
        logger.info("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"])
        logger.info("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"])
    if lazylibrarian.BOOK_API == "GoodReads":
        logger.info("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"])
        logger.info("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"])
    logger.info("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"])
    logger.info("Language cache was hit %s times" % stats["sum(cache_hits)"])
    logger.info("Unwanted language removed %s books" % stats["sum(bad_lang)"])
    logger.info("Unwanted characters removed %s books" % stats["sum(bad_char)"])
    logger.info("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"])
    logger.info("ISBN Language cache holds %s entries" % cachesize["count(*)"])
    stats = len(myDB.select("select BookID from Books where status=? and BookLang=?", ["Open", "Unknown"]))
    logger.info("There are %s books in your library with unknown language" % stats)

    logger.info("Updating %i authors" % len(new_authors))
    for auth in new_authors:
        havebooks = len(myDB.select("select BookName from Books where status=? and AuthorName=?", ["Open", auth]))
        myDB.action("UPDATE authors set HaveBooks=? where AuthorName=?", [havebooks, auth])
        totalbooks = len(myDB.select("select BookName from Books where status!=? and AuthorName=?", ["Ignored", auth]))
        myDB.action("UPDATE authors set UnignoredBooks=? where AuthorName=?", [totalbooks, auth])

    logger.info("Library scan complete")
Ejemplo n.º 18
0
def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' %
            dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
                            GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    logger.info(
        'Scanning ebook directory: %s' %
        dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))
        
    # to save repeat-scans of the same directory if it contains multiple formats of the same book, 
    # keep track of which directories we've already looked at 
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode('utf-8')

            subdirectory = r.replace(dir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if formatter.is_valid_booktype(files):

                    logger.debug("[%s] Now scanning subdirectory %s" %
                                 (dir, subdirectory))

                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    words = files.split('.')
                    extn = words[len(words) - 1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == "epub") or (extn == "mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))

                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']

                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:

                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref

                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and formatter.is_valid_isbn(isbn):
                            logger.debug(
                                "Found Language [%s] ISBN [%s]" %
                                (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"' %
                                (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")' %
                                    (isbnhead, language))
                                logger.debug(
                                    "Cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' %
                            author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" %
                                    author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = common.remove_accents(match_name)
                                match_auth = common.remove_accents(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]" %
                                        (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" %
                                        (match_auth, match_name))

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' %
                                        author).fetchone()
                                    if not check_exist_author:
                                        logger.debug(
                                            "Adding new author [%s]" %
                                            author)
                                        try:
                                            importer.addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' %
                                                author).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" %
                                author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)

                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)

                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' %
                                    bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book

                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"' %
                                        bookid)

                                    book_filename = os.path.join(r, files)

                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open

                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' %
                                        (book_filename, bookid))

                                    new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" %
        new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
            sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
    if stats['sum(GR_book_hits)'] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" %
                stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" %
                stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" %
            stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" %
            stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" %
            stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" %
            stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" %
            stats['sum(uncached)'])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
    stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)

    authors = myDB.select('select AuthorName from authors')
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug('Updating bookcounts for %i authors' % len(authors))
    for author in authors:
        name = author['AuthorName']
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' %
            name).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks['counter'], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s"' % name).fetchone()        
        myDB.action('UPDATE authors set TotalBooks="%s" where AuthorName="%s"' % (totalbooks['counter'], name))
        unignoredbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' %
            name).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (unignoredbooks['counter'], name))

    covers = myDB.action("select  count('bookimg') as counter from books where bookimg like 'http%'").fetchone()
    logger.info("Caching covers for %s books" % covers['counter'])

    images = myDB.action('select bookid, bookimg, bookname from books where bookimg like "http%"')
    for item in images:
        bookid = item['bookid']
        bookimg = item['bookimg']
        bookname = item['bookname']
        newimg = bookwork.cache_cover(bookid, bookimg)
        if newimg != bookimg:
            myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))
    logger.info('Library scan complete')
Ejemplo n.º 19
0
def processCSV(search_dir=None):        
    """ Find a csv file in the search_dir and process all the books in it, 
    adding authors to the database if not found, and marking the books as "Wanted" """
     
    if not search_dir:
        logger.warn("Alternate Directory must not be empty")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn("No CSV file found in %s" % search_dir)
    else:
        logger.debug('Reading file %s' % csvFile)
        reader=csv.reader(open(csvFile))
        for row in reader:
            if reader.line_num == 1:
                # If we are on the first line, create the headers list from the first row
                # by taking a slice from item 1  as we don't need the very first header.
                headers = row[1:]
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                content[row[0]] = dict(zip(headers, row[1:]))
            
        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do:
        #print content.keys() # to get a list of bookIDs
        # To see the list of fields available for each book
        #print headers
        
        if 'Author' not in headers or 'Title' not in headers:
            logger.warn('Invalid CSV file found %s' % csvFile)
            return
            
        myDB = database.DBConnection() 
        bookcount = 0
        authcount = 0
        skipcount = 0  
        logger.debug("CSV: Found %s entries in csv file" % len(content.keys()))  
        for bookid in content.keys():
            
            authorname = content[bookid]['Author']
            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()
        
            if authmatch:
                logger.debug("CSV: Author %s found in database" % (authorname))
            else:
                logger.debug("CSV: Author %s not found, adding to database" % (authorname))
                importer.addAuthorToDB(authorname)
                authcount = authcount + 1

            bookmatch = 0
            isbn10=""
            isbn13=""    
            bookname = content[bookid]['Title']
            if 'ISBN' in headers:
                isbn10 = content[bookid]['ISBN']
            if 'ISBN13' in headers:
                isbn13 = content[bookid]['ISBN13']

            # try to find book in our database using isbn, or if that fails, fuzzy name matching
            if formatter.is_valid_isbn(isbn10):
                bookmatch = myDB.action('SELECT * FROM books where Bookisbn=%s' % (isbn10)).fetchone()
            if not bookmatch:
                if formatter.is_valid_isbn(isbn13):
                    bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' % (isbn13)).fetchone()
            if not bookmatch: 
                bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
                if bookid:
                    bookmatch = myDB.action('SELECT * FROM books where BookID="%s"' % (bookid)).fetchone()
            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                    logger.info('Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                else: # skipped/ignored
                    logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}                  
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount = bookcount + 1
            else:    
                logger.warn("Skipping book %s by %s, not found in database" % (bookname, authorname))
                skipcount = skipcount + 1
        logger.info("Added %i new authors, marked %i books as 'Wanted', %i books not found" % (authcount, bookcount, skipcount))    
Ejemplo n.º 20
0
 def refreshAuthor(self, AuthorID):
     importer.addAuthorToDB(AuthorID)
     raise cherrypy.HTTPRedirect("authorPage?AuthorID=%s" % AuthorID)
Ejemplo n.º 21
0
 def refreshAuthor(self, AuthorID):
     importer.addAuthorToDB(AuthorID)
     logger.debug('Refresh Author page for Author %s ' % AuthorID)
     raise cherrypy.HTTPRedirect("authorPage?AuthorID=%s" % AuthorID)
Ejemplo n.º 22
0
def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn('Cannot find directory: %s. Not scanning' %
                    dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()
    new_authors = []

    logger.info('Scanning ebook directory: %s' %
                dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    book_list = []
    new_book_count = 0
    file_count = 0
    book_exists = False

    if (lazylibrarian.FULL_SCAN):
        books = myDB.select(
            'select AuthorName, BookName from books where Status=?', [u'Open'])
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            for book_type in getList(lazylibrarian.EBOOK_TYPE):
                bookName = book['BookName']
                bookAuthor = book['AuthorName']
                #Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                    '$Author', bookAuthor).replace('$Title', bookName)
                #dest_path = authorname+'/'+bookname
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                    '$Author', bookAuthor).replace('$Title', bookName)

                encoded_book_path = os.path.join(
                    dir, dest_path, global_name + "." + book_type).encode(
                        lazylibrarian.SYS_ENCODING)
                if os.path.isfile(encoded_book_path):
                    book_exists = True
            if not book_exists:
                myDB.action(
                    'update books set Status=? where AuthorName=? and BookName=?',
                    [status, bookAuthor, bookName])
                logger.info('Book %s updated as not found on disk' %
                            encoded_book_path.decode(
                                lazylibrarian.SYS_ENCODING, 'replace'))
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

    latest_subdirectory = []
    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            #prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            subdirectory = r.replace(dir, '')
            latest_subdirectory.append(subdirectory)
            logger.info(
                "[%s] Now scanning subdirectory %s" %
                (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'),
                 subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))
            matchString = ''
            for char in lazylibrarian.EBOOK_DEST_FILE:
                matchString = matchString + '\\' + char
            #massage the EBOOK_DEST_FILE config parameter into something we can use with regular expression matching
            booktypes = ''
            count = -1
            booktype_list = getList(lazylibrarian.EBOOK_TYPE)
            for book_type in booktype_list:
                count += 1
                if count == 0:
                    booktypes = book_type
                else:
                    booktypes = booktypes + '|' + book_type
            matchString = matchString.replace(
                "\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
                    "\\$\\T\\i\\t\\l\\e",
                    "(?P<book>.*?)") + '\.[' + booktypes + ']'
            #pattern = re.compile(r'(?P<author>.*?)\s\-\s(?P<book>.*?)\.(?P<format>.*?)', re.VERBOSE)
            pattern = re.compile(matchString, re.VERBOSE)
            match = pattern.match(files)
            if match:
                author = match.group("author")
                book = match.group("book")
                #check if book is in database, and not marked as in library
                check_exist_book = myDB.action(
                    "SELECT * FROM books where AuthorName=? and BookName=? and Status!=?",
                    [author, book, 'Open']).fetchone()
                if not check_exist_book:
                    check_exist_author = myDB.action(
                        "SELECT * FROM authors where AuthorName=?",
                        [author]).fetchone()
                    if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                        GR = GoodReads(author)
                        try:
                            author_gr = GR.find_author_id()
                        except:
                            continue
                        #only try to add if GR data matches found author data
                        if author_gr:
                            authorid = author_gr['authorid']
                            authorlink = author_gr['authorlink']
                            pageIdx = authorlink.rfind('/')
                            authorlink = authorlink[pageIdx + 1:]
                            #match_auth = authorid+"."+author.replace('. ','_')
                            #Original Line does not allow author match.
                            match_auth = author.replace('.', '_')
                            match_auth = match_auth.replace(' ', '_')
                            match_auth = match_auth.replace('__', '_')
                            match_auth = authorid + "." + match_auth
                            # Hopefully someone can come up with a more efficient way of doing this.
                            logger.debug(match_auth)
                            logger.debug(authorlink)
                            if match_auth == authorlink:
                                logger.info("Adding %s" % author)
                                try:
                                    importer.addAuthorToDB(author)
                                except:
                                    continue
                                check_exist_book = myDB.action(
                                    "SELECT * FROM books where AuthorName=? and BookName=?",
                                    [author, book]).fetchone()
                                if check_exist_book:
                                    if author not in new_authors:
                                        new_authors.append(author)
                                    myDB.action(
                                        'UPDATE books set Status=? where AuthorName=? and BookName=?',
                                        ['Open', author, book])
                                    new_book_count += 1
                            else:
                                logger.info(
                                    "Unable to match %s in GoodReads database"
                                    % author)

                else:
                    if author not in new_authors:
                        new_authors.append(author)
                    myDB.action(
                        'UPDATE books set Status=? where AuthorName=? and BookName=?',
                        ['Open', author, book])
                    new_book_count += 1

                file_count += 1

    logger.info("%s new/modified books found and added to the database" %
                new_book_count)
    logger.info('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(
            myDB.select(
                'select BookName from Books where status=? and AuthorName=?',
                ['Open', auth]))
        myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?',
                    [havebooks, auth])
        totalbooks = len(
            myDB.select(
                'select BookName from Books where status!=? and AuthorName=?',
                ['Ignored', auth]))
        myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?',
                    [totalbooks, auth])

    logger.info('Library scan complete')
Ejemplo n.º 23
0
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added """
    if not startdir:
        if not lazylibrarian.DESTINATION_DIR:
            return 0
        else:
            startdir = lazylibrarian.DESTINATION_DIR

    if not os.path.isdir(startdir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' % startdir)
        return 0

    myDB = database.DBConnection()

    # keep statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        myDB.action('DELETE from stats')

    logger.info('Scanning ebook directory: %s' % startdir)

    new_book_count = 0
    file_count = 0
    author = ""

    if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))

    # to save repeat-scans of the same directory if it contains multiple formats of the same book,
    # keep track of which directories we've already looked at
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(startdir):
        for directory in d[:]:
            # prevent magazine being scanned
            if directory.startswith("_") or directory.startswith("."):
                d.remove(directory)

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode(lazylibrarian.SYS_ENCODING)

            subdirectory = r.replace(startdir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if is_valid_booktype(files):

                    logger.debug("[%s] Now scanning subdirectory %s" %
                                 (startdir, subdirectory))

                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    extn = os.path.splitext(files)[1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == ".epub") or (extn == ".mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))

                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']

                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:

                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref

                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and is_valid_isbn(isbn):
                            logger.debug(
                                "Found Language [%s] ISBN [%s]" %
                                (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"' %
                                (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")' %
                                    (isbnhead, language))
                                logger.debug(
                                    "Cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' %
                            author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" %
                                    author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = unaccented(match_name)
                                match_auth = unaccented(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]" %
                                        (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" %
                                        (match_auth, match_name))

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' %
                                        author).fetchone()
                                    if not check_exist_author:
                                        logger.info(
                                            "Adding new author [%s]" %
                                            author)
                                        try:
                                            addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' %
                                                author).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" %
                                author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)

                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)

                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' %
                                    bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book

                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"' %
                                        bookid)

                                    book_filename = os.path.join(r, files)

                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open

                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' %
                                        (book_filename, bookid))

                                    # update cover file to cover.jpg in book folder (if exists)
                                    bookdir = book_filename.rsplit(os.sep, 1)[0]
                                    coverimg = os.path.join(bookdir, 'cover.jpg')
                                    cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache')
                                    cacheimg = os.path.join(cachedir, bookid + '.jpg')
                                    if os.path.isfile(coverimg):
                                        copyfile(coverimg, cacheimg)

                                    new_book_count += 1
                            else:
                                logger.debug(
                                    "Failed to match book [%s] by [%s] in database" %
                                    (book, author))


    logger.info("%s new/modified book%s found and added to the database" %
                (new_book_count, plural(new_book_count)))
    logger.info("%s file%s processed" % (file_count, plural(file_count)))

    # show statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats").fetchone()
        if stats['sum(GR_book_hits)'] is not None:
            # only show stats if new books added
            if lazylibrarian.BOOK_API == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                    (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                    (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)'])))
            if lazylibrarian.BOOK_API == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                    (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                    (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                (stats['sum(LT_lang_hits)'], plural (stats['sum(LT_lang_hits)'])))
            logger.debug("Language cache was hit %s time%s" %
                (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)'])))
            logger.debug("Unwanted language removed %s book%s" %
                (stats['sum(bad_lang)'], plural (stats['sum(bad_lang)'])))
            logger.debug("Unwanted characters removed %s book%s" %
                (stats['sum(bad_char)'], plural(stats['sum(bad_char)'])))
            logger.debug("Unable to cache %s book%s with missing ISBN" %
                (stats['sum(uncached)'], plural(stats['sum(uncached)'])))
            logger.debug("Found %s duplicate book%s" %
                (stats['sum(duplicates)'], plural(stats['sum(duplicates)'])))
            logger.debug("Cache %s hit%s, %s miss" %
                (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
            nolang = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))

        authors = myDB.select('select AuthorID from authors')
        # Update bookcounts for all authors, not just new ones - refresh may have located
        # new books for existing authors especially if switched provider gb/gr
    else:
        # single author/book import
        authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author)

    logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
    for author in authors:
        update_totals(author['AuthorID'])

    images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
    if len(images):
        logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            bookid = item['bookid']
            bookimg = item['bookimg']
            bookname = item['bookname']
            newimg = cache_cover(bookid, bookimg)
            if newimg is not None:
                myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

    images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
    if len(images):
        logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            authorid = item['authorid']
            authorimg = item['authorimg']
            authorname = item['authorname']
            newimg = cache_cover(authorid, authorimg)
            if newimg is not None:
                myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))
    setWorkPages()
    logger.info('Library scan complete')
    return new_book_count
Ejemplo n.º 24
0
def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
        return

    myDB = database.DBConnection()

    myDB.action("drop table if exists stats")
    myDB.action(
        "create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
                            GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )"
    )

    logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select('select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info("Missing books will be marked as %s" % status)
        for book in books:
            bookName = book["BookName"]
            bookAuthor = book["AuthorName"]
            bookID = book["BookID"]
            bookfile = book["BookFile"]

            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn("Book %s - %s updated as not found on disk" % (bookAuthor, bookName))

    # guess this was meant to save repeat-scans of the same directory
    # if it contains multiple formats of the same book, but there was no code
    # that looked at the array. renamed from latest to processed to make
    # purpose clearer
    processed_subdirectories = []

    matchString = ""
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + "\\" + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ""
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + "|" + book_type
    matchString = (
        matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)")
        + "\.["
        + booktypes
        + "]"
    )
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, "")
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0

                if formatter.is_valid_booktype(files):
                    logger.debug(
                        "[%s] Now scanning subdirectory %s"
                        % (
                            dir.decode(lazylibrarian.SYS_ENCODING, "replace"),
                            subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"),
                        )
                    )
                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    words = files.split(".")
                    extn = words[len(words) - 1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == "epub") or (extn == "mobi"):
                        book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if "title" in res and "creator" in res:  # this is the minimum we need
                            match = 1
                            book = res["title"]
                            author = res["creator"]
                            if "language" in res:
                                language = res["language"]
                            if "identifier" in res:
                                isbn = res["identifier"]
                            if "type" in res:
                                extn = res["type"]
                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn))
                        else:
                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if "title" in res and "creator" in res:  # this is the minimum we need
                        match = 1
                        book = res["title"]
                        author = res["creator"]
                        if "language" in res:
                            language = res["language"]
                        if "identifier" in res:
                            isbn = res["identifier"]
                        logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and formatter.is_valid_isbn(isbn):
                            logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                            if not match:
                                myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                                logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                            else:
                                logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(",")
                            author = words[1].strip() + " " + words[0].strip()  # "forename surname"
                        if author[1] == " ":
                            author = author.replace(" ", ".")
                            author = author.replace("..", ".")

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' % author
                        ).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn("Error finding author id for [%s]" % author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr["authorname"]

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace(".", "_")
                                match_auth = match_auth.replace(" ", "_")
                                match_auth = match_auth.replace("__", "_")
                                match_name = authorname.replace(".", "_")
                                match_name = match_name.replace(" ", "_")
                                match_name = match_name.replace("__", "_")
                                match_name = common.remove_accents(match_name)
                                match_auth = common.remove_accents(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)
                                    )

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr["authorname"]
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' % author
                                    ).fetchone()
                                    if not check_exist_author:
                                        logger.debug("Adding new author [%s]" % author)
                                        try:
                                            importer.addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' % author
                                            ).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug("Failed to match author [%s] in database" % author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', "").replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)
                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)
                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' % bookid
                                ).fetchone()
                                if check_status["Status"] != "Open":
                                    # update status as we've got this book
                                    myDB.action('UPDATE books set Status="Open" where BookID="%s"' % bookid)
                                    book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open
                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)
                                    )
                                    new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
            sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats"
    ).fetchone()
    if stats["sum(GR_book_hits)"] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"])
        logger.debug("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"])
        logger.debug("Language cache was hit %s times" % stats["sum(cache_hits)"])
        logger.debug("Unwanted language removed %s books" % stats["sum(bad_lang)"])
        logger.debug("Unwanted characters removed %s books" % stats["sum(bad_char)"])
        logger.debug("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize["counter"])
    stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)

    authors = myDB.select("select AuthorName from authors")
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug("Updating bookcounts for %i authors" % len(authors))
    for author in authors:
        name = author["AuthorName"]
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")'
            % name
        ).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks["counter"], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' % name
        ).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (totalbooks["counter"], name))

    logger.info("Library scan complete")
Ejemplo n.º 25
0
def processAlternate(source_dir=None):
    # import a book from an alternate directory
    if not source_dir or os.path.isdir(source_dir) is False:
        logger.warn('Alternate directory not found')
        return
    if source_dir == lazylibrarian.DESTINATION_DIR:
        logger.warn('Alternate directory must not be the same as destination')
        return

    logger.debug('Processing alternate directory %s' % source_dir)
    # first, recursively process any books in subdirectories
    for fname in os.listdir(source_dir):
        subdir = os.path.join(source_dir, fname)
        if os.path.isdir(subdir):
            processAlternate(subdir)
    # only import one book from each alternate (sub)directory, this is because
    # the importer may delete the directory after importing a book,
    # depending on lazylibrarian.DESTINATION_COPY setting
    # also if multiple books in a folder and only a "metadata.opf"
    # which book is it for?
    new_book = book_file(source_dir, booktype='book')
    if new_book:
        metadata = {}
        # see if there is a metadata file in this folder with the info we need
        # try book_name.opf first, or fall back to any filename.opf
        metafile = os.path.splitext(new_book)[0] + '.opf'
        if not os.path.isfile(metafile):
            metafile = librarysync.opf_file(source_dir)
        if os.path.isfile(metafile):
            try:
                metadata = librarysync.get_book_info(metafile)
            except:
                logger.debug('Failed to read metadata from %s' % metafile)
        else:
            logger.debug('No metadata file found for %s' % new_book)
        if not 'title' in metadata and 'creator' in metadata:
            # try to get metadata from the book file
            try:
                metadata = librarysync.get_book_info(new_book)
            except:
                logger.debug('No metadata found in %s' % new_book)
        if 'title' in metadata and 'creator' in metadata:
            authorname = metadata['creator']
            bookname = metadata['title']
            myDB = database.DBConnection()

            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()

            if authmatch:
                logger.debug("ALT: Author %s found in database" % (authorname))
            else:
                logger.debug("ALT: Author %s not found, adding to database" % (authorname))
                importer.addAuthorToDB(authorname)

            bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
            if bookid:
                import_book(source_dir, bookid)
            else:
                logger.warn("Book %s by %s not found in database" % (bookname, authorname))
        else:
            logger.warn('Book %s has no metadata, unable to import' % new_book)
    else:
        logger.warn("No book file found in %s" % source_dir)
Ejemplo n.º 26
0
def LibraryScan(dir=None):
	if not dir:
		if not lazylibrarian.DOWNLOAD_DIR:
			return
		else:
			dir = lazylibrarian.DOWNLOAD_DIR

	if not os.path.isdir(dir):
		logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
		return
	
	myDB = database.DBConnection()
	new_authors = []

	logger.info('Scanning ebook directory: %s' % dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

	book_list = []
	new_book_count = 0
	file_count = 0 
	book_exists = False

	if (lazylibrarian.FULL_SCAN):
		books = myDB.select('select AuthorName, BookName from books where Status=?',[u'Open'])
		status = lazylibrarian.NOTFOUND_STATUS
		logger.info('Missing books will be marked as %s' % status)
		for book in books:
			for book_type in getList(lazylibrarian.EBOOK_TYPE):
				bookName = book['BookName']
				bookAuthor = book['AuthorName']
				#Default destination path, should be allowed change per config file.
				dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName)
				#dest_path = authorname+'/'+bookname
				global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName)

				encoded_book_path = os.path.join(dir,dest_path,global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING)
				if os.path.isfile(encoded_book_path):
					book_exists = True	
			if not book_exists:
				myDB.action('update books set Status=? where AuthorName=? and BookName=?',[status,bookAuthor,bookName])
				logger.info('Book %s updated as not found on disk' % encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace') )
				if bookAuthor not in new_authors:
					new_authors.append(bookAuthor)

	latest_subdirectory = []
	for r,d,f in os.walk(dir):
		for directory in d[:]:
			if directory.startswith("."):
				d.remove(directory)
			#prevent magazine being scanned
			if directory.startswith("_"):
				d.remove(directory)
		for files in f:
			 subdirectory = r.replace(dir,'')
			 latest_subdirectory.append(subdirectory)
			 logger.info("[%s] Now scanning subdirectory %s" % (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'), subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))
			 matchString = ''
			 for char in lazylibrarian.EBOOK_DEST_FILE:
				matchString = matchString + '\\' + char
			 #massage the EBOOK_DEST_FILE config parameter into something we can use with regular expression matching
			 booktypes = ''
			 count=-1;
			 booktype_list =  getList(lazylibrarian.EBOOK_TYPE)
			 for book_type in booktype_list:
			 	count+=1
				if count == 0:
					booktypes = book_type
				else:
					booktypes = booktypes + '|'+book_type
			 matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e","(?P<book>.*?)")+'\.['+booktypes+']'
			 #pattern = re.compile(r'(?P<author>.*?)\s\-\s(?P<book>.*?)\.(?P<format>.*?)', re.VERBOSE)
			 pattern = re.compile(matchString, re.VERBOSE)
			 match = pattern.match(files)
			 if match:
				author = match.group("author")
				book = match.group("book")
			 	#check if book is in database, and not marked as in library
				check_exist_book = myDB.action("SELECT * FROM books where AuthorName=? and BookName=? and Status!=?",[author,book,'Open']).fetchone()
				if not check_exist_book:
					check_exist_author = myDB.action("SELECT * FROM authors where AuthorName=?",[author]).fetchone()
					if not check_exist_author and lazylibrarian.ADD_AUTHOR:
						GR = GoodReads(author)
						try:
							author_gr = GR.find_author_id()
						except:
							continue
						#only try to add if GR data matches found author data
						if author_gr:
							authorid = author_gr['authorid']
							authorlink  = author_gr['authorlink']
							pageIdx = authorlink.rfind('/')
							authorlink  = authorlink[pageIdx+1:]
							match_auth = authorid+"."+author.replace('. ','_')
							logger.debug(match_auth)
							logger.debug(authorlink)
							if match_auth == authorlink:
								logger.info("Adding %s" % author)
								try:
									importer.addAuthorToDB(author)
								except:
									continue
								check_exist_book = myDB.action("SELECT * FROM books where AuthorName=? and BookName=?",[author,book]).fetchone()
								if check_exist_book:
									if author not in new_authors:
										new_authors.append(author)
									myDB.action('UPDATE books set Status=? where AuthorName=? and BookName=?',['Open',author,book])
									new_book_count += 1
							else:
								logger.info("Unable to match %s in GoodReads database" % author)
							

				else:
					if author not in new_authors:
						new_authors.append(author)
					myDB.action('UPDATE books set Status=? where AuthorName=? and BookName=?',['Open',author,book])
					new_book_count += 1
				
				file_count += 1
	
	logger.info("%s new/modified books found and added to the database" % new_book_count)
	logger.info('Updating %i authors' % len(new_authors))
	for auth in new_authors:
		havebooks = len(myDB.select('select BookName from Books where status=? and AuthorName=?',['Open',auth]))
		myDB.action('UPDATE authors set HaveBooks=? where AuthorName=?',[havebooks,auth])
		totalbooks = len(myDB.select('select BookName from Books where status!=? and AuthorName=?',['Ignored',auth]))
		myDB.action('UPDATE authors set UnignoredBooks=? where AuthorName=?',[totalbooks,auth]) 

	logger.info('Library scan complete')
Ejemplo n.º 27
0
def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
    """

    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Please check Alternate Directory setting")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
    else:
        logger.debug(u'Reading file %s' % csvFile)
        reader = csv.reader(open(csvFile))
        for row in reader:
            if reader.line_num == 1:
                # If we are on the first line, create the headers list from the first row
                headers = row
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                # we include the key in the dictionary as our exported csv files use
                # bookid as the key
                content[row[0]] = dict(zip(headers, row))

        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do:
        # print content.keys() # to get a list of keys
        # To see the list of fields available for each book
        # print headers

        if 'Author' not in headers or 'Title' not in headers:
            logger.warn(u'Invalid CSV file found %s' % csvFile)
            return

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        logger.debug(u"CSV: Found %s book%s in csv file" %
                     (len(content.keys()), plural(len(content.keys()))))
        for item in content.keys():
            authorname = content[item]['Author']
            if hasattr(authorname, 'decode'):
                authorname = authorname.decode(lazylibrarian.SYS_ENCODING)

            authmatch = myDB.action(
                'SELECT * FROM authors where AuthorName="%s"' %
                (authorname)).fetchone()

            if authmatch:
                newauthor = False
                logger.debug(u"CSV: Author %s found in database" %
                             (authorname))
            else:
                newauthor = True
                logger.debug(u"CSV: Author %s not found, adding to database" %
                             (authorname))
                addAuthorToDB(authorname)
                authcount = authcount + 1

            bookmatch = finditem(content[item], headers)

            # if we didn't find it, maybe author info is stale
            if not bookmatch and not newauthor:
                addAuthorToDB(authorname, refresh=True)
                bookmatch = finditem(content[item], headers)

            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                    logger.info(
                        u'Found book %s by %s, already marked as "%s"' %
                        (bookname, authorname, bookstatus))
                else:  # skipped/ignored
                    logger.info(u'Found book %s by %s, marking as "Wanted"' %
                                (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount = bookcount + 1
            else:
                logger.warn(u"Skipping book %s by %s, not found in database" %
                            (bookname, authorname))
                skipcount = skipcount + 1
        logger.info(
            u"Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found"
            % (authcount, plural(authcount), bookcount, plural(bookcount),
               skipcount, plural(skipcount)))
Ejemplo n.º 28
0
def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' %
            dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    new_authors = []

    logger.info(
        'Scanning ebook directory: %s' %
        dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action(
                    'update books set Status="%s" where BookID="%s"' %
                    (status, bookID))
                myDB.action(
                    'update books set BookFile="" where BookID="%s"' %
                    bookID)
                logger.warn(
                    'Book %s - %s updated as not found on disk' %
                    (bookAuthor, bookName))
            # for book_type in getList(lazylibrarian.EBOOK_TYPE):
            #	bookName = book['BookName']
            #	bookAuthor = book['AuthorName']
            # Default destination path, should be allowed change per config file.
            #	dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName)
            # dest_path = authorname+'/'+bookname
            #	global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName)
#
            #	encoded_book_path = os.path.join(dir,dest_path,global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING)
            #	if os.path.isfile(encoded_book_path):
            #		book_exists = True
            # if not book_exists:
            #	myDB.action('update books set Status=? where AuthorName=? and BookName=?',[status,bookAuthor,bookName])
            # logger.info('Book %s updated as not found on disk' %
            # encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace') )
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

    # guess this was meant to save repeat-scans of the same directory
    # if it contains multiple formats of the same book, but there was no code
    # that looked at the array. renamed from latest to processed to make
    # purpose clearer
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, '')
            # Added new code to skip if we've done this directory before. Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same
            # subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
# 			If this is a book, try to get author/title/isbn/language
# 			If metadata.opf exists, use that
# 			else if epub or mobi, read metadata from the book
# 			else have to try pattern match for author/title	and look up isbn/lang from LT or GR late
                match = 0
                extn = ""
                
                if '.' in files:
                    words = files.split('.')
                    extn = words[len(words) - 1]
                    
                if formatter.is_valid_booktype(files):
                    logger.debug(
                        "[%s] Now scanning subdirectory %s" %
                        (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'), subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))
                    
                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know 
                    # LL preferred authorname/bookname at this point
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        else:
                            language = ""
                        if 'identifier' in res:
                            isbn = res['identifier']
                        else:
                            isbn = ""
                        match = 1
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:
                        # it's a book, but no external metadata found
                        # if it's an epub or a mobi we can try to read metadata
                        # from it
                        if (extn == "epub") or (extn == "mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                            try:
                                res = get_book_info(book_filename)
                            except:
                                res = {}
                            if 'title' in res and 'creator' in res:  # this is the minimum we need
                                book = res['title']
                                author = res['creator']
                                if 'language' in res:
                                    language = res['language']
                                else:
                                    language = ""
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                else:
                                    isbn = ""
                                logger.debug("book meta [%s] [%s] [%s] [%s]" %
                                    (isbn, language, author, book))
                                match = 1
                            else:
                                logger.debug("Book meta incomplete in %s" % book_filename)

                if not match:
                    match = pattern.match(files)
                    if match:
                        author = match.group("author")
                        book = match.group("book")
                    else:
                        logger.debug("Pattern match failed [%s]" % files)

                if match:
                    processed_subdirectories.append(
                        subdirectory)  # flag that we found a book in this subdirectory
                    #
                    # If we have a valid looking isbn, and language != "Unknown", add it to cache
                    #
                    if not language:
                        language = "Unknown"

                    if not formatter.is_valid_isbn(isbn):
                        isbn = ""
                    if isbn != "" and language != "Unknown":
                        logger.debug(
                            "Found Language [%s] ISBN [%s]" %
                            (language, isbn))
                        # we need to add it to language cache if not already
                        # there, is_valid_isbn has checked length is 10 or 13
                        if len(isbn) == 10:
                            isbnhead = isbn[0:3]
                        else:
                            isbnhead = isbn[3:6]
                        match = myDB.action(
                            'SELECT lang FROM languages where isbn = "%s"' %
                            (isbnhead)).fetchone()
                        if not match:
                            myDB.action(
                                'insert into languages values ("%s", "%s")' %
                                (isbnhead, language))
                            logger.debug(
                                "Cached Lang [%s] ISBN [%s]" %
                                (language, isbnhead))
                        else:
                            logger.debug(
                                "Already cached Lang [%s] ISBN [%s]" %
                                (language, isbnhead))

                    # get authors name in a consistent format
                    if "," in author:  # "surname, forename"
                        words = author.split(',')
                        author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                    if author[1] == ' ':        
                        author = author.replace(' ', '.')
                        author = author.replace('..', '.')

                    # Check if the author exists, and import the author if not,
                    # before starting any complicated book-name matching to save repeating the search
                    #
                    check_exist_author = myDB.action(
                        'SELECT * FROM authors where AuthorName="%s"' %
                        author).fetchone()
                    if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                        # no match for supplied author, but we're allowed to
                        # add new ones

                        GR = GoodReads(author)
                        try:
                            author_gr = GR.find_author_id()
                        except:
                            logger.warn(
                                "Error finding author id for [%s]" %
                                author)
                            continue

                        # only try to add if GR data matches found author data
                        # not sure what this is for, never seems to fail??
                        if author_gr:
                            authorname = author_gr['authorname']

                            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                            match_auth = author.replace('.', '_')
                            match_auth = match_auth.replace(' ', '_')
                            match_auth = match_auth.replace('__', '_')
                            match_name = authorname.replace('.', '_')
                            match_name = match_name.replace(' ', '_')
                            match_name = match_name.replace('__', '_')
                            match_name = common.remove_accents(match_name)
                            match_auth = common.remove_accents(match_auth)
                            # allow a degree of fuzziness to cater for different accented character handling.
                            # some author names have accents,
                            # filename may have the accented or un-accented version of the character
                            # The currently non-configurable value of fuzziness might need to go in config
                            # We stored GoodReads unmodified author name in
                            # author_gr, so store in LL db under that
                            match_fuzz = fuzz.ratio(match_auth, match_name)
                            if match_fuzz < 90:
                                logger.debug(
                                    "Failed to match author [%s] fuzz [%d]" %
                                    (author, match_fuzz))
                                logger.debug(
                                    "Failed to match author [%s] to authorname [%s]" %
                                    (match_auth, match_name))

                            # To save loading hundreds of books by unknown
                            # authors at GR or GB, ignore if author "Unknown"
                            if (author != "Unknown") and (match_fuzz >= 90):
                                # use "intact" name for author that we stored in
                                # GR author_dict, not one of the various mangled versions
                                # otherwise the books appear to be by a
                                # different author!
                                author = author_gr['authorname']
                                # this new authorname may already be in the
                                # database, so check again
                                check_exist_author = myDB.action(
                                    'SELECT * FROM authors where AuthorName="%s"' %
                                    author).fetchone()
                                if not check_exist_author:
                                    logger.debug(
                                        "Adding new author [%s]" %
                                        author)
                                    if author not in new_authors:
                                        new_authors.append(author)
                                    try:
                                        importer.addAuthorToDB(author)
                                        check_exist_author = myDB.action(
                                            'SELECT * FROM authors where AuthorName="%s"' %
                                            author).fetchone()
                                    except:
                                        continue

                    # check author exists in db, either newly loaded or already
                    # there
                    if not check_exist_author:
                        logger.debug(
                            "Failed to match author [%s] in database" %
                            author)
                    else:
                        # author exists, check if this book by this author is in our database
                        # metadata might have quotes in book name
                        book = book.replace('"', '').replace("'", "")
                        bookid = find_book_in_db(myDB, author, book)
                        if bookid:
                            # check if book is already marked as "Open" (if so,
                            # we already had it)
                            check_status = myDB.action(
                                'SELECT Status from books where BookID="%s"' %
                                bookid).fetchone()
                            if check_status['Status'] != 'Open':
                                # update status as we've got this book
                                myDB.action(
                                    'UPDATE books set Status="Open" where BookID="%s"' %
                                    bookid)
                                book_filename = os.path.join(
                                    r,
                                    files).encode(
                                        lazylibrarian.SYS_ENCODING)
                                # update book location so we can check if it
                                # gets removed, or allow click-to-open
                                myDB.action(
                                    'UPDATE books set BookFile="%s" where BookID="%s"' %
                                    (book_filename, bookid))
                                new_book_count += 1

    cachesize = myDB.action("select count(*) from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" %
        new_book_count)
    logger.info("%s files processed" % file_count)
    if new_book_count:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" %
                stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" %
                stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" %
            stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" %
            stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" %
            stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" %
            stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" %
            stats['sum(uncached)'])
    logger.debug("ISBN Language cache holds %s entries" % cachesize['count(*)'])
    stats = len(
        myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn(
            "There are %s books in your library with unknown language" %
            stats)

    logger.debug('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(
            myDB.select('select BookName from Books where status="%s" and AuthorName="%s"' %
                        ('Open', auth)))
        myDB.action(
            'UPDATE authors set HaveBooks="%s" where AuthorName="%s"' %
            (havebooks, auth))
        totalbooks = len(
            myDB.select('select BookName from Books where status!="%s" and AuthorName="%s"' %
                        ('Ignored', auth)))
        myDB.action(
            'UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' %
            (totalbooks, auth))

    logger.info('Library scan complete')
Ejemplo n.º 29
0
 def refreshAuthor(self, AuthorID):
     importer.addAuthorToDB(AuthorID)
     logger.debug('Refresh Author page for Author %s '% AuthorID)
     raise cherrypy.HTTPRedirect("authorPage?AuthorID=%s" % AuthorID)