Example #1
0
def finditem(item, preferred_authorname):
    """
    Try to find book matching the csv item in the database
    Return database entry, or False if not found
    """
    myDB = database.DBConnection()
    bookmatch = ""
    isbn10 = ""
    isbn13 = ""
    bookid = ""
    bookname = item['Title']

    bookname = makeUnicode(bookname)
    if 'ISBN' in item:
        isbn10 = item['ISBN']
    if 'ISBN13' in item:
        isbn13 = item['ISBN13']
    if 'BookID' in item:
        bookid = item['BookID']

    # try to find book in our database using bookid or isbn, or if that fails, name matching
    cmd = 'SELECT AuthorName,BookName,BookID,books.Status FROM books,authors where books.AuthorID = authors.AuthorID '
    if bookid:
        fullcmd = cmd + 'and BookID=?'
        bookmatch = myDB.match(fullcmd, (bookid, ))
    if not bookmatch:
        if is_valid_isbn(isbn10):
            fullcmd = cmd + 'and BookIsbn=?'
            bookmatch = myDB.match(fullcmd, (isbn10, ))
    if not bookmatch:
        if is_valid_isbn(isbn13):
            fullcmd = cmd + 'and BookIsbn=?'
            bookmatch = myDB.match(fullcmd, (isbn13, ))
    if not bookmatch:
        bookid, mtype = find_book_in_db(preferred_authorname,
                                        bookname,
                                        ignored=False)
        if bookid and mtype == "Ignored":
            logger.warn(
                "Book %s by %s is marked Ignored in database, importing anyway"
                % (bookname, preferred_authorname))
        if bookid:
            fullcmd = cmd + 'and BookID=?'
            bookmatch = myDB.match(fullcmd, (bookid, ))
    return bookmatch
Example #2
0
def finditem(item, preferred_authorname, library='eBook'):
    """
    Try to find book matching the csv item in the database
    Return database entry, or False if not found
    """
    myDB = database.DBConnection()
    bookmatch = ""
    isbn10 = ""
    isbn13 = ""
    bookid = ""
    bookname = item['Title']

    bookname = makeUnicode(bookname)
    if 'ISBN' in item:
        isbn10 = item['ISBN']
    if 'ISBN13' in item:
        isbn13 = item['ISBN13']
    if 'BookID' in item:
        bookid = item['BookID']

    # try to find book in our database using bookid or isbn, or if that fails, name matching
    cmd = 'SELECT AuthorName,BookName,BookID,books.Status,AudioStatus,Requester,'
    cmd += 'AudioRequester FROM books,authors where books.AuthorID = authors.AuthorID '
    if bookid:
        fullcmd = cmd + 'and BookID=?'
        bookmatch = myDB.match(fullcmd, (bookid, ))
    if not bookmatch:
        if is_valid_isbn(isbn10):
            fullcmd = cmd + 'and BookIsbn=?'
            bookmatch = myDB.match(fullcmd, (isbn10, ))
    if not bookmatch:
        if is_valid_isbn(isbn13):
            fullcmd = cmd + 'and BookIsbn=?'
            bookmatch = myDB.match(fullcmd, (isbn13, ))
    if not bookmatch:
        bookid, mtype = find_book_in_db(preferred_authorname,
                                        bookname,
                                        ignored=False,
                                        library=library)
        if bookid:
            fullcmd = cmd + 'and BookID=?'
            bookmatch = myDB.match(fullcmd, (bookid, ))
    return bookmatch
Example #3
0
def finditem(item, headers):
    """
    Try to find book matching the csv item in the database
    Return database entry, or False if not found
    """
    myDB = database.DBConnection()
    bookmatch = False
    isbn10 = ""
    isbn13 = ""
    bookid = ""
    bookname = item['Title']
    authorname = item['Author']
    if hasattr(authorname, 'decode'):
        authorname = authorname.decode(lazylibrarian.SYS_ENCODING)
    if hasattr(bookname, 'decode'):
        bookname = bookname.decode(lazylibrarian.SYS_ENCODING)
    if 'ISBN' in headers:
        isbn10 = item['ISBN']
    if 'ISBN13' in headers:
        isbn13 = item['ISBN13']
    if 'BookID' in headers:
        bookid = item['BookID']

    # try to find book in our database using bookid or isbn, or if that fails, name matching
    if bookid:
        bookmatch = myDB.action('SELECT * FROM books where BookID=%s' %
                                (bookid)).fetchone()
    if not bookmatch:
        if is_valid_isbn(isbn10):
            bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' %
                                    (isbn10)).fetchone()
    if not bookmatch:
        if is_valid_isbn(isbn13):
            bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' %
                                    (isbn13)).fetchone()
    if not bookmatch:
        bookid = find_book_in_db(myDB, authorname, bookname)
        if bookid:
            bookmatch = myDB.action('SELECT * FROM books where BookID="%s"' %
                                    (bookid)).fetchone()
    return bookmatch
Example #4
0
def finditem(item, authorname, headers):
    """
    Try to find book matching the csv item in the database
    Return database entry, or False if not found
    """
    myDB = database.DBConnection()
    bookmatch = ""
    isbn10 = ""
    isbn13 = ""
    bookid = ""
    bookname = item['Title']

    if isinstance(bookname, str) and hasattr(bookname, "decode"):
        bookname = bookname.decode(lazylibrarian.SYS_ENCODING)
    if 'ISBN' in headers:
        isbn10 = item['ISBN']
    if 'ISBN13' in headers:
        isbn13 = item['ISBN13']
    if 'BookID' in headers:
        bookid = item['BookID']

    # try to find book in our database using bookid or isbn, or if that fails, name matching
    cmd = 'SELECT AuthorName,BookName,BookID,books.Status FROM books,authors where books.AuthorID = authors.AuthorID '
    if bookid:
        fullcmd = cmd + 'and BookID=?'
        bookmatch = myDB.match(fullcmd, (bookid,))
    if not bookmatch:
        if is_valid_isbn(isbn10):
            fullcmd = cmd + 'and BookIsbn=?'
            bookmatch = myDB.match(fullcmd, (isbn10,))
    if not bookmatch:
        if is_valid_isbn(isbn13):
            fullcmd = cmd + 'and BookIsbn=?'
            bookmatch = myDB.match(fullcmd, (isbn13,))
    if not bookmatch:
        bookid = find_book_in_db(authorname, bookname)
        if bookid:
            fullcmd = cmd + 'and BookID=?'
            bookmatch = myDB.match(fullcmd, (bookid,))
    return bookmatch
Example #5
0
def finditem(item, preferred_authorname, library='eBook'):
    """
    Try to find book matching the csv item in the database
    Return database entry, or False if not found
    """
    myDB = database.DBConnection()
    bookmatch = ""
    isbn10 = ""
    isbn13 = ""
    bookid = ""
    bookname = item['Title']

    bookname = makeUnicode(bookname)
    if 'ISBN' in item:
        isbn10 = item['ISBN']
    if 'ISBN13' in item:
        isbn13 = item['ISBN13']
    if 'BookID' in item:
        bookid = item['BookID']

    # try to find book in our database using bookid or isbn, or if that fails, name matching
    cmd = 'SELECT AuthorName,BookName,BookID,books.Status,AudioStatus,Requester,'
    cmd += 'AudioRequester FROM books,authors where books.AuthorID = authors.AuthorID '
    if bookid:
        fullcmd = cmd + 'and BookID=?'
        bookmatch = myDB.match(fullcmd, (bookid,))
    if not bookmatch:
        if is_valid_isbn(isbn10):
            fullcmd = cmd + 'and BookIsbn=?'
            bookmatch = myDB.match(fullcmd, (isbn10,))
    if not bookmatch:
        if is_valid_isbn(isbn13):
            fullcmd = cmd + 'and BookIsbn=?'
            bookmatch = myDB.match(fullcmd, (isbn13,))
    if not bookmatch:
        bookid, mtype = find_book_in_db(preferred_authorname, bookname, ignored=False, library=library)
        if bookid:
            fullcmd = cmd + 'and BookID=?'
            bookmatch = myDB.match(fullcmd, (bookid,))
    return bookmatch
Example #6
0
def finditem(item, headers):
    """
    Try to find book matching the csv item in the database
    Return database entry, or False if not found
    """
    myDB = database.DBConnection()
    bookmatch = False
    isbn10 = ""
    isbn13 = ""
    bookid = ""
    bookname = item['Title']
    authorname = item['Author']
    if hasattr(authorname, 'decode'):
        authorname = authorname.decode(lazylibrarian.SYS_ENCODING)
    if hasattr(bookname, 'decode'):
        bookname = bookname.decode(lazylibrarian.SYS_ENCODING)
    if 'ISBN' in headers:
        isbn10 = item['ISBN']
    if 'ISBN13' in headers:
        isbn13 = item['ISBN13']
    if 'BookID' in headers:
        bookid = item['BookID']

    # try to find book in our database using bookid or isbn, or if that fails, name matching
    if bookid:
        bookmatch = myDB.action('SELECT * FROM books where BookID=%s' % (bookid)).fetchone()
    if not bookmatch:
        if is_valid_isbn(isbn10):
            bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' % (isbn10)).fetchone()
    if not bookmatch:
        if is_valid_isbn(isbn13):
            bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' % (isbn13)).fetchone()
    if not bookmatch:
        bookid = find_book_in_db(myDB, authorname, bookname)
        if bookid:
            bookmatch = myDB.action('SELECT * FROM books where BookID="%s"' % (bookid)).fetchone()
    return bookmatch
Example #7
0
def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' %
            dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
                            GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    logger.info(
        'Scanning ebook directory: %s' %
        dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))
        
    # to save repeat-scans of the same directory if it contains multiple formats of the same book, 
    # keep track of which directories we've already looked at 
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode('utf-8')

            subdirectory = r.replace(dir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if formatter.is_valid_booktype(files):

                    logger.debug("[%s] Now scanning subdirectory %s" %
                                 (dir, subdirectory))

                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    words = files.split('.')
                    extn = words[len(words) - 1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == "epub") or (extn == "mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))

                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']

                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:

                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref

                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and formatter.is_valid_isbn(isbn):
                            logger.debug(
                                "Found Language [%s] ISBN [%s]" %
                                (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"' %
                                (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")' %
                                    (isbnhead, language))
                                logger.debug(
                                    "Cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' %
                            author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" %
                                    author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = common.remove_accents(match_name)
                                match_auth = common.remove_accents(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]" %
                                        (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" %
                                        (match_auth, match_name))

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' %
                                        author).fetchone()
                                    if not check_exist_author:
                                        logger.debug(
                                            "Adding new author [%s]" %
                                            author)
                                        try:
                                            importer.addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' %
                                                author).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" %
                                author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)

                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)

                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' %
                                    bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book

                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"' %
                                        bookid)

                                    book_filename = os.path.join(r, files)

                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open

                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' %
                                        (book_filename, bookid))

                                    new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" %
        new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
            sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
    if stats['sum(GR_book_hits)'] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" %
                stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" %
                stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" %
            stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" %
            stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" %
            stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" %
            stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" %
            stats['sum(uncached)'])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
    stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)

    authors = myDB.select('select AuthorName from authors')
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug('Updating bookcounts for %i authors' % len(authors))
    for author in authors:
        name = author['AuthorName']
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' %
            name).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks['counter'], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s"' % name).fetchone()        
        myDB.action('UPDATE authors set TotalBooks="%s" where AuthorName="%s"' % (totalbooks['counter'], name))
        unignoredbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' %
            name).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (unignoredbooks['counter'], name))

    covers = myDB.action("select  count('bookimg') as counter from books where bookimg like 'http%'").fetchone()
    logger.info("Caching covers for %s books" % covers['counter'])

    images = myDB.action('select bookid, bookimg, bookname from books where bookimg like "http%"')
    for item in images:
        bookid = item['bookid']
        bookimg = item['bookimg']
        bookname = item['bookname']
        newimg = bookwork.cache_cover(bookid, bookimg)
        if newimg != bookimg:
            myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))
    logger.info('Library scan complete')
Example #8
0
    def find_results(self, authorname=None, queue=None):

        myDB = database.DBConnection()
        resultlist = []
        # See if we should check ISBN field, otherwise ignore it
        api_strings = ['inauthor:', 'intitle:']
        if is_valid_isbn(authorname):
            api_strings = ['isbn:']

        api_hits = 0
        logger.debug(
            'Now searching Google Books API with keyword: ' +
            self.name)

        for api_value in api_strings:
            startindex = 0
            if api_value == "isbn:":
                set_url = self.url + urllib.quote(api_value + self.name.encode(lazylibrarian.SYS_ENCODING))
            else:
                set_url = self.url + \
                    urllib.quote(api_value + '"' + self.name.encode(lazylibrarian.SYS_ENCODING) + '"')

            try:
                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0

                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urllib.urlencode(self.params)

                    try:
                        jsonresults, in_cache = get_json_request(URL)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits = api_hits + 1
                            number_results = jsonresults['totalItems']
                            logger.debug('Searching url: ' + URL)
                        if number_results == 0:
                            logger.warn(
                                'Found no results for %s with value: %s' %
                                (api_value, self.name))
                            break
                        else:
                            pass
                    except HTTPError as err:
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while' %
                            err.reason)
                        break

                    startindex = startindex + 40

                    for item in jsonresults['items']:

                        total_count = total_count + 1

                        # skip if no author, no author is no book.
                        try:
                            Author = item['volumeInfo']['authors'][0]
                        except KeyError:
                            logger.debug(
                                'Skipped a result without authorfield.')
                            no_author_count = no_author_count + 1
                            continue

                        valid_langs = ([valid_lang.strip()
                                       for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])

                        if "All" not in valid_langs:  # don't care about languages, accept all
                            try:
                                # skip if language is not in valid list -
                                booklang = item['volumeInfo']['language']
                                if booklang not in valid_langs:
                                    logger.debug(
                                        'Skipped a book with language %s' %
                                        booklang)
                                    ignored = ignored + 1
                                    continue
                            except KeyError:
                                ignored = ignored + 1
                                logger.debug(
                                    'Skipped a result where no language is found')
                                continue

                        try:
                            bookpub = item['volumeInfo']['publisher']
                        except KeyError:
                            bookpub = None

                        try:
                            booksub = item['volumeInfo']['subtitle']
                        except KeyError:
                            booksub = None

                        try:
                            bookdate = item['volumeInfo']['publishedDate']
                        except KeyError:
                            bookdate = '0000-00-00'
                        bookdate = bookdate[:4]

                        try:
                            bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                        except KeyError:
                            bookimg = 'images/nocover.png'

                        try:
                            bookrate = item['volumeInfo']['averageRating']
                        except KeyError:
                            bookrate = 0

                        try:
                            bookpages = item['volumeInfo']['pageCount']
                        except KeyError:
                            bookpages = '0'

                        try:
                            bookgenre = item['volumeInfo']['categories'][0]
                        except KeyError:
                            bookgenre = None

                        try:
                            bookdesc = item['volumeInfo']['description']
                        except KeyError:
                            bookdesc = 'Not available'

                        try:
                            num_reviews = item['volumeInfo']['ratingsCount']
                        except KeyError:
                            num_reviews = 0

                        try:
                            if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                                bookisbn = item['volumeInfo'][
                                    'industryIdentifiers'][0]['identifier']
                            else:
                                bookisbn = 0
                        except KeyError:
                            bookisbn = 0

                        author_fuzz = fuzz.token_set_ratio(Author, authorname)
                        book_fuzz = fuzz.token_set_ratio(
                            item['volumeInfo']['title'],
                            authorname)

                        isbn_fuzz = 0
                        if is_valid_isbn(authorname):
                            isbn_fuzz = 100

                        highest_fuzz = max(author_fuzz, book_fuzz, isbn_fuzz)

                        bookname = item['volumeInfo']['title']
                        dic = {':': '', '"': '', '\'': ''}
                        bookname = replace_all(bookname, dic)

                        bookname = unaccented(bookname)
                        bookname = bookname.strip()  # strip whitespace
                        bookid = item['id']

                        author = myDB.select(
                            'SELECT AuthorID FROM authors WHERE AuthorName = "%s"' %
                            Author.replace('"', '""'))
                        if author:
                            AuthorID = author[0]['authorid']
                        else:
                            AuthorID = ''

                        resultlist.append({
                            'authorname': Author,
                            'authorid': AuthorID,
                            'bookid': bookid,
                            'bookname': bookname,
                            'booksub': booksub,
                            'bookisbn': bookisbn,
                            'bookpub': bookpub,
                            'bookdate': bookdate,
                            'booklang': booklang,
                            'booklink': item['volumeInfo']['canonicalVolumeLink'],
                            'bookrate': float(bookrate),
                            'bookimg': bookimg,
                            'bookpages': bookpages,
                            'bookgenre': bookgenre,
                            'bookdesc': bookdesc,
                            'author_fuzz': author_fuzz,
                            'book_fuzz': book_fuzz,
                            'isbn_fuzz': isbn_fuzz,
                            'highest_fuzz': highest_fuzz,
                            'num_reviews': num_reviews
                        })

                        resultcount = resultcount + 1

            except KeyError:
                break

        logger.debug("Found %s total result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s bad language result%s" % (ignored, plural(ignored)))
        logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
        logger.debug(
            "Showing %s result%s for (%s) with keyword: %s" %
            (resultcount, plural(resultcount), api_value, authorname))
        logger.debug(
            'The Google Books API was hit %s time%s for keyword %s' %
            (api_hits, plural(api_hits), self.name))
        queue.put(resultlist)
Example #9
0
    def find_results(self, authorname=None, queue=None):
      try:
        myDB = database.DBConnection()
        resultlist = []
        # See if we should check ISBN field, otherwise ignore it
        api_strings = ['inauthor:', 'intitle:']
        if is_valid_isbn(authorname):
            api_strings = ['isbn:']

        api_hits = 0
        logger.debug(
            'Now searching Google Books API with keyword: ' +
            self.name)

        for api_value in api_strings:
            startindex = 0
            if api_value == "isbn:":
                set_url = self.url + urllib.quote(api_value + self.name.encode(lazylibrarian.SYS_ENCODING))
            else:
                set_url = self.url + \
                    urllib.quote(api_value + '"' + self.name.encode(lazylibrarian.SYS_ENCODING) + '"')

            try:
                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0

                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urllib.urlencode(self.params)

                    try:
                        jsonresults, in_cache = get_json_request(URL)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits = api_hits + 1
                            number_results = jsonresults['totalItems']
                            logger.debug('Searching url: ' + URL)
                        if number_results == 0:
                            logger.warn(
                                'Found no results for %s with value: %s' %
                                (api_value, self.name))
                            break
                        else:
                            pass
                    except HTTPError as err:
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while' %
                            err.reason)
                        break

                    startindex = startindex + 40

                    for item in jsonresults['items']:

                        total_count = total_count + 1

                        # skip if no author, no author is no book.
                        try:
                            Author = item['volumeInfo']['authors'][0]
                        except KeyError:
                            logger.debug('Skipped a result without authorfield.')
                            no_author_count = no_author_count + 1
                            continue
                        try:
                            bookname = item['volumeInfo']['title']
                        except KeyError:
                            logger.debug('Skipped a result without title.')
                            continue

                        valid_langs = ([valid_lang.strip()
                                       for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])

                        if "All" not in valid_langs:  # don't care about languages, accept all
                            try:
                                # skip if language is not in valid list -
                                booklang = item['volumeInfo']['language']
                                if booklang not in valid_langs:
                                    logger.debug(
                                        'Skipped %s with language %s' % (bookname, booklang))
                                    ignored = ignored + 1
                                    continue
                            except KeyError:
                                ignored = ignored + 1
                                logger.debug('Skipped %s where no language is found', bookname)
                                continue

                        try:
                            bookpub = item['volumeInfo']['publisher']
                        except KeyError:
                            bookpub = None

                        try:
                            booksub = item['volumeInfo']['subtitle']
                        except KeyError:
                            booksub = None

                        try:
                            bookdate = item['volumeInfo']['publishedDate']
                        except KeyError:
                            bookdate = '0000-00-00'
                        bookdate = bookdate[:4]

                        try:
                            bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                        except KeyError:
                            bookimg = 'images/nocover.png'

                        try:
                            bookrate = item['volumeInfo']['averageRating']
                        except KeyError:
                            bookrate = 0

                        try:
                            bookpages = item['volumeInfo']['pageCount']
                        except KeyError:
                            bookpages = '0'

                        try:
                            bookgenre = item['volumeInfo']['categories'][0]
                        except KeyError:
                            bookgenre = None

                        try:
                            bookdesc = item['volumeInfo']['description']
                        except KeyError:
                            bookdesc = 'Not available'

                        try:
                            num_reviews = item['volumeInfo']['ratingsCount']
                        except KeyError:
                            num_reviews = 0

                        try:
                            if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                                bookisbn = item['volumeInfo'][
                                    'industryIdentifiers'][0]['identifier']
                            else:
                                bookisbn = 0
                        except KeyError:
                            bookisbn = 0

                        author_fuzz = fuzz.token_set_ratio(Author, authorname)
                        book_fuzz = fuzz.token_set_ratio(bookname, authorname)

                        isbn_fuzz = 0
                        if is_valid_isbn(authorname):
                            isbn_fuzz = 100

                        highest_fuzz = max(author_fuzz, book_fuzz, isbn_fuzz)

                        dic = {':': '', '"': '', '\'': ''}
                        bookname = replace_all(bookname, dic)

                        bookname = unaccented(bookname)
                        bookname = bookname.strip()  # strip whitespace
                        bookid = item['id']

                        author = myDB.select(
                            'SELECT AuthorID FROM authors WHERE AuthorName = "%s"' %
                            Author.replace('"', '""'))
                        if author:
                            AuthorID = author[0]['authorid']
                        else:
                            AuthorID = ''

                        resultlist.append({
                            'authorname': Author,
                            'authorid': AuthorID,
                            'bookid': bookid,
                            'bookname': bookname,
                            'booksub': booksub,
                            'bookisbn': bookisbn,
                            'bookpub': bookpub,
                            'bookdate': bookdate,
                            'booklang': booklang,
                            'booklink': item['volumeInfo']['canonicalVolumeLink'],
                            'bookrate': float(bookrate),
                            'bookimg': bookimg,
                            'bookpages': bookpages,
                            'bookgenre': bookgenre,
                            'bookdesc': bookdesc,
                            'author_fuzz': author_fuzz,
                            'book_fuzz': book_fuzz,
                            'isbn_fuzz': isbn_fuzz,
                            'highest_fuzz': highest_fuzz,
                            'num_reviews': num_reviews
                        })

                        resultcount = resultcount + 1

            except KeyError:
                break

        logger.debug("Found %s total result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
        logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
        logger.debug("Showing %s result%s for (%s) with keyword: %s" %
                    (resultcount, plural(resultcount), api_value, authorname))
        logger.debug(
            'The Google Books API was hit %s time%s for keyword %s' %
            (api_hits, plural(api_hits), self.name))
        queue.put(resultlist)

      except Exception as e:
        logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
Example #10
0
def processCSV(search_dir=None):        
    """ Find a csv file in the search_dir and process all the books in it, 
    adding authors to the database if not found, and marking the books as "Wanted" """
     
    if not search_dir:
        logger.warn("Alternate Directory must not be empty")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn("No CSV file found in %s" % search_dir)
    else:
        logger.debug('Reading file %s' % csvFile)
        reader=csv.reader(open(csvFile))
        for row in reader:
            if reader.line_num == 1:
                # If we are on the first line, create the headers list from the first row
                # by taking a slice from item 1  as we don't need the very first header.
                headers = row[1:]
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                content[row[0]] = dict(zip(headers, row[1:]))
            
        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do:
        #print content.keys() # to get a list of bookIDs
        # To see the list of fields available for each book
        #print headers
        
        if 'Author' not in headers or 'Title' not in headers:
            logger.warn('Invalid CSV file found %s' % csvFile)
            return
            
        myDB = database.DBConnection() 
        bookcount = 0
        authcount = 0
        skipcount = 0  
        logger.debug("CSV: Found %s entries in csv file" % len(content.keys()))  
        for bookid in content.keys():
            
            authorname = content[bookid]['Author']
            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()
        
            if authmatch:
                logger.debug("CSV: Author %s found in database" % (authorname))
            else:
                logger.debug("CSV: Author %s not found, adding to database" % (authorname))
                importer.addAuthorToDB(authorname)
                authcount = authcount + 1

            bookmatch = 0
            isbn10=""
            isbn13=""    
            bookname = content[bookid]['Title']
            if 'ISBN' in headers:
                isbn10 = content[bookid]['ISBN']
            if 'ISBN13' in headers:
                isbn13 = content[bookid]['ISBN13']

            # try to find book in our database using isbn, or if that fails, fuzzy name matching
            if formatter.is_valid_isbn(isbn10):
                bookmatch = myDB.action('SELECT * FROM books where Bookisbn=%s' % (isbn10)).fetchone()
            if not bookmatch:
                if formatter.is_valid_isbn(isbn13):
                    bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' % (isbn13)).fetchone()
            if not bookmatch: 
                bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
                if bookid:
                    bookmatch = myDB.action('SELECT * FROM books where BookID="%s"' % (bookid)).fetchone()
            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                    logger.info('Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                else: # skipped/ignored
                    logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}                  
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount = bookcount + 1
            else:    
                logger.warn("Skipping book %s by %s, not found in database" % (bookname, authorname))
                skipcount = skipcount + 1
        logger.info("Added %i new authors, marked %i books as 'Wanted', %i books not found" % (authcount, bookcount, skipcount))    
Example #11
0
def get_book_info(fname):
    # only handles epub, mobi and opf for now,
    # for pdf see below
    res = {}
    extn = os.path.splitext(fname)[1]
    if not extn:
        return res
    if extn == ".mobi":
        try:
            book = Mobi(fname)
            book.parse()
        except:
            return res
        res["creator"] = book.author()
        res["title"] = book.title()
        res["language"] = book.language()
        res["identifier"] = book.isbn()
        res["type"] = "mobi"
        return res

    """
    # none of the pdfs in my library had language,isbn
    # most didn't have author, or had the wrong author
    # (author set to publisher, or software used)
    # so probably not much point in looking at pdfs
    #
    # if (extn == ".pdf"):
    #	  pdf = PdfFileReader(open(fname, "rb"))
    #	  txt = pdf.getDocumentInfo()
          # repackage the data here to get components we need
    #     res = {}
    #     for s in ['title','language','creator']:
    #         res[s] = txt[s]
    #	  res['identifier'] = txt['isbn']
    #     res['type'] = "pdf"
    #	  return res
    """

    if extn == ".epub":
        # prepare to read from the .epub file
        zipdata = zipfile.ZipFile(fname)
        # find the contents metafile
        txt = zipdata.read("META-INF/container.xml")
        tree = ElementTree.fromstring(txt)
        n = 0
        cfname = ""
        if not len(tree):
            return res

        while n < len(tree[0]):
            att = tree[0][n].attrib
            if "full-path" in att:
                cfname = att["full-path"]
            n = n + 1

        # grab the metadata block from the contents metafile
        txt = zipdata.read(cfname)
        tree = ElementTree.fromstring(txt)
        res["type"] = "epub"
    else:
        if extn == ".opf":
            txt = open(fname).read()
            tree = ElementTree.fromstring(txt)
            res["type"] = "opf"
        else:
            return ""

    # repackage the data
    if not len(tree):
        return res
    n = 0
    while n < len(tree[0]):
        tag = str(tree[0][n].tag).split("}")[1]
        txt = tree[0][n].text
        attrib = str(tree[0][n].attrib)
        isbn = ""
        if "title" in tag.lower():
            res["title"] = txt
        elif "language" in tag.lower():
            res["language"] = txt
        elif "creator" in tag.lower():
            res["creator"] = txt
        elif "identifier" in tag.lower() and "isbn" in attrib.lower():
            if formatter.is_valid_isbn(txt):
                res["identifier"] = txt
        n = n + 1
    return res
Example #12
0
def get_book_info(fname):
    # only handles epub, mobi and opf for now,
    # for pdf see below
    res = {}
    if not '.' in fname:
        return res
    words = fname.split('.')
    extn = words[len(words) - 1]

    if extn == "mobi":
        try:
            book = Mobi(fname)
            book.parse()
        except:
            return res
        res['creator'] = book.author()
        res['title'] = book.title()
        res['language'] = book.language()
        res['identifier'] = book.isbn()
        return res

    # none of the pdfs in my library had language,isbn
    # most didn't have author, or had the wrong author
    # (author set to publisher, or software used)
    # so probably not much point in looking at pdfs
    #
    # if (extn == "pdf"):
    #	  pdf = PdfFileReader(open(fname, "rb"))
    #	  txt = pdf.getDocumentInfo()
          # repackage the data here to get components we need
    #     res = {}
    #     for s in ['title','language','creator']:
    #         res[s] = txt[s]
    #	  res['identifier'] = txt['isbn']
    #	  return res

    if extn == "epub":
        # prepare to read from the .epub file
        zipdata = zipfile.ZipFile(fname)
        # find the contents metafile
        txt = zipdata.read('META-INF/container.xml')
        tree = ElementTree.fromstring(txt)
        n = 0
        cfname = ""
        if not len(tree):
            return res
        while n < len(tree[0]):
            att = str(tree[0][n].attrib)
            if 'full-path' in att:
                cfname = ("%s" % att)  # extract metadata filename
                cfname = cfname.split(',')[1].split(':')[1].strip('\' }')
            n = n + 1

        # grab the metadata block from the contents metafile
        txt = zipdata.read(cfname)
        tree = ElementTree.fromstring(txt)
    else:
        if extn == "opf":
            txt = open(fname).read()
            tree = ElementTree.fromstring(txt)
        else:
            return ""

    # repackage the data
    if not len(tree):
        return res
    n = 0
    while n < len(tree[0]):
        tag = str(tree[0][n].tag).split('}')[1]
        txt = tree[0][n].text
        attrib = str(tree[0][n].attrib)
        isbn = ""
        if 'title' in tag.lower():
            res['title'] = txt
        elif 'language' in tag.lower():
            res['language'] = txt
        elif 'creator' in tag.lower():
            res['creator'] = txt
        elif 'identifier' in tag.lower() and 'isbn' in attrib.lower():
            if formatter.is_valid_isbn(txt):
                res['identifier'] = isbn
        n = n + 1
    return res
Example #13
0
def processCSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
    adding authors to the database if not found, and marking the books as "Wanted" """

    if not search_dir or os.path.isdir(search_dir) is False:
        logger.warn(u"Alternate Directory must not be empty")
        return False

    csvFile = csv_file(search_dir)

    headers = None
    content = {}

    if not csvFile:
        logger.warn(u"No CSV file found in %s" % search_dir)
    else:
        logger.debug(u'Reading file %s' % csvFile)
        reader = csv.reader(open(csvFile))
        for row in reader:
            if reader.line_num == 1:
                # If we are on the first line, create the headers list from the first row
                # by taking a slice from item 1  as we don't need the very first header.
                headers = row[1:]
            else:
                # Otherwise, the key in the content dictionary is the first item in the
                # row and we can create the sub-dictionary by using the zip() function.
                content[row[0]] = dict(zip(headers, row[1:]))

        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do:
        # print content.keys() # to get a list of bookIDs
        # To see the list of fields available for each book
        # print headers

        if 'Author' not in headers or 'Title' not in headers:
            logger.warn(u'Invalid CSV file found %s' % csvFile)
            return

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        logger.debug(u"CSV: Found %s entries in csv file" % len(content.keys()))
        for bookid in content.keys():

            authorname = formatter.latinToAscii(content[bookid]['Author'])
            authmatch = myDB.action('SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone()

            if authmatch:
                logger.debug(u"CSV: Author %s found in database" % (authorname))
            else:
                logger.debug(u"CSV: Author %s not found, adding to database" % (authorname))
                importer.addAuthorToDB(authorname)
                authcount = authcount + 1

            bookmatch = 0
            isbn10 = ""
            isbn13 = ""
            bookname = formatter.latinToAscii(content[bookid]['Title'])
            if 'ISBN' in headers:
                isbn10 = content[bookid]['ISBN']
            if 'ISBN13' in headers:
                isbn13 = content[bookid]['ISBN13']

            # try to find book in our database using isbn, or if that fails, name matching
            if formatter.is_valid_isbn(isbn10):
                bookmatch = myDB.action('SELECT * FROM books where Bookisbn=%s' % (isbn10)).fetchone()
            if not bookmatch:
                if formatter.is_valid_isbn(isbn13):
                    bookmatch = myDB.action('SELECT * FROM books where BookIsbn=%s' % (isbn13)).fetchone()
            if not bookmatch:
                bookid = librarysync.find_book_in_db(myDB, authorname, bookname)
                if bookid:
                    bookmatch = myDB.action('SELECT * FROM books where BookID="%s"' % (bookid)).fetchone()
            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus == 'Open' or bookstatus == 'Wanted' or bookstatus == 'Have':
                    logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                else:  # skipped/ignored
                    logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount = bookcount + 1
            else:
                logger.warn(u"Skipping book %s by %s, not found in database" % (bookname, authorname))
                skipcount = skipcount + 1
        logger.info(u"Added %i new authors, marked %i books as 'Wanted', %i books not found" %
                    (authcount, bookcount, skipcount))
Example #14
0
    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0
            title = ''
            authorname = ''

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')

            fullterm = searchterm.replace(' <ll> ', ' ')
            logger.debug('Now searching Google Books API with searchterm: %s' %
                         fullterm)

            for api_value in api_strings:
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + quote(api_value + searchterm)
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        # noinspection PyUnresolvedReferences
                        title = title.split(' (')[0]  # without any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace(
                        '"', '').strip()  # and no quotes
                    if PY2:
                        searchterm = searchterm.encode(
                            lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote(api_value + '"' + searchterm +
                                              '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    searchterm = searchterm.strip()
                    if PY2:
                        searchterm = searchterm.encode(
                            lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote_plus(api_value + '"' +
                                                   searchterm + '"')

                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urlencode(self.params)

                        try:
                            jsonresults, in_cache = gb_json_request(URL)
                            if jsonresults is None:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn(
                                    'Found no results for %s with value: %s' %
                                    (api_value, searchterm))
                                break
                            else:
                                pass
                        except Exception as err:
                            if hasattr(err, 'reason'):
                                errmsg = err.reason
                            else:
                                errmsg = str(err)
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while'
                                % errmsg)
                            break

                        startindex += 40

                        for item in jsonresults['items']:
                            total_count += 1

                            book = bookdict(item)
                            if not book['author']:
                                logger.debug(
                                    'Skipped a result without authorfield.')
                                no_author_count += 1
                                continue

                            if not book['name']:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(
                                lazylibrarian.CONFIG['IMP_PREFLANG'])
                            if "All" not in valid_langs:  # don't care about languages, accept all
                                try:
                                    # skip if language is not in valid list -
                                    booklang = book['lang']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' %
                                            (book['name'], booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug(
                                        'Skipped %s where no language is found'
                                        % book['name'])
                                    continue

                            if authorname:
                                author_fuzz = fuzz.ratio(
                                    book['author'], authorname)
                            else:
                                author_fuzz = fuzz.ratio(
                                    book['author'], fullterm)

                            if title:
                                book_fuzz = fuzz.token_set_ratio(
                                    book['name'], title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(book['name']))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.token_set_ratio(
                                    book['name'], fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2,
                                               isbn_fuzz)

                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(book['name'], dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace

                            AuthorID = ''
                            if book['author']:
                                match = myDB.match(
                                    'SELECT AuthorID FROM authors WHERE AuthorName=?',
                                    (book['author'].replace('"', '""'), ))
                                if match:
                                    AuthorID = match['AuthorID']

                            resultlist.append({
                                'authorname':
                                book['author'],
                                'authorid':
                                AuthorID,
                                'bookid':
                                item['id'],
                                'bookname':
                                bookname,
                                'booksub':
                                book['sub'],
                                'bookisbn':
                                book['isbn'],
                                'bookpub':
                                book['pub'],
                                'bookdate':
                                book['date'],
                                'booklang':
                                book['lang'],
                                'booklink':
                                book['link'],
                                'bookrate':
                                float(book['rate']),
                                'bookrate_count':
                                book['rate_count'],
                                'bookimg':
                                book['img'],
                                'bookpages':
                                book['pages'],
                                'bookgenre':
                                book['genre'],
                                'bookdesc':
                                book['desc'],
                                'author_fuzz':
                                author_fuzz,
                                'book_fuzz':
                                book_fuzz,
                                'isbn_fuzz':
                                isbn_fuzz,
                                'highest_fuzz':
                                highest_fuzz,
                                'num_reviews':
                                book['ratings']
                            })

                            resultcount += 1

                except KeyError:
                    break

                logger.debug(
                    "Returning %s result%s for (%s) with keyword: %s" %
                    (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" %
                         (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" %
                         (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" %
                         (no_author_count, plural(no_author_count)))
            logger.debug(
                'The Google Books API was hit %s time%s for searchterm: %s' %
                (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' %
                         traceback.format_exc())
Example #15
0
def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
        return

    myDB = database.DBConnection()

    myDB.action("drop table if exists stats")
    myDB.action(
        "create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
                            GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )"
    )

    logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select('select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info("Missing books will be marked as %s" % status)
        for book in books:
            bookName = book["BookName"]
            bookAuthor = book["AuthorName"]
            bookID = book["BookID"]
            bookfile = book["BookFile"]

            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn("Book %s - %s updated as not found on disk" % (bookAuthor, bookName))

    # guess this was meant to save repeat-scans of the same directory
    # if it contains multiple formats of the same book, but there was no code
    # that looked at the array. renamed from latest to processed to make
    # purpose clearer
    processed_subdirectories = []

    matchString = ""
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + "\\" + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ""
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + "|" + book_type
    matchString = (
        matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)")
        + "\.["
        + booktypes
        + "]"
    )
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, "")
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0

                if formatter.is_valid_booktype(files):
                    logger.debug(
                        "[%s] Now scanning subdirectory %s"
                        % (
                            dir.decode(lazylibrarian.SYS_ENCODING, "replace"),
                            subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"),
                        )
                    )
                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    words = files.split(".")
                    extn = words[len(words) - 1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == "epub") or (extn == "mobi"):
                        book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if "title" in res and "creator" in res:  # this is the minimum we need
                            match = 1
                            book = res["title"]
                            author = res["creator"]
                            if "language" in res:
                                language = res["language"]
                            if "identifier" in res:
                                isbn = res["identifier"]
                            if "type" in res:
                                extn = res["type"]
                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn))
                        else:
                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if "title" in res and "creator" in res:  # this is the minimum we need
                        match = 1
                        book = res["title"]
                        author = res["creator"]
                        if "language" in res:
                            language = res["language"]
                        if "identifier" in res:
                            isbn = res["identifier"]
                        logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and formatter.is_valid_isbn(isbn):
                            logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                            if not match:
                                myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                                logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                            else:
                                logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(",")
                            author = words[1].strip() + " " + words[0].strip()  # "forename surname"
                        if author[1] == " ":
                            author = author.replace(" ", ".")
                            author = author.replace("..", ".")

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' % author
                        ).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn("Error finding author id for [%s]" % author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr["authorname"]

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace(".", "_")
                                match_auth = match_auth.replace(" ", "_")
                                match_auth = match_auth.replace("__", "_")
                                match_name = authorname.replace(".", "_")
                                match_name = match_name.replace(" ", "_")
                                match_name = match_name.replace("__", "_")
                                match_name = common.remove_accents(match_name)
                                match_auth = common.remove_accents(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)
                                    )

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr["authorname"]
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' % author
                                    ).fetchone()
                                    if not check_exist_author:
                                        logger.debug("Adding new author [%s]" % author)
                                        try:
                                            importer.addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' % author
                                            ).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug("Failed to match author [%s] in database" % author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', "").replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)
                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)
                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' % bookid
                                ).fetchone()
                                if check_status["Status"] != "Open":
                                    # update status as we've got this book
                                    myDB.action('UPDATE books set Status="Open" where BookID="%s"' % bookid)
                                    book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open
                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)
                                    )
                                    new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
            sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats"
    ).fetchone()
    if stats["sum(GR_book_hits)"] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"])
        logger.debug("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"])
        logger.debug("Language cache was hit %s times" % stats["sum(cache_hits)"])
        logger.debug("Unwanted language removed %s books" % stats["sum(bad_lang)"])
        logger.debug("Unwanted characters removed %s books" % stats["sum(bad_char)"])
        logger.debug("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize["counter"])
    stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)

    authors = myDB.select("select AuthorName from authors")
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug("Updating bookcounts for %i authors" % len(authors))
    for author in authors:
        name = author["AuthorName"]
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")'
            % name
        ).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks["counter"], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' % name
        ).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (totalbooks["counter"], name))

    logger.info("Library scan complete")
Example #16
0
    def find_results(self, searchterm=None, queue=None):
        try:
            resultlist = []
            api_hits = 0
            # we don't use the title/author separator in goodreads
            searchterm = searchterm.replace(' <ll> ', '')

            url = urllib.quote_plus(searchterm.encode(lazylibrarian.SYS_ENCODING))
            set_url = 'http://www.goodreads.com/search.xml?q=' + url + '&' + urllib.urlencode(self.params)
            logger.debug('Now searching GoodReads API with searchterm: %s' % searchterm)
            #logger.debug('Searching for %s at: %s' % (searchterm, set_url))

            resultcount = 0
            try:
                try:
                    rootxml, in_cache = get_xml_request(set_url)
                except Exception as e:
                    logger.error("Error finding gr results: %s" % str(e))
                    return
                if not len(rootxml):
                    logger.debug("Error requesting results")
                    return

                resultxml = rootxml.getiterator('work')
                for author in resultxml:

                    if author.find('original_publication_year').text is None:
                        bookdate = "0000"
                    else:
                        bookdate = author.find('original_publication_year').text

                    authorNameResult = author.find('./best_book/author/name').text
                    # Goodreads sometimes puts extra whitepase in the author names!
                    authorNameResult =  ' '.join(authorNameResult.split())
                    booksub = ""
                    bookpub = ""
                    booklang = "Unknown"

                    try:
                        bookimg = author.find('./best_book/image_url').text
                        if bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png':
                            bookimg = 'images/nocover.png'
                    except (KeyError, AttributeError):
                        bookimg = 'images/nocover.png'

                    try:
                        bookrate = author.find('average_rating').text
                    except KeyError:
                        bookrate = 0

                    bookpages = '0'
                    bookgenre = ''
                    bookdesc = ''
                    bookisbn = ''
                    booklink = 'http://www.goodreads.com/book/show/' + author.find('./best_book/id').text

                    if author.find('./best_book/title').text is None:
                        bookTitle = ""
                    else:
                        bookTitle = author.find('./best_book/title').text

                    author_fuzz = fuzz.ratio(authorNameResult, searchterm)
                    book_fuzz = fuzz.ratio(bookTitle, searchterm)
                    isbn_fuzz = 0
                    if is_valid_isbn(searchterm):
                            isbn_fuzz = 100

                    highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                    bookid = author.find('./best_book/id').text

                    resultlist.append({
                        'authorname': author.find('./best_book/author/name').text,
                        'bookid': bookid,
                        'authorid': author.find('./best_book/author/id').text,
                        'bookname': bookTitle.encode("ascii", "ignore"),
                        'booksub': booksub,
                        'bookisbn': bookisbn,
                        'bookpub': bookpub,
                        'bookdate': bookdate,
                        'booklang': booklang,
                        'booklink': booklink,
                        'bookrate': float(bookrate),
                        'bookimg': bookimg,
                        'bookpages': bookpages,
                        'bookgenre': bookgenre,
                        'bookdesc': bookdesc,
                        'author_fuzz': author_fuzz,
                        'book_fuzz': book_fuzz,
                        'isbn_fuzz': isbn_fuzz,
                        'highest_fuzz': highest_fuzz,
                        'num_reviews': float(bookrate)
                    })

                    resultcount += 1

            except urllib2.HTTPError as err:
                if err.code == 404:
                    logger.error('Received a 404 error when searching for author')
                if err.code == 403:
                    logger.warn('Access to api is denied: usage exceeded')
                else:
                    logger.error('An unexpected error has occurred when searching for an author: %s' % str(err))

            logger.debug('Found %s result%s with keyword: %s' % (resultcount, plural(resultcount), searchterm))
            logger.debug(
                'The GoodReads API was hit %s time%s for keyword %s' % (api_hits, plural(api_hits), searchterm))

            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GR.find_results: %s' % traceback.format_exc())
Example #17
0
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added """
    if not startdir:
        if not lazylibrarian.DESTINATION_DIR:
            return 0
        else:
            startdir = lazylibrarian.DESTINATION_DIR

    if not os.path.isdir(startdir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' % startdir)
        return 0

    myDB = database.DBConnection()

    # keep statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        myDB.action('DELETE from stats')

    logger.info('Scanning ebook directory: %s' % startdir)

    new_book_count = 0
    file_count = 0
    author = ""

    if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))

    # to save repeat-scans of the same directory if it contains multiple formats of the same book,
    # keep track of which directories we've already looked at
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(startdir):
        for directory in d[:]:
            # prevent magazine being scanned
            if directory.startswith("_") or directory.startswith("."):
                d.remove(directory)

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode(lazylibrarian.SYS_ENCODING)

            subdirectory = r.replace(startdir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if is_valid_booktype(files):

                    logger.debug("[%s] Now scanning subdirectory %s" %
                                 (startdir, subdirectory))

                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    extn = os.path.splitext(files)[1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == ".epub") or (extn == ".mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))

                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']

                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:

                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref

                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and is_valid_isbn(isbn):
                            logger.debug(
                                "Found Language [%s] ISBN [%s]" %
                                (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"' %
                                (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")' %
                                    (isbnhead, language))
                                logger.debug(
                                    "Cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' %
                            author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" %
                                    author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = unaccented(match_name)
                                match_auth = unaccented(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]" %
                                        (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" %
                                        (match_auth, match_name))

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' %
                                        author).fetchone()
                                    if not check_exist_author:
                                        logger.info(
                                            "Adding new author [%s]" %
                                            author)
                                        try:
                                            addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' %
                                                author).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" %
                                author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)

                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)

                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' %
                                    bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book

                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"' %
                                        bookid)

                                    book_filename = os.path.join(r, files)

                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open

                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' %
                                        (book_filename, bookid))

                                    # update cover file to cover.jpg in book folder (if exists)
                                    bookdir = book_filename.rsplit(os.sep, 1)[0]
                                    coverimg = os.path.join(bookdir, 'cover.jpg')
                                    cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache')
                                    cacheimg = os.path.join(cachedir, bookid + '.jpg')
                                    if os.path.isfile(coverimg):
                                        copyfile(coverimg, cacheimg)

                                    new_book_count += 1
                            else:
                                logger.debug(
                                    "Failed to match book [%s] by [%s] in database" %
                                    (book, author))


    logger.info("%s new/modified book%s found and added to the database" %
                (new_book_count, plural(new_book_count)))
    logger.info("%s file%s processed" % (file_count, plural(file_count)))

    # show statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats").fetchone()
        if stats['sum(GR_book_hits)'] is not None:
            # only show stats if new books added
            if lazylibrarian.BOOK_API == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                    (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                    (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)'])))
            if lazylibrarian.BOOK_API == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                    (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                    (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                (stats['sum(LT_lang_hits)'], plural (stats['sum(LT_lang_hits)'])))
            logger.debug("Language cache was hit %s time%s" %
                (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)'])))
            logger.debug("Unwanted language removed %s book%s" %
                (stats['sum(bad_lang)'], plural (stats['sum(bad_lang)'])))
            logger.debug("Unwanted characters removed %s book%s" %
                (stats['sum(bad_char)'], plural(stats['sum(bad_char)'])))
            logger.debug("Unable to cache %s book%s with missing ISBN" %
                (stats['sum(uncached)'], plural(stats['sum(uncached)'])))
            logger.debug("Found %s duplicate book%s" %
                (stats['sum(duplicates)'], plural(stats['sum(duplicates)'])))
            logger.debug("Cache %s hit%s, %s miss" %
                (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
            nolang = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))

        authors = myDB.select('select AuthorID from authors')
        # Update bookcounts for all authors, not just new ones - refresh may have located
        # new books for existing authors especially if switched provider gb/gr
    else:
        # single author/book import
        authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author)

    logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
    for author in authors:
        update_totals(author['AuthorID'])

    images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
    if len(images):
        logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            bookid = item['bookid']
            bookimg = item['bookimg']
            bookname = item['bookname']
            newimg = cache_cover(bookid, bookimg)
            if newimg is not None:
                myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

    images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
    if len(images):
        logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            authorid = item['authorid']
            authorimg = item['authorimg']
            authorname = item['authorname']
            newimg = cache_cover(authorid, authorimg)
            if newimg is not None:
                myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))
    setWorkPages()
    logger.info('Library scan complete')
    return new_book_count
Example #18
0
def get_book_info(fname):
    # only handles epub, mobi and opf for now,
    # for pdf see notes below
    res = {}
    extn = os.path.splitext(fname)[1]
    if not extn:
        return res
    if extn == ".mobi":
        res['type'] = "mobi"
        try:
            book = Mobi(fname)
            book.parse()
        except:
            return res
        res['creator'] = book.author()
        res['title'] = book.title()
        res['language'] = book.language()
        res['identifier'] = book.isbn()
        return res

        # none of the pdfs in my library had language,isbn
        # most didn't have author, or had the wrong author
        # (author set to publisher, or software used)
        # so probably not much point in looking at pdfs
        #
        # if (extn == ".pdf"):
        #	  pdf = PdfFileReader(open(fname, "rb"))
        #	  txt = pdf.getDocumentInfo()
              # repackage the data here to get components we need
        #     res = {}
        #     for s in ['title','language','creator']:
        #         res[s] = txt[s]
        #	  res['identifier'] = txt['isbn']
        #     res['type'] = "pdf"
        #	  return res

    elif extn == ".epub":
        res['type'] = "epub"

        # prepare to read from the .epub file
        try:
            zipdata = zipfile.ZipFile(fname)
        except:
            return res

        # find the contents metafile
        txt = zipdata.read('META-INF/container.xml')
        tree = ElementTree.fromstring(txt)
        n = 0
        cfname = ""
        if not len(tree):
           return res

        while n < len(tree[0]):
           att = tree[0][n].attrib
           if 'full-path' in att:
               cfname = att['full-path']
               break
           n = n + 1

        # grab the metadata block from the contents metafile
        txt = zipdata.read(cfname)

    elif extn == ".opf":
        res['type'] = "opf"
        txt = open(fname).read()
        # sanitize any unmatched html tags or ElementTree won't parse
        dic = {'<br>': '', '</br>': ''}
        txt = replace_all(txt, dic)

    # repackage epub or opf metadata
    try:
        tree = ElementTree.fromstring(txt)
    except Exception as e:
        logger.error("Error parsing metadata from %s" % fname)
        logger.error(str(e))
        return res

    if not len(tree):
        return res
    n = 0
    while n < len(tree[0]):
        tag = str(tree[0][n].tag).lower()
        if '}' in tag:
            tag = tag.split('}')[1]
            txt = tree[0][n].text
            attrib = str(tree[0][n].attrib).lower()
            if 'title' in tag:
                res['title'] = txt
            elif 'language' in tag:
                res['language'] = txt
            elif 'creator' in tag:
                res['creator'] = txt
            elif 'identifier' in tag and 'isbn' in attrib:
                if is_valid_isbn(txt):
                    res['identifier'] = txt
            n = n + 1
    return res
Example #19
0
def get_book_info(fname):
    # only handles epub, mobi and opf for now,
    # for pdf see below
    res = {}
    if '.' not in fname:
        return res
    words = fname.split('.')
    extn = words[len(words) - 1]

    if extn == "mobi":
        try:
            book = Mobi(fname)
            book.parse()
        except:
            return res
        res['creator'] = book.author()
        res['title'] = book.title()
        res['language'] = book.language()
        res['identifier'] = book.isbn()
        res['type'] = "mobi"
        return res

    """
    # none of the pdfs in my library had language,isbn
    # most didn't have author, or had the wrong author
    # (author set to publisher, or software used)
    # so probably not much point in looking at pdfs
    #
    # if (extn == "pdf"):
    #	  pdf = PdfFileReader(open(fname, "rb"))
    #	  txt = pdf.getDocumentInfo()
          # repackage the data here to get components we need
    #     res = {}
    #     for s in ['title','language','creator']:
    #         res[s] = txt[s]
    #	  res['identifier'] = txt['isbn']
    #     res['type'] = "pdf"
    #	  return res
    """

    if extn == "epub":
        # prepare to read from the .epub file
        zipdata = zipfile.ZipFile(fname)
        # find the contents metafile
        txt = zipdata.read('META-INF/container.xml')
        tree = ElementTree.fromstring(txt)
        n = 0
        cfname = ""
        if not len(tree):
            return res

        while n < len(tree[0]):
            att = tree[0][n].attrib
            if 'full-path' in att:
                cfname = att['full-path']
            n = n + 1

        # grab the metadata block from the contents metafile
        txt = zipdata.read(cfname)
        tree = ElementTree.fromstring(txt)
        res['type'] = "epub"
    else:
        if extn == "opf":
            txt = open(fname).read()
            tree = ElementTree.fromstring(txt)
            res['type'] = "opf"
        else:
            return ""

    # repackage the data
    if not len(tree):
        return res
    n = 0
    while n < len(tree[0]):
        tag = str(tree[0][n].tag).split('}')[1]
        txt = tree[0][n].text
        attrib = str(tree[0][n].attrib)
        isbn = ""
        if 'title' in tag.lower():
            res['title'] = txt
        elif 'language' in tag.lower():
            res['language'] = txt
        elif 'creator' in tag.lower():
            res['creator'] = txt
        elif 'identifier' in tag.lower() and 'isbn' in attrib.lower():
            if formatter.is_valid_isbn(txt):
                res['identifier'] = txt
        n = n + 1
    return res
Example #20
0
def get_book_info(fname):
    # only handles epub, mobi, azw3 and opf for now,
    # for pdf see notes below
    res = {}
    extn = os.path.splitext(fname)[1]
    if not extn:
        return res
    if extn == ".mobi" or extn == ".azw3":
        res['type'] = extn[1:]
        try:
            book = Mobi(fname)
            book.parse()
        except Exception as e:
            logger.debug('Unable to parse mobi in %s, %s' % (fname, str(e)))
            return res
        res['creator'] = book.author()
        res['title'] = book.title()
        res['language'] = book.language()
        res['identifier'] = book.isbn()
        return res
        """
        # none of the pdfs in my library had language,isbn
        # most didn't have author, or had the wrong author
        # (author set to publisher, or software used)
        # so probably not much point in looking at pdfs
        #
        if (extn == ".pdf"):
            pdf = PdfFileReader(open(fname, "rb"))
            txt = pdf.getDocumentInfo()
            # repackage the data here to get components we need
            res = {}
            for s in ['title','language','creator']:
                res[s] = txt[s]
            res['identifier'] = txt['isbn']
            res['type'] = "pdf"
            return res
        """
    elif extn == ".epub":
        res['type'] = "epub"

        # prepare to read from the .epub file
        try:
            zipdata = zipfile.ZipFile(fname)
        except Exception as e:
            logger.debug('Unable to parse zipfile %s, %s' % (fname, str(e)))
            return res

        # find the contents metafile
        txt = zipdata.read('META-INF/container.xml')
        tree = ElementTree.fromstring(txt)
        n = 0
        cfname = ""
        if not len(tree):
            return res

        while n < len(tree[0]):
            att = tree[0][n].attrib
            if 'full-path' in att:
                cfname = att['full-path']
                break
            n = n + 1

        # grab the metadata block from the contents metafile
        txt = zipdata.read(cfname)

    elif extn == ".opf":
        res['type'] = "opf"
        txt = open(fname).read()
        # sanitize any unmatched html tags or ElementTree won't parse
        dic = {'<br>': '', '</br>': ''}
        txt = replace_all(txt, dic)

    # repackage epub or opf metadata
    try:
        tree = ElementTree.fromstring(txt)
    except Exception as e:
        logger.error("Error parsing metadata from %s, %s" % (fname, str(e)))
        return res

    if not len(tree):
        return res
    n = 0
    while n < len(tree[0]):
        tag = str(tree[0][n].tag).lower()
        if '}' in tag:
            tag = tag.split('}')[1]
            txt = tree[0][n].text
            attrib = str(tree[0][n].attrib).lower()
            if 'title' in tag:
                res['title'] = txt
            elif 'language' in tag:
                res['language'] = txt
            elif 'creator' in tag:
                res['creator'] = txt
            elif 'identifier' in tag and 'isbn' in attrib:
                if is_valid_isbn(txt):
                    res['identifier'] = txt
        n = n + 1
    return res
Example #21
0
    def find_results(self, searchterm=None, queue=None):
        try:
            resultlist = []
            api_hits = 0
            searchtitle = ''
            searchauthorname = ''

            if ' <ll> ' in searchterm:  # special token separates title from author
                searchtitle, searchauthorname = searchterm.split(' <ll> ')
                searchterm = searchterm.replace(' <ll> ', ' ')

            searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
            url = urllib.quote_plus(searchterm)
            set_url = 'https://www.goodreads.com/search.xml?q=' + url + '&' + urllib.urlencode(self.params)
            logger.debug('Now searching GoodReads API with searchterm: %s' % searchterm)
            # logger.debug('Searching for %s at: %s' % (searchterm, set_url))

            resultcount = 0
            try:
                try:
                    rootxml, in_cache = get_xml_request(set_url)
                except Exception as e:
                    logger.error("%s finding gr results: %s" % (type(e).__name__, str(e)))
                    return
                if rootxml is None:
                    logger.debug("Error requesting results")
                    return

                totalresults = check_int(rootxml.find('search/total-results').text, 0)

                resultxml = rootxml.getiterator('work')
                loopCount = 1
                while resultxml:
                    for author in resultxml:
                        try:
                            if author.find('original_publication_year').text is None:
                                bookdate = "0000"
                            else:
                                bookdate = author.find('original_publication_year').text
                        except (KeyError, AttributeError):
                            bookdate = "0000"

                        try:
                            authorNameResult = author.find('./best_book/author/name').text
                            # Goodreads sometimes puts extra whitepase in the author names!
                            authorNameResult = ' '.join(authorNameResult.split())
                        except (KeyError, AttributeError):
                            authorNameResult = ""

                        booksub = ""
                        bookpub = ""
                        booklang = "Unknown"

                        try:
                            bookimg = author.find('./best_book/image_url').text
                            if bookimg == 'https://www.goodreads.com/assets/nocover/111x148.png':
                                bookimg = 'images/nocover.png'
                        except (KeyError, AttributeError):
                            bookimg = 'images/nocover.png'

                        try:
                            bookrate = author.find('average_rating').text
                        except KeyError:
                            bookrate = 0

                        bookpages = '0'
                        bookgenre = ''
                        bookdesc = ''
                        bookisbn = ''

                        try:
                            booklink = 'https://www.goodreads.com/book/show/' + author.find('./best_book/id').text
                        except (KeyError, AttributeError):
                            booklink = ""

                        try:
                            authorid = author.find('./best_book/author/id').text
                        except (KeyError, AttributeError):
                            authorid = ""

                        try:
                            if author.find('./best_book/title').text is None:
                                bookTitle = ""
                            else:
                                bookTitle = author.find('./best_book/title').text
                        except (KeyError, AttributeError):
                            bookTitle = ""

                        if searchauthorname:
                            author_fuzz = fuzz.ratio(authorNameResult, searchauthorname)
                        else:
                            author_fuzz = fuzz.ratio(authorNameResult, searchterm)
                        if searchtitle:
                            book_fuzz = fuzz.token_set_ratio(bookTitle, searchtitle)
                            # lose a point for each extra word in the fuzzy matches so we get the closest match
                            words = len(getList(bookTitle))
                            words -= len(getList(searchtitle))
                            book_fuzz -= abs(words)
                        else:
                            book_fuzz = fuzz.token_set_ratio(bookTitle, searchterm)
                            words = len(getList(bookTitle))
                            words -= len(getList(searchterm))
                            book_fuzz -= abs(words)
                        isbn_fuzz = 0
                        if is_valid_isbn(searchterm):
                            isbn_fuzz = 100

                        highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                        try:
                            bookid = author.find('./best_book/id').text
                        except (KeyError, AttributeError):
                            bookid = ""

                        resultlist.append({
                            'authorname': authorNameResult,
                            'bookid': bookid,
                            'authorid': authorid,
                            'bookname': bookTitle,
                            'booksub': booksub,
                            'bookisbn': bookisbn,
                            'bookpub': bookpub,
                            'bookdate': bookdate,
                            'booklang': booklang,
                            'booklink': booklink,
                            'bookrate': float(bookrate),
                            'bookimg': bookimg,
                            'bookpages': bookpages,
                            'bookgenre': bookgenre,
                            'bookdesc': bookdesc,
                            'author_fuzz': author_fuzz,
                            'book_fuzz': book_fuzz,
                            'isbn_fuzz': isbn_fuzz,
                            'highest_fuzz': highest_fuzz,
                            'num_reviews': float(bookrate)
                        })

                        resultcount += 1

                    loopCount += 1

                    if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < loopCount:
                        resultxml = None
                        logger.warn('Maximum results page search reached, still more results available')
                    elif totalresults and resultcount >= totalresults:
                        # fix for goodreads bug on isbn searches
                        resultxml = None
                    else:
                        URL = set_url + '&page=' + str(loopCount)
                        resultxml = None
                        try:
                            rootxml, in_cache = get_xml_request(URL)
                            if rootxml is None:
                                logger.debug('Error requesting page %s of results' % loopCount)
                            else:
                                resultxml = rootxml.getiterator('work')
                                if not in_cache:
                                    api_hits += 1
                        except Exception as e:
                            resultxml = None
                            logger.error("%s finding page %s of results: %s" % (type(e).__name__, loopCount, str(e)))

                    if resultxml:
                        if all(False for _ in resultxml):  # returns True if iterator is empty
                            resultxml = None

            except Exception as err:
                if err.code == 404:
                    logger.error('Received a 404 error when searching for author')
                if err.code == 403:
                    logger.warn('Access to api is denied: usage exceeded')
                else:
                    logger.error('An unexpected error has occurred when searching for an author: %s' % str(err))

            logger.debug('Found %s result%s with keyword: %s' % (resultcount, plural(resultcount), searchterm))
            logger.debug(
                'The GoodReads API was hit %s time%s for keyword %s' % (api_hits, plural(api_hits), searchterm))

            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GR.find_results: %s' % traceback.format_exc())
Example #22
0
def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' %
            dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    new_authors = []

    logger.info(
        'Scanning ebook directory: %s' %
        dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action(
                    'update books set Status="%s" where BookID="%s"' %
                    (status, bookID))
                myDB.action(
                    'update books set BookFile="" where BookID="%s"' %
                    bookID)
                logger.warn(
                    'Book %s - %s updated as not found on disk' %
                    (bookAuthor, bookName))
            # for book_type in getList(lazylibrarian.EBOOK_TYPE):
            #	bookName = book['BookName']
            #	bookAuthor = book['AuthorName']
            # Default destination path, should be allowed change per config file.
            #	dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName)
            # dest_path = authorname+'/'+bookname
            #	global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName)
#
            #	encoded_book_path = os.path.join(dir,dest_path,global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING)
            #	if os.path.isfile(encoded_book_path):
            #		book_exists = True
            # if not book_exists:
            #	myDB.action('update books set Status=? where AuthorName=? and BookName=?',[status,bookAuthor,bookName])
            # logger.info('Book %s updated as not found on disk' %
            # encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace') )
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

    # guess this was meant to save repeat-scans of the same directory
    # if it contains multiple formats of the same book, but there was no code
    # that looked at the array. renamed from latest to processed to make
    # purpose clearer
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, '')
            # Added new code to skip if we've done this directory before. Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same
            # subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
# 			If this is a book, try to get author/title/isbn/language
# 			If metadata.opf exists, use that
# 			else if epub or mobi, read metadata from the book
# 			else have to try pattern match for author/title	and look up isbn/lang from LT or GR late
                match = 0
                extn = ""
                
                if '.' in files:
                    words = files.split('.')
                    extn = words[len(words) - 1]
                    
                if formatter.is_valid_booktype(files):
                    logger.debug(
                        "[%s] Now scanning subdirectory %s" %
                        (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'), subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))
                    
                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know 
                    # LL preferred authorname/bookname at this point
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        else:
                            language = ""
                        if 'identifier' in res:
                            isbn = res['identifier']
                        else:
                            isbn = ""
                        match = 1
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:
                        # it's a book, but no external metadata found
                        # if it's an epub or a mobi we can try to read metadata
                        # from it
                        if (extn == "epub") or (extn == "mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                            try:
                                res = get_book_info(book_filename)
                            except:
                                res = {}
                            if 'title' in res and 'creator' in res:  # this is the minimum we need
                                book = res['title']
                                author = res['creator']
                                if 'language' in res:
                                    language = res['language']
                                else:
                                    language = ""
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                else:
                                    isbn = ""
                                logger.debug("book meta [%s] [%s] [%s] [%s]" %
                                    (isbn, language, author, book))
                                match = 1
                            else:
                                logger.debug("Book meta incomplete in %s" % book_filename)

                if not match:
                    match = pattern.match(files)
                    if match:
                        author = match.group("author")
                        book = match.group("book")
                    else:
                        logger.debug("Pattern match failed [%s]" % files)

                if match:
                    processed_subdirectories.append(
                        subdirectory)  # flag that we found a book in this subdirectory
                    #
                    # If we have a valid looking isbn, and language != "Unknown", add it to cache
                    #
                    if not language:
                        language = "Unknown"

                    if not formatter.is_valid_isbn(isbn):
                        isbn = ""
                    if isbn != "" and language != "Unknown":
                        logger.debug(
                            "Found Language [%s] ISBN [%s]" %
                            (language, isbn))
                        # we need to add it to language cache if not already
                        # there, is_valid_isbn has checked length is 10 or 13
                        if len(isbn) == 10:
                            isbnhead = isbn[0:3]
                        else:
                            isbnhead = isbn[3:6]
                        match = myDB.action(
                            'SELECT lang FROM languages where isbn = "%s"' %
                            (isbnhead)).fetchone()
                        if not match:
                            myDB.action(
                                'insert into languages values ("%s", "%s")' %
                                (isbnhead, language))
                            logger.debug(
                                "Cached Lang [%s] ISBN [%s]" %
                                (language, isbnhead))
                        else:
                            logger.debug(
                                "Already cached Lang [%s] ISBN [%s]" %
                                (language, isbnhead))

                    # get authors name in a consistent format
                    if "," in author:  # "surname, forename"
                        words = author.split(',')
                        author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                    if author[1] == ' ':        
                        author = author.replace(' ', '.')
                        author = author.replace('..', '.')

                    # Check if the author exists, and import the author if not,
                    # before starting any complicated book-name matching to save repeating the search
                    #
                    check_exist_author = myDB.action(
                        'SELECT * FROM authors where AuthorName="%s"' %
                        author).fetchone()
                    if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                        # no match for supplied author, but we're allowed to
                        # add new ones

                        GR = GoodReads(author)
                        try:
                            author_gr = GR.find_author_id()
                        except:
                            logger.warn(
                                "Error finding author id for [%s]" %
                                author)
                            continue

                        # only try to add if GR data matches found author data
                        # not sure what this is for, never seems to fail??
                        if author_gr:
                            authorname = author_gr['authorname']

                            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                            match_auth = author.replace('.', '_')
                            match_auth = match_auth.replace(' ', '_')
                            match_auth = match_auth.replace('__', '_')
                            match_name = authorname.replace('.', '_')
                            match_name = match_name.replace(' ', '_')
                            match_name = match_name.replace('__', '_')
                            match_name = common.remove_accents(match_name)
                            match_auth = common.remove_accents(match_auth)
                            # allow a degree of fuzziness to cater for different accented character handling.
                            # some author names have accents,
                            # filename may have the accented or un-accented version of the character
                            # The currently non-configurable value of fuzziness might need to go in config
                            # We stored GoodReads unmodified author name in
                            # author_gr, so store in LL db under that
                            match_fuzz = fuzz.ratio(match_auth, match_name)
                            if match_fuzz < 90:
                                logger.debug(
                                    "Failed to match author [%s] fuzz [%d]" %
                                    (author, match_fuzz))
                                logger.debug(
                                    "Failed to match author [%s] to authorname [%s]" %
                                    (match_auth, match_name))

                            # To save loading hundreds of books by unknown
                            # authors at GR or GB, ignore if author "Unknown"
                            if (author != "Unknown") and (match_fuzz >= 90):
                                # use "intact" name for author that we stored in
                                # GR author_dict, not one of the various mangled versions
                                # otherwise the books appear to be by a
                                # different author!
                                author = author_gr['authorname']
                                # this new authorname may already be in the
                                # database, so check again
                                check_exist_author = myDB.action(
                                    'SELECT * FROM authors where AuthorName="%s"' %
                                    author).fetchone()
                                if not check_exist_author:
                                    logger.debug(
                                        "Adding new author [%s]" %
                                        author)
                                    if author not in new_authors:
                                        new_authors.append(author)
                                    try:
                                        importer.addAuthorToDB(author)
                                        check_exist_author = myDB.action(
                                            'SELECT * FROM authors where AuthorName="%s"' %
                                            author).fetchone()
                                    except:
                                        continue

                    # check author exists in db, either newly loaded or already
                    # there
                    if not check_exist_author:
                        logger.debug(
                            "Failed to match author [%s] in database" %
                            author)
                    else:
                        # author exists, check if this book by this author is in our database
                        # metadata might have quotes in book name
                        book = book.replace('"', '').replace("'", "")
                        bookid = find_book_in_db(myDB, author, book)
                        if bookid:
                            # check if book is already marked as "Open" (if so,
                            # we already had it)
                            check_status = myDB.action(
                                'SELECT Status from books where BookID="%s"' %
                                bookid).fetchone()
                            if check_status['Status'] != 'Open':
                                # update status as we've got this book
                                myDB.action(
                                    'UPDATE books set Status="Open" where BookID="%s"' %
                                    bookid)
                                book_filename = os.path.join(
                                    r,
                                    files).encode(
                                        lazylibrarian.SYS_ENCODING)
                                # update book location so we can check if it
                                # gets removed, or allow click-to-open
                                myDB.action(
                                    'UPDATE books set BookFile="%s" where BookID="%s"' %
                                    (book_filename, bookid))
                                new_book_count += 1

    cachesize = myDB.action("select count(*) from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" %
        new_book_count)
    logger.info("%s files processed" % file_count)
    if new_book_count:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" %
                stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" %
                stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" %
            stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" %
            stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" %
            stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" %
            stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" %
            stats['sum(uncached)'])
    logger.debug("ISBN Language cache holds %s entries" % cachesize['count(*)'])
    stats = len(
        myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn(
            "There are %s books in your library with unknown language" %
            stats)

    logger.debug('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(
            myDB.select('select BookName from Books where status="%s" and AuthorName="%s"' %
                        ('Open', auth)))
        myDB.action(
            'UPDATE authors set HaveBooks="%s" where AuthorName="%s"' %
            (havebooks, auth))
        totalbooks = len(
            myDB.select('select BookName from Books where status!="%s" and AuthorName="%s"' %
                        ('Ignored', auth)))
        myDB.action(
            'UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' %
            (totalbooks, auth))

    logger.info('Library scan complete')
Example #23
0
    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')
            else:
                title = ''
                authorname = ''

            fullterm = searchterm.replace(' <ll> ', '')
            logger.debug('Now searching Google Books API with searchterm: %s' %
                         fullterm)

            for api_value in api_strings:
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + urllib.quote(
                        api_value +
                        searchterm.encode(lazylibrarian.SYS_ENCODING))
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        title = title.split(' (')[
                            0]  # with out any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace(
                        '"', '')  # and no quotes
                    searchterm = searchterm.strip()
                    set_url = set_url + \
                              urllib.quote(api_value + '"' + searchterm.encode(lazylibrarian.SYS_ENCODING) + '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    set_url = set_url + \
                              urllib.quote(api_value + '"' + searchterm.encode(lazylibrarian.SYS_ENCODING) + '"')
                    searchterm = searchterm.strip()

                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urllib.urlencode(self.params)

                        try:
                            jsonresults, in_cache = get_json_request(URL)
                            if not jsonresults:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn(
                                    'Found no results for %s with value: %s' %
                                    (api_value, searchterm))
                                break
                            else:
                                pass
                        except HTTPError as err:
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while'
                                % err.reason)
                            break

                        startindex += 40

                        for item in jsonresults['items']:

                            total_count += 1

                            # skip if no author, no author is no book.
                            try:
                                Author = item['volumeInfo']['authors'][0]
                            except KeyError:
                                logger.debug(
                                    'Skipped a result without authorfield.')
                                no_author_count += 1
                                continue
                            try:
                                bookname = item['volumeInfo']['title']
                            except KeyError:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(
                                lazylibrarian.CONFIG['IMP_PREFLANG'])
                            booklang = ''
                            if "All" not in valid_langs:  # don't care about languages, accept all
                                try:
                                    # skip if language is not in valid list -
                                    booklang = item['volumeInfo']['language']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' %
                                            (bookname, booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug(
                                        'Skipped %s where no language is found'
                                        % bookname)
                                    continue

                            try:
                                bookpub = item['volumeInfo']['publisher']
                            except KeyError:
                                bookpub = ""

                            try:
                                booksub = item['volumeInfo']['subtitle']
                            except KeyError:
                                booksub = ""

                            try:
                                bookdate = item['volumeInfo']['publishedDate']
                            except KeyError:
                                bookdate = '0000-00-00'
                            bookdate = bookdate[:4]

                            try:
                                bookimg = item['volumeInfo']['imageLinks'][
                                    'thumbnail']
                            except KeyError:
                                bookimg = 'images/nocover.png'

                            try:
                                bookrate = item['volumeInfo']['averageRating']
                            except KeyError:
                                bookrate = 0

                            try:
                                bookpages = item['volumeInfo']['pageCount']
                            except KeyError:
                                bookpages = '0'

                            try:
                                bookgenre = item['volumeInfo']['categories'][0]
                            except KeyError:
                                bookgenre = ""

                            try:
                                bookdesc = item['volumeInfo']['description']
                            except KeyError:
                                bookdesc = 'Not available'

                            try:
                                num_reviews = item['volumeInfo'][
                                    'ratingsCount']
                            except KeyError:
                                num_reviews = 0

                            try:
                                if item['volumeInfo']['industryIdentifiers'][
                                        0]['type'] == 'ISBN_10':
                                    bookisbn = item['volumeInfo'][
                                        'industryIdentifiers'][0]['identifier']
                                else:
                                    bookisbn = 0
                            except KeyError:
                                bookisbn = 0

                            if authorname:
                                author_fuzz = fuzz.ratio(Author, authorname)
                            else:
                                author_fuzz = fuzz.ratio(Author, fullterm)

                            if title:
                                book_fuzz = fuzz.ratio(bookname, title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(bookname))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.ratio(bookname, fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2,
                                               isbn_fuzz)

                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(bookname, dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace
                            bookid = item['id']

                            author = myDB.select(
                                'SELECT AuthorID FROM authors WHERE AuthorName = "%s"'
                                % Author.replace('"', '""'))
                            if author:
                                AuthorID = author[0]['authorid']
                            else:
                                AuthorID = ''

                            resultlist.append({
                                'authorname':
                                Author,
                                'authorid':
                                AuthorID,
                                'bookid':
                                bookid,
                                'bookname':
                                bookname,
                                'booksub':
                                booksub,
                                'bookisbn':
                                bookisbn,
                                'bookpub':
                                bookpub,
                                'bookdate':
                                bookdate,
                                'booklang':
                                booklang,
                                'booklink':
                                item['volumeInfo']['canonicalVolumeLink'],
                                'bookrate':
                                float(bookrate),
                                'bookimg':
                                bookimg,
                                'bookpages':
                                bookpages,
                                'bookgenre':
                                bookgenre,
                                'bookdesc':
                                bookdesc,
                                'author_fuzz':
                                author_fuzz,
                                'book_fuzz':
                                book_fuzz,
                                'isbn_fuzz':
                                isbn_fuzz,
                                'highest_fuzz':
                                highest_fuzz,
                                'num_reviews':
                                num_reviews
                            })

                            resultcount += 1

                except KeyError:
                    break

                logger.debug(
                    "Returning %s result%s for (%s) with keyword: %s" %
                    (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" %
                         (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" %
                         (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" %
                         (no_author_count, plural(no_author_count)))
            logger.debug(
                'The Google Books API was hit %s time%s for searchterm: %s' %
                (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' %
                         traceback.format_exc())
Example #24
0
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added """
    try:
        destdir = lazylibrarian.DIRECTORY('Destination')
        if not startdir:
            if not destdir:
                logger.warn('Cannot find destination directory: %s. Not scanning' % destdir)
                return 0
            startdir = destdir

        if not os.path.isdir(startdir):
            logger.warn('Cannot find directory: %s. Not scanning' % startdir)
            return 0

        if not internet():
            logger.warn('Libraryscan: No internet connection')
            return 0

        myDB = database.DBConnection()

        # keep statistics of full library scans
        if startdir == destdir:
            myDB.action('DELETE from stats')
            try:  # remove any extra whitespace in authornames
                authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "%  %"')
                if authors:
                    logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors))))
                    for author in authors:
                        authorid = author["AuthorID"]
                        authorname = ' '.join(author['AuthorName'].split())
                        # Have we got author name both with-and-without extra spaces? If so, merge them
                        duplicate = myDB.match(
                            'Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' % authorname)
                        if duplicate:
                            myDB.action('DELETE from authors where authorname="%s"' % author['AuthorName'])
                            if author['AuthorID'] != duplicate['AuthorID']:
                                myDB.action('UPDATE books set AuthorID="%s" WHERE AuthorID="%s"' %
                                            (duplicate['AuthorID'], author['AuthorID']))
                        else:
                            myDB.action(
                                'UPDATE authors set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid))
            except Exception as e:
                logger.info('Error: ' + str(e))

        logger.info('Scanning ebook directory: %s' % startdir)

        new_book_count = 0
        modified_count = 0
        rescan_count = 0
        rescan_hits = 0
        file_count = 0
        author = ""

        if lazylibrarian.CONFIG['FULL_SCAN']:
            cmd = 'select AuthorName, BookName, BookFile, BookID from books,authors'
            cmd += ' where books.AuthorID = authors.AuthorID and books.Status="Open"'
            if not startdir == destdir:
                cmd += ' and BookFile like "' + startdir + '%"'
            books = myDB.select(cmd)
            status = lazylibrarian.CONFIG['NOTFOUND_STATUS']
            logger.info('Missing books will be marked as %s' % status)
            for book in books:
                bookID = book['BookID']
                bookfile = book['BookFile']

                if not (bookfile and os.path.isfile(bookfile)):
                    myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                    myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                    logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName']))

        # to save repeat-scans of the same directory if it contains multiple formats of the same book,
        # keep track of which directories we've already looked at
        processed_subdirectories = []
        warned = False  # have we warned about no new authors setting
        matchString = ''
        for char in lazylibrarian.CONFIG['EBOOK_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the EBOOK_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
        pattern = re.compile(matchString, re.VERBOSE)

        for r, d, f in os.walk(startdir):
            for directory in d[:]:
                # prevent magazine being scanned
                if directory.startswith("_") or directory.startswith("."):
                    d.remove(directory)

            for files in f:
                file_count += 1

                if isinstance(r, str):
                    r = r.decode(lazylibrarian.SYS_ENCODING)

                subdirectory = r.replace(startdir, '')
                # Added new code to skip if we've done this directory before.
                # Made this conditional with a switch in config.ini
                # in case user keeps multiple different books in the same subdirectory
                if lazylibrarian.CONFIG['IMP_SINGLEBOOK'] and (subdirectory in processed_subdirectories):
                    logger.debug("[%s] already scanned" % subdirectory)
                else:
                    # If this is a book, try to get author/title/isbn/language
                    # if epub or mobi, read metadata from the book
                    # If metadata.opf exists, use that allowing it to override
                    # embedded metadata. User may have edited metadata.opf
                    # to merge author aliases together
                    # If all else fails, try pattern match for author/title
                    # and look up isbn/lang from LT or GR later
                    match = 0
                    if is_valid_booktype(files):

                        logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory))

                        language = "Unknown"
                        isbn = ""
                        book = ""
                        author = ""
                        gr_id = ""
                        gb_id = ""
                        extn = os.path.splitext(files)[1]

                        # if it's an epub or a mobi we can try to read metadata from it
                        if (extn == ".epub") or (extn == ".mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)

                            try:
                                res = get_book_info(book_filename)
                            except Exception as e:
                                logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e)))
                                res = {}
                            # title and creator are the minimum we need
                            if 'title' in res and 'creator' in res:
                                book = res['title']
                                author = res['creator']
                                if book and len(book) > 2 and author and len(author) > 2:
                                    match = 1
                                if 'language' in res:
                                    language = res['language']
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                if 'type' in res:
                                    extn = res['type']
                                logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                             (isbn, language, author, book, extn))
                            if not match:
                                logger.debug("Book meta incomplete in %s" % book_filename)

                        # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                        # just look for any .opf file in the current directory since we don't know
                        # LL preferred authorname/bookname at this point.
                        # Allow metadata in file to override book contents as may be users pref

                        metafile = opf_file(r)
                        try:
                            res = get_book_info(metafile)
                        except Exception as e:
                            logger.debug('get_book_info failed for %s, %s' % (metafile, str(e)))
                            res = {}
                        # title and creator are the minimum we need
                        if 'title' in res and 'creator' in res:
                            book = res['title']
                            author = res['creator']
                            if book and len(book) > 2 and author and len(author) > 2:
                                match = 1
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'gr_id' in res:
                                gr_id = res['gr_id']
                            logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, gr_id))
                        if not match:
                            logger.debug("File meta incomplete in %s" % metafile)

                        if not match:  # no author/book from metadata file, and not embedded either
                            match = pattern.match(files)
                            if match:
                                author = match.group("author")
                                book = match.group("book")
                                if len(book) <= 2 or len(author) <= 2:
                                    match = 0
                            if not match:
                                logger.debug("Pattern match failed [%s]" % files)

                        if match:
                            # flag that we found a book in this subdirectory
                            processed_subdirectories.append(subdirectory)

                            # If we have a valid looking isbn, and language != "Unknown", add it to cache
                            if language != "Unknown" and is_valid_isbn(isbn):
                                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                                # we need to add it to language cache if not already
                                # there, is_valid_isbn has checked length is 10 or 13
                                if len(isbn) == 10:
                                    isbnhead = isbn[0:3]
                                else:
                                    isbnhead = isbn[3:6]
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead)
                                if not match:
                                    myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                                else:
                                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                            author, authorid, new = addAuthorNameToDB(author)  # get the author name as we know it...

                            if author:
                                # author exists, check if this book by this author is in our database
                                # metadata might have quotes in book name
                                # some books might be stored under a different author name
                                # eg books by multiple authors, books where author is "writing as"
                                # or books we moved to "merge" authors
                                book = book.replace("'", "")

                                # First try and find it under author and bookname
                                # as we may have it under a different bookid or isbn to goodreads/googlebooks
                                # which might have several bookid/isbn for the same book
                                bookid = find_book_in_db(myDB, author, book)

                                if not bookid:
                                    # Title or author name might not match or multiple authors
                                    # See if the gr_id, gb_id is already in our database
                                    if gr_id:
                                        bookid = gr_id
                                    elif gb_id:
                                        bookid = gb_id
                                    else:
                                        bookid = ""

                                    if bookid:
                                        match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid)
                                        if not match:
                                            msg = 'Unable to find book %s by %s in database, trying to add it using '
                                            if bookid == gr_id:
                                                msg += "GoodReads ID " + gr_id
                                            if bookid == gb_id:
                                                msg += "GoogleBooks ID " + gb_id
                                            logger.debug(msg % (book, author))
                                            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and gr_id:
                                                GR_ID = GoodReads(gr_id)
                                                GR_ID.find_book(gr_id, None)
                                            elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks" and gb_id:
                                                GB_ID = GoogleBooks(gb_id)
                                                GB_ID.find_book(gb_id, None)
                                            # see if it's there now...
                                            match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid)
                                            if not match:
                                                logger.debug("Unable to add bookid %s to database" % bookid)
                                                bookid = ""

                                if not bookid and isbn:
                                    # See if the isbn is in our database
                                    match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn)
                                    if match:
                                        bookid = match['BookID']

                                if not bookid:
                                    # get author name from parent directory of this book directory
                                    newauthor = os.path.basename(os.path.dirname(r))
                                    # calibre replaces trailing periods with _ eg Smith Jr. -> Smith Jr_
                                    if newauthor.endswith('_'):
                                        newauthor = newauthor[:-1] + '.'
                                    if author.lower() != newauthor.lower():
                                        logger.debug("Trying authorname [%s]" % newauthor)
                                        bookid = find_book_in_db(myDB, newauthor, book)
                                        if bookid:
                                            logger.warn("%s not found under [%s], found under [%s]" %
                                                        (book, author, newauthor))

                                # at this point if we still have no bookid, it looks like we
                                # have author and book title but no database entry for it
                                if not bookid:
                                    if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                                        # Either goodreads doesn't have the book or it didn't match language prefs
                                        # Since we have the book anyway, try and reload it ignoring language prefs
                                        rescan_count += 1
                                        base_url = 'http://www.goodreads.com/search.xml?q='
                                        params = {"key": lazylibrarian.CONFIG['GR_API']}
                                        if author[1] in '. ':
                                            surname = author
                                            forename = ''
                                            while surname[1] in '. ':
                                                forename = forename + surname[0] + '.'
                                                surname = surname[2:].strip()
                                            if author != forename + ' ' + surname:
                                                logger.debug('Stripped authorname [%s] to [%s %s]' %
                                                            (author, forename, surname))
                                                author = forename + ' ' + surname

                                        author = ' '.join(author.split())  # ensure no extra whitespace

                                        searchname = author + ' ' + book
                                        searchname = cleanName(unaccented(searchname))
                                        searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING))
                                        set_url = base_url + searchterm + '&' + urllib.urlencode(params)
                                        try:
                                            rootxml, in_cache = get_xml_request(set_url)
                                            if not len(rootxml):
                                                logger.debug("Error requesting results from GoodReads")
                                            else:
                                                resultxml = rootxml.getiterator('work')
                                                for item in resultxml:
                                                    booktitle = item.find('./best_book/title').text
                                                    book_fuzz = fuzz.token_set_ratio(booktitle, book)
                                                    if book_fuzz >= 98:
                                                        logger.debug("Rescan found %s : %s" % (booktitle, language))
                                                        rescan_hits += 1
                                                        bookid = item.find('./best_book/id').text
                                                        GR_ID = GoodReads(bookid)
                                                        GR_ID.find_book(bookid, None)
                                                        if language and language != "Unknown":
                                                            # set language from book metadata
                                                            logger.debug("Setting language from metadata %s : %s" % (booktitle, language))
                                                            myDB.action('UPDATE books SET BookLang="%s" WHERE BookID="%s"' %
                                                                        (language, bookid))
                                                        break
                                                if not bookid:
                                                    logger.warn("GoodReads doesn't know about %s" % book)
                                        except Exception as e:
                                            logger.error("Error finding rescan results: %s" % str(e))

                                    elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                                        # if we get here using googlebooks it's because googlebooks
                                        # doesn't have the book. No point in looking for it again.
                                        logger.warn("GoogleBooks doesn't know about %s" % book)

                                # see if it's there now...
                                if bookid:
                                    cmd = 'SELECT books.Status, BookFile, AuthorName, BookName from books,authors '
                                    cmd += 'where books.AuthorID = authors.AuthorID and BookID="%s"' % bookid
                                    check_status = myDB.match(cmd)

                                    if not check_status:
                                        logger.debug('Unable to find bookid %s in database' % bookid)
                                    else:
                                        if check_status['Status'] != 'Open':
                                            # we found a new book
                                            new_book_count += 1
                                            myDB.action(
                                                'UPDATE books set Status="Open" where BookID="%s"' % bookid)

                                        # store book location so we can check if it gets removed
                                        book_filename = os.path.join(r, files)
                                        if not check_status['BookFile']:  # no previous location
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))
                                        # location may have changed since last scan
                                        elif book_filename != check_status['BookFile']:
                                            modified_count += 1
                                            logger.warn("Updating book location for %s %s from %s to %s" %
                                                        (author, book, check_status['BookFile'], book_filename))
                                            logger.debug("%s %s matched %s BookID %s, [%s][%s]" %
                                                        (author, book, check_status['Status'], bookid,
                                                        check_status['AuthorName'], check_status['BookName']))
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))

                                        # update cover file to cover.jpg in book folder (if exists)
                                        bookdir = os.path.dirname(book_filename)
                                        coverimg = os.path.join(bookdir, 'cover.jpg')
                                        if os.path.isfile(coverimg):
                                            cachedir = lazylibrarian.CACHEDIR
                                            cacheimg = os.path.join(cachedir, 'book', bookid + '.jpg')
                                            copyfile(coverimg, cacheimg)
                                else:
                                    logger.warn(
                                        "Failed to match book [%s] by [%s] in database" % (book, author))
                            else:
                                if not warned and not lazylibrarian.CONFIG['ADD_AUTHOR']:
                                    logger.warn("Add authors to database is disabled")
                                    warned = True

        logger.info("%s/%s new/modified book%s found and added to the database" %
                    (new_book_count, modified_count, plural(new_book_count + modified_count)))
        logger.info("%s file%s processed" % (file_count, plural(file_count)))

        if startdir == destdir:
            # On full library scans, check for missing workpages
            setWorkPages()
            # and books with unknown language
            nolang = myDB.match(
                "select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'")
            nolang = nolang['counter']
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))
                # show stats if new books were added
            stats = myDB.match(
                "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                    sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats")

            st= {'GR_book_hits': stats['sum(GR_book_hits)'], 'GB_book_hits': stats['sum(GR_book_hits)'],
                 'GR_lang_hits': stats['sum(GR_lang_hits)'], 'LT_lang_hits': stats['sum(LT_lang_hits)'],
                 'GB_lang_change': stats['sum(GB_lang_change)'], 'cache_hits': stats['sum(cache_hits)'],
                 'bad_lang': stats['sum(bad_lang)'], 'bad_char': stats['sum(bad_char)'],
                 'uncached': stats['sum(uncached)'], 'duplicates': stats['sum(duplicates)']}

            for item in st.keys():
                if st[item] is None:
                    st[item] = 0

            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (st['GB_lang_change'], plural(st['GB_lang_change'])))
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (st['GR_lang_hits'], plural(st['GR_lang_hits'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (st['LT_lang_hits'], plural(st['LT_lang_hits'])))
            logger.debug("Language cache was hit %s time%s" %
                         (st['cache_hits'], plural(st['cache_hits'])))
            logger.debug("Unwanted language removed %s book%s" %
                         (st['bad_lang'], plural(st['bad_lang'])))
            logger.debug("Unwanted characters removed %s book%s" %
                         (st['bad_char'], plural(st['bad_char'])))
            logger.debug("Unable to cache language for %s book%s with missing ISBN" %
                         (st['uncached'], plural(st['uncached'])))
            logger.debug("Found %s duplicate book%s" %
                         (st['duplicates'], plural(st['duplicates'])))
            logger.debug("Rescan %s hit%s, %s miss" %
                         (rescan_hits, plural(rescan_hits), rescan_count - rescan_hits))
            logger.debug("Cache %s hit%s, %s miss" %
                         (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.match("select count('ISBN') as counter from languages")
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])

            # Cache any covers and images
            images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
            if len(images):
                logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    bookid = item['bookid']
                    bookimg = item['bookimg']
                    # bookname = item['bookname']
                    newimg, success = cache_img("book", bookid, bookimg)
                    if success:
                        myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

            images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
            if len(images):
                logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    authorid = item['authorid']
                    authorimg = item['authorimg']
                    # authorname = item['authorname']
                    newimg, success = cache_img("author", authorid, authorimg)
                    if success:
                        myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))

            # On full scan, update bookcounts for all authors, not just new ones - refresh may have located
            # new books for existing authors especially if switched provider gb/gr or changed wanted languages
            authors = myDB.select('select AuthorID from authors')
        else:
            # On single author/book import, just update bookcount for that author
            authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author.replace('"', '""'))

        logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
        for author in authors:
            update_totals(author['AuthorID'])

        logger.info('Library scan complete')
        return new_book_count

    except Exception:
        logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())
Example #25
0
    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0
            title = ''
            authorname = ''

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')

            fullterm = searchterm.replace(' <ll> ', ' ')
            logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)

            for api_value in api_strings:
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + quote(api_value + searchterm)
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        # noinspection PyUnresolvedReferences
                        title = title.split(' (')[0]  # without any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace('"', '').strip()  # and no quotes
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote(api_value + '"' + searchterm + '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    searchterm = searchterm.strip()
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote_plus(api_value + '"' + searchterm + '"')

                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urlencode(self.params)

                        try:
                            jsonresults, in_cache = gb_json_request(URL)
                            if jsonresults is None:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn('Found no results for %s with value: %s' % (api_value, searchterm))
                                break
                            else:
                                pass
                        except Exception as err:
                            if hasattr(err, 'reason'):
                                errmsg = err.reason
                            else:
                                errmsg = str(err)
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                            break

                        startindex += 40

                        for item in jsonresults['items']:
                            total_count += 1

                            book = bookdict(item)
                            if not book['author']:
                                logger.debug('Skipped a result without authorfield.')
                                no_author_count += 1
                                continue

                            if not book['name']:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                            if "All" not in valid_langs:  # don't care about languages, accept all
                                try:
                                    # skip if language is not in valid list -
                                    booklang = book['lang']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' % (book['name'], booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug('Skipped %s where no language is found' % book['name'])
                                    continue

                            if authorname:
                                author_fuzz = fuzz.ratio(book['author'], authorname)
                            else:
                                author_fuzz = fuzz.ratio(book['author'], fullterm)

                            if title:
                                book_fuzz = fuzz.token_set_ratio(book['name'], title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(book['name']))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.token_set_ratio(book['name'], fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(book['name'], dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace

                            AuthorID = ''
                            if book['author']:
                                match = myDB.match(
                                    'SELECT AuthorID FROM authors WHERE AuthorName=?', (
                                        book['author'].replace('"', '""'),))
                                if match:
                                    AuthorID = match['AuthorID']

                            resultlist.append({
                                'authorname': book['author'],
                                'authorid': AuthorID,
                                'bookid': item['id'],
                                'bookname': bookname,
                                'booksub': book['sub'],
                                'bookisbn': book['isbn'],
                                'bookpub': book['pub'],
                                'bookdate': book['date'],
                                'booklang': book['lang'],
                                'booklink': book['link'],
                                'bookrate': float(book['rate']),
                                'bookrate_count': book['rate_count'],
                                'bookimg': book['img'],
                                'bookpages': book['pages'],
                                'bookgenre': book['genre'],
                                'bookdesc': book['desc'],
                                'author_fuzz': author_fuzz,
                                'book_fuzz': book_fuzz,
                                'isbn_fuzz': isbn_fuzz,
                                'highest_fuzz': highest_fuzz,
                                'num_reviews': book['ratings']
                            })

                            resultcount += 1

                except KeyError:
                    break

                logger.debug("Returning %s result%s for (%s) with keyword: %s" %
                             (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
            logger.debug('The Google Books API was hit %s time%s for searchterm: %s' %
                         (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())