예제 #1
0
def import_CSV(search_dir=None, library='eBook'):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
        Optionally delete the file on successful completion

        search_dir: directory to look in for the csv file
        library:    'eBook' uses the Status column and an ebook import,
                    any other value uses AudioStatus and an audio import

        Returns a message string: either the final summary line or the
        reason nothing was imported. Exceptions are not propagated, they are
        logged and returned as the message.
    """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir, library=library)

        headers = None

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        total = 0
        existing = 0

        if not csvFile:
            msg = "No %s CSV file found in %s" % (library, search_dir)
            logger.warn(msg)
            return msg

        logger.debug('Reading file %s' % csvFile)
        # Context manager so the handle is closed before the file is deleted
        # or renamed further down, and also on the early "invalid csv" return
        with open(csvFile, 'rU') as csvhandle:
            csvreader = reader(csvhandle)
            for row in csvreader:
                if csvreader.line_num == 1:
                    # If we are on the first line, create the headers list from the first row
                    headers = row
                    if 'Author' not in headers or 'Title' not in headers:
                        msg = 'Invalid CSV file found %s' % csvFile
                        logger.warn(msg)
                        return msg
                    continue

                total += 1
                item = dict(list(zip(headers, row)))
                authorname = formatAuthorName(item['Author'])
                title = makeUnicode(item['Title'])

                authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

                if authmatch:
                    logger.debug("CSV: Author %s found in database" % authorname)
                else:
                    logger.debug("CSV: Author %s not found" % authorname)
                    newauthor, authorid, new = addAuthorNameToDB(author=authorname,
                                                                 addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                    if len(newauthor) and newauthor != authorname:
                        logger.debug("Preferred authorname changed from [%s] to [%s]" % (authorname, newauthor))
                        authorname = newauthor
                    if new:
                        authcount += 1

                bookmatch = finditem(item, authorname, library=library)
                result = ''
                # Distinguishes "import was tried and failed" from "best search
                # result scored too low", which need different skip messages
                import_attempted = False
                if bookmatch:
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    if library == 'eBook':
                        bookstatus = bookmatch['Status']
                    else:
                        bookstatus = bookmatch['AudioStatus']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        existing += 1
                        logger.info('Found %s %s by %s, already marked as "%s"' %
                                    (library, bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found %s %s by %s, marking as "Wanted"' % (library, bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        if library == 'eBook':
                            newValueDict = {"Status": "Wanted"}
                        else:
                            newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        bookcount += 1
                else:
                    searchterm = "%s <ll> %s" % (title, authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                            bookmatch = True
                    if not bookmatch:  # no match on full searchterm, try splitting out subtitle
                        newtitle, _ = split_title(authorname, title)
                        if newtitle != title:
                            title = newtitle
                            searchterm = "%s <ll> %s" % (title, authorname)
                            results = search_for(unaccented(searchterm))
                            if results:
                                result = results[0]
                                if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                        and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                    bookmatch = True
                    if bookmatch:
                        logger.info("Found (%s%% %s%%) %s: %s for %s: %s" %
                                    (result['author_fuzz'], result['book_fuzz'],
                                     result['authorname'], result['bookname'],
                                     authorname, title))
                        import_attempted = True
                        if library == 'eBook':
                            import_book(result['bookid'], ebook="Wanted", wait=True)
                        else:
                            import_book(result['bookid'], audio="Wanted", wait=True)
                        # Confirm the import by looking for the book row it should have created
                        imported = myDB.match('select * from books where BookID=?', (result['bookid'],))
                        if imported:
                            bookcount += 1
                        else:
                            bookmatch = False

                if not bookmatch:
                    msg = "Skipping book %s by %s" % (title, authorname)
                    if not result:
                        msg += ', No results found'
                        logger.warn(msg)
                    elif import_attempted:
                        msg += ', Failed to import %s' % result['bookid']
                        logger.warn(msg)
                    else:
                        msg += ', No match found'
                        logger.warn(msg)
                        msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                    result['authorname'], result['bookname'])
                        logger.warn(msg)
                    skipcount += 1

        msg = "Found %i %s%s in csv file, %i already existing or wanted" % (total, library,
                                                                            plural(total), existing)
        logger.info(msg)
        # Argument order is count, library, plural suffix for each "%i %s%s" group
        msg = "Added %i new author%s, marked %i %s%s as 'Wanted', %i %s%s not found" % \
              (authcount, plural(authcount), bookcount, library, plural(bookcount),
               skipcount, library, plural(skipcount))
        logger.info(msg)
        if lazylibrarian.CONFIG['DELETE_CSV']:
            if skipcount == 0:
                logger.info("Deleting %s on successful completion" % csvFile)
                try:
                    os.remove(csvFile)
                except OSError as why:
                    logger.warn('Unable to delete %s: %s' % (csvFile, why.strerror))
            else:
                logger.warn("Not deleting %s as not all books found" % csvFile)
                # A directory left at the .fail name would block the rename, clear it first
                if os.path.isdir(csvFile + '.fail'):
                    try:
                        shutil.rmtree(csvFile + '.fail')
                    except Exception as why:
                        logger.warn("Unable to remove %s, %s %s" % (csvFile + '.fail',
                                                                    type(why).__name__, str(why)))
                try:
                    _ = safe_move(csvFile, csvFile + '.fail')
                except Exception as e:
                    logger.error("Unable to rename %s, %s %s" %
                                 (csvFile, type(e).__name__, str(e)))
                    # Rename failed: log permission diagnostics for the file and its directory
                    if not os.access(csvFile, os.R_OK):
                        logger.error("%s is not readable" % csvFile)
                    if not os.access(csvFile, os.W_OK):
                        logger.error("%s is not writeable" % csvFile)
                    parent = os.path.dirname(csvFile)
                    try:
                        with open(os.path.join(parent, 'll_temp'), 'w') as f:
                            f.write('test')
                        os.remove(os.path.join(parent, 'll_temp'))
                    except Exception as why:
                        logger.error("Directory %s is not writeable: %s" % (parent, why))
        return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
예제 #2
0
    def find_book(self, bookid=None, queue=None):
        """ Request one book from the goodreads API by bookid and add it to
            the books table with status "Wanted".
            If the book's author is not in the authors table, the author is
            added with status "Ignored".
            Afterwards tries to update the cover image, series and work page.

            bookid: goodreads book id, concatenated into the request url
            queue:  not used in this method

            Returns None. Request failures, or not being able to find an
            author id, are logged and end the method early.
        """
        myDB = database.DBConnection()

        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

        try:
            rootxml, in_cache = get_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book")
                return
        except Exception as e:
            logger.error("Error finding book: %s" % str(e))
            return

        bookLanguage = rootxml.find('./book/language_code').text
        bookname = rootxml.find('./book/title').text

        if not bookLanguage:
            bookLanguage = "Unknown"
        #
        # PAB user has said they want this book, don't block for unwanted language, just warn
        #
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if bookLanguage not in valid_langs:
            logger.debug('Book %s goodreads language does not match preference, %s' % (bookname, bookLanguage))

        pubyear = rootxml.find('./book/publication_year').text
        if pubyear is None:
            bookdate = "0000"
        else:
            bookdate = pubyear

        try:
            bookimg = rootxml.find('./book/img_url').text
            if 'assets/nocover' in bookimg:
                bookimg = 'images/nocover.png'
        except (KeyError, AttributeError, TypeError):
            # AttributeError: element missing, TypeError: element present but empty (text is None)
            bookimg = 'images/nocover.png'

        authorname = rootxml.find('./book/authors/author/name').text
        bookdesc = rootxml.find('./book/description').text
        bookisbn = rootxml.find('./book/isbn').text
        bookpub = rootxml.find('./book/publisher').text
        booklink = rootxml.find('./book/link').text
        bookrate = float(rootxml.find('./book/average_rating').text)
        # Same './book/...' form as the other lookups (was '.book/num_pages')
        bookpages = rootxml.find('./book/num_pages').text

        name = authorname
        GR = GoodReads(name)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            # Bind values as parameters so quotes in an id or name cannot break the query
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
            if not match:
                match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
                if match:
                    logger.debug('%s: Changing authorid from %s to %s' %
                                 (author['authorname'], AuthorID, match['AuthorID']))
                    AuthorID = match['AuthorID']    # we have a different authorid for that authorname
                else:
                    # no author but request to add book, add author as "ignored"
                    # User hit "add book" button from a search
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": "Ignored"
                    }
                    myDB.upsert("authors", newValueDict, controlValueDict)
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
            return

        bookname = unaccented(bookname)
        bookname, booksub = split_title(authorname, bookname)
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic).strip()
        booksub = replace_all(booksub, dic).strip()
        # Look for series info in the subtitle if there is one, else in the title
        if booksub:
            series, seriesNum = bookSeries(booksub)
        else:
            series, seriesNum = bookSeries(bookname)

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": "",
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": bookLanguage,
            "Status": "Wanted",
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        elif bookimg and bookimg.startswith('http'):
            # Remote cover: store the link returned by cache_img if caching worked
            link, success = cache_img("book", bookid, bookimg)
            if success:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('Failed to cache image for %s' % bookimg)

        if lazylibrarian.CONFIG['ADD_SERIES']:
            # prefer series info from librarything
            seriesdict = getWorkSeries(bookid)
            if seriesdict:
                logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
            else:
                # fall back to the series parsed from the title/subtitle above
                if series:
                    seriesdict = {cleanName(unaccented(series)): seriesNum}
            setSeries(seriesdict, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
예제 #3
0
def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
        Optionally delete the file on successful completion

        search_dir: directory to look in for the csv file

        Returns a message string: either the final summary line or the
        reason nothing was imported. Exceptions are not propagated, they are
        logged and returned as the message.
    """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir)

        headers = None

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        total = 0
        existing = 0

        if not csvFile:
            msg = "No CSV file found in %s" % search_dir
            logger.warn(msg)
            return msg

        logger.debug('Reading file %s' % csvFile)
        # Context manager so the handle is closed before the file is deleted
        # further down, and also on the early "invalid csv" return
        with open(csvFile, 'rU') as csvhandle:
            csvreader = reader(csvhandle)
            for row in csvreader:
                if csvreader.line_num == 1:
                    # If we are on the first line, create the headers list from the first row
                    headers = row
                    if 'Author' not in headers or 'Title' not in headers:
                        msg = 'Invalid CSV file found %s' % csvFile
                        logger.warn(msg)
                        return msg
                    continue

                total += 1
                item = dict(list(zip(headers, row)))
                authorname = formatAuthorName(item['Author'])
                title = makeUnicode(item['Title'])

                authmatch = myDB.match(
                    'SELECT * FROM authors where AuthorName=?',
                    (authorname, ))

                if authmatch:
                    logger.debug("CSV: Author %s found in database" %
                                 authorname)
                else:
                    logger.debug("CSV: Author %s not found" % authorname)
                    newauthor, authorid, new = addAuthorNameToDB(
                        author=authorname,
                        addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                    if len(newauthor) and newauthor != authorname:
                        logger.debug(
                            "Preferred authorname changed from [%s] to [%s]"
                            % (authorname, newauthor))
                        authorname = newauthor
                    if new:
                        authcount += 1

                bookmatch = finditem(item, authorname)
                result = ''
                # Distinguishes "import was tried and failed" from "best search
                # result scored too low", which need different skip messages
                import_attempted = False
                if bookmatch:
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        existing += 1
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info(
                            'Found book %s by %s, marking as "Wanted"' %
                            (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict,
                                    controlValueDict)
                        bookcount += 1
                else:
                    searchterm = "%s <ll> %s" % (title, authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                            bookmatch = True
                    if not bookmatch:  # no match on full searchterm, try splitting out subtitle
                        newtitle, _ = split_title(authorname, title)
                        if newtitle != title:
                            title = newtitle
                            searchterm = "%s <ll> %s" % (title, authorname)
                            results = search_for(unaccented(searchterm))
                            if results:
                                result = results[0]
                                if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                        and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                    bookmatch = True
                    if bookmatch:
                        logger.info(
                            "Found (%s%% %s%%) %s: %s for %s: %s" %
                            (result['author_fuzz'], result['book_fuzz'],
                             result['authorname'], result['bookname'],
                             authorname, title))
                        import_attempted = True
                        import_book(result['bookid'], wait=True)
                        # Confirm the import by looking for the book row it should have created
                        imported = myDB.match(
                            'select * from books where BookID=?',
                            (result['bookid'], ))
                        if imported:
                            bookcount += 1
                        else:
                            bookmatch = False

                if not bookmatch:
                    msg = "Skipping book %s by %s" % (title, authorname)
                    if not result:
                        msg += ', No results found'
                        logger.warn(msg)
                    elif import_attempted:
                        msg += ', Failed to import %s' % result['bookid']
                        logger.warn(msg)
                    else:
                        msg += ', No match found'
                        logger.warn(msg)
                        msg = "Closest match (%s%% %s%%) %s: %s" % (
                            result['author_fuzz'], result['book_fuzz'],
                            result['authorname'], result['bookname'])
                        logger.warn(msg)
                    skipcount += 1

        msg = "Found %i book%s in csv file, %i already existing or wanted" % (
            total, plural(total), existing)
        logger.info(msg)
        msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
              (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount))
        logger.info(msg)
        if lazylibrarian.CONFIG['DELETE_CSV']:
            # Only delete when every row was handled, so a partly failed file can be retried
            if skipcount == 0:
                logger.info("Deleting %s on successful completion" %
                            csvFile)
                try:
                    os.remove(csvFile)
                except OSError as why:
                    logger.warn('Unable to delete %s: %s' %
                                (csvFile, why.strerror))
            else:
                logger.warn("Not deleting %s as not all books found" %
                            csvFile)
        return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
예제 #4
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", refresh=False):
        try:
            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

            # Artist is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
            except Exception as e:
                logger.error("Error fetching author books: %s" % str(e))
                return
            if rootxml is None:
                logger.debug("Error requesting author books")
                return
            if not in_cache:
                api_hits += 1
            resultxml = rootxml.getiterator('book')

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])

            resultsCount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0

            if not len(resultxml):
                logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
            else:
                logger.debug("[%s] Now processing books with GoodReads API" % authorname)
                logger.debug(u"url " + URL)

                authorNameResult = rootxml.find('./author/name').text
                # Goodreads sometimes puts extra whitepase in the author names!
                authorNameResult =  ' '.join(authorNameResult.split())
                logger.debug(u"GoodReads author name [%s]" % authorNameResult)
                loopCount = 1

                while resultxml:
                    for book in resultxml:
                        total_count += 1

                        if book.find('publication_year').text is None:
                            pubyear = "0000"
                        else:
                            pubyear = book.find('publication_year').text

                        try:
                            bookimg = book.find('image_url').text
                            if 'nocover' in bookimg:
                                bookimg = 'images/nocover.png'
                        except (KeyError, AttributeError):
                            bookimg = 'images/nocover.png'

                        bookLanguage = "Unknown"
                        find_field = "id"
                        isbn = ""
                        isbnhead = ""
                        if "All" not in valid_langs:  # do we care about language
                            if book.find('isbn').text:
                                find_field = "isbn"
                                isbn = book.find('isbn').text
                                isbnhead = isbn[0:3]
                            else:
                                if book.find('isbn13').text:
                                    find_field = "isbn13"
                                    isbn = book.find('isbn13').text
                                    isbnhead = isbn[3:6]
                            # Try to use shortcut of ISBN identifier codes described here...
                            # https://en.wikipedia.org/wiki/List_of_ISBN_identifier_groups
                            if isbnhead:
                                if find_field == "isbn13" and isbn.startswith('979'):
                                    for item in lazylibrarian.isbn_979_dict:
                                        if isbnhead.startswith(item):
                                            bookLanguage = lazylibrarian.isbn_979_dict[item]
                                            break
                                    if bookLanguage != "Unknown":
                                        logger.debug("ISBN979 returned %s for %s" % (bookLanguage, isbnhead))
                                elif (find_field == "isbn") or (find_field == "isbn13" and isbn.startswith('978')):
                                    for item in lazylibrarian.isbn_978_dict:
                                        if isbnhead.startswith(item):
                                            bookLanguage = lazylibrarian.isbn_978_dict[item]
                                            break
                                    if bookLanguage != "Unknown":
                                        logger.debug("ISBN978 returned %s for %s" % (bookLanguage, isbnhead))

                            if bookLanguage == "Unknown" and isbnhead:
                                # Nothing in the isbn dictionary, try any cached results
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead)
                                if match:
                                    bookLanguage = match['lang']
                                    cache_hits += 1
                                    logger.debug("Found cached language [%s] for %s [%s]" %
                                                 (bookLanguage, find_field, isbnhead))
                                else:
                                    # no match in cache, try searching librarything for a language code using the isbn
                                    # if no language found, librarything return value is "invalid" or "unknown"
                                    # returns plain text, not xml
                                    BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                    try:
                                        librarything_wait()
                                        resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                        lt_lang_hits += 1
                                        logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                        if 'invalid' in resp or 'Unknown' in resp:
                                            bookLanguage = "Unknown"
                                        else:
                                            bookLanguage = resp  # found a language code
                                            myDB.action('insert into languages values ("%s", "%s")' %
                                                        (isbnhead, bookLanguage))
                                            logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                                    except Exception as e:
                                        logger.error("Error finding LT language result for [%s], %s" % (isbn, str(e)))

                            if bookLanguage == "Unknown":
                                # still  no earlier match, we'll have to search the goodreads api
                                try:
                                    if book.find(find_field).text:
                                        BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                                   book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                        logger.debug(u"Book URL: " + BOOK_URL)

                                        time_now = int(time.time())
                                        if time_now <= lazylibrarian.LAST_GOODREADS:
                                            time.sleep(1)

                                        bookLanguage = ""
                                        try:
                                            BOOK_rootxml, in_cache = get_xml_request(BOOK_URL)
                                            if BOOK_rootxml is None:
                                                logger.debug('Error requesting book language code')
                                            else:
                                                if not in_cache:
                                                    # only update last_goodreads if the result wasn't found in the cache
                                                    lazylibrarian.LAST_GOODREADS = time_now
                                                try:
                                                    bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                                except Exception as e:
                                                    logger.debug("Error finding language_code in book xml: %s" % str(e))
                                        except Exception as e:
                                            logger.debug("Error getting book xml: %s" % str(e))

                                        if not in_cache:
                                            gr_lang_hits += 1
                                        if not bookLanguage:
                                            bookLanguage = "Unknown"
                                            # At this point, give up?
                                            # WhatWork on author/title doesn't give us a language.
                                            # It might give us the "original language" of the book (but not always)
                                            # and our copy might not be in the original language anyway
                                            # eg "The Girl With the Dragon Tattoo" original language Swedish
                                            # If we have an isbn, try WhatISBN to get alternatives
                                            # in case any of them give us a language, but it seems if thinglang doesn't
                                            # have a language for the first isbn code, it doesn't for any of the
                                            # alternatives either
                                            # Goodreads search results don't include the language. Although sometimes
                                            # it's in the html page, it's not in the xml results

                                        if isbnhead != "":
                                            # if GR didn't give an isbn we can't cache it, just use language for this book
                                            myDB.action('insert into languages values ("%s", "%s")' %
                                                        (isbnhead, bookLanguage))
                                            logger.debug("GoodReads reports language [%s] for %s" %
                                                         (bookLanguage, isbnhead))
                                        else:
                                            not_cached += 1

                                        logger.debug(u"GR language: " + bookLanguage)
                                    else:
                                        logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                        # continue

                                except Exception as e:
                                    logger.debug(u"Goodreads language search failed: %s" % str(e))

                            if bookLanguage not in valid_langs:
                                logger.debug('Skipped %s with language %s' % (book.find('title').text, bookLanguage))
                                ignored += 1
                                continue

                        bookname = book.find('title').text
                        bookid = book.find('id').text
                        bookdesc = book.find('description').text
                        bookisbn = book.find('isbn').text
                        bookpub = book.find('publisher').text
                        booklink = book.find('link').text
                        bookrate = float(book.find('average_rating').text)
                        bookpages = book.find('num_pages').text
                        bookname = unaccented(bookname)

                        bookname, booksub = split_title(authorNameResult, bookname)

                        dic = {':': '.', '"': ''}  # do we need to strip apostrophes , '\'': ''}
                        bookname = replace_all(bookname, dic)
                        bookname = bookname.strip()  # strip whitespace
                        booksub = replace_all(booksub, dic)
                        booksub = booksub.strip()  # strip whitespace
                        if booksub:
                            series, seriesNum = bookSeries(booksub)
                        else:
                            series, seriesNum = bookSeries(bookname)

                        rejected = False
                        check_status = False

                        if re.match('[^\w-]', bookname):  # reject books with bad characters in title
                            logger.debug(u"removed result [" + bookname + "] for bad characters")
                            removedResults += 1
                            rejected = True

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            if pubyear > today()[:4]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, pubyear))
                                removedResults += 1
                                rejected = True

                        if not rejected and not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' %
                                         (bookid, authorNameResult))
                            removedResults += 1
                            rejected = True

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName = "%s" COLLATE NOCASE and AuthorName = "%s" COLLATE NOCASE' % \
                                    (bookname, authorNameResult.replace('"', '""'))
                            match = myDB.match(cmd)
                            if match:
                                if match['BookID'] != bookid:
                                    # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorNameResult, bookname, bookid))
                                    duplicates += 1
                                    rejected = True

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=%s' % bookid
                            match = myDB.match(cmd)
                            if match:
                                # we have a book with this bookid already
                                if bookname != match['BookName'] or authorNameResult != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorNameResult, bookname,
                                                 match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorNameResult, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = True

                        if check_status or not rejected:
                            existing_book = myDB.match('SELECT Status,Manual FROM books WHERE BookID = "%s"' % bookid)
                            if existing_book:
                                book_status = existing_book['Status']
                                locked = existing_book['Manual']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                locked = False

                            # Is the book already in the database?
                            # Leave alone if locked or status "ignore"
                            if not locked and book_status != "Ignored":
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": "",
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": pubyear,
                                    "BookLang": bookLanguage,
                                    "Status": book_status,
                                    "BookAdded": today()
                                }

                                resultsCount += 1
                                updated = False

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                                if 'nocover' in bookimg or 'nophoto' in bookimg:
                                    # try to get a cover from librarything
                                    workcover = getBookCover(bookid)
                                    if workcover:
                                        logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif bookimg and bookimg.startswith('http'):
                                    link, success = cache_img("book", bookid, bookimg, refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % bookimg)

                                seriesdict = {}
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    # prefer series info from librarything
                                    seriesdict = getWorkSeries(bookid)
                                    if seriesdict:
                                        logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
                                        updated = True
                                    else:
                                        if series:
                                            seriesdict = {cleanName(unaccented(series)): seriesNum}
                                    setSeries(seriesdict, bookid)

                                new_status = setStatus(bookid, seriesdict, bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug(u"[%s] Added book: %s [%s] status %s" %
                                                (authorname, bookname, bookLanguage, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug(u"[%s] Updated book: %s [%s] status %s" %
                                                (authorname, bookname, bookLanguage, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1

                    loopCount += 1
                    URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                          urllib.urlencode(self.params) + '&page=' + str(loopCount)
                    resultxml = None
                    try:
                        rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                        if rootxml is None:
                            logger.debug('Error requesting next page of results')
                        else:
                            resultxml = rootxml.getiterator('book')
                            if not in_cache:
                                api_hits += 1
                    except Exception as e:
                        resultxml = None
                        logger.error("Error finding next page of results: %s" % str(e))

                    if resultxml:
                        if all(False for _ in resultxml):  # returns True if iterator is empty
                            resultxml = None

            deleteEmptySeries()
            lastbook = myDB.match('SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID="%s" \
                                AND Status != "Ignored" order by BookDate DESC' % authorid)
            if lastbook:
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": "Active",
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }
            myDB.upsert("authors", newValueDict, controlValueDict)

            # This is here because GoodReads sometimes has several entries with the same BookID!
            modified_count = added_count + updated_count

            logger.debug("Found %s result%s" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug(
                "Removed %s bad character or no-name result%s" %
                (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s" % (modified_count, plural(modified_count)))

            myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' %
                        (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GR.get_author_books: %s' % traceback.format_exc())
예제 #5
0
def find_book_in_db(myDB, author, book):
    """Search the library for a book by author and title.

    An exact author/title match is tried first. If that fails, the title is
    fuzzy-matched against every book the database holds for the author, using
    hard-coded thresholds checked in this order: ratio > 90, partial_ratio > 85,
    then partial_ratio of the first part returned by split_title() > 95.

    :param myDB: database connection providing match() and select()
    :param author: author name, compared against books.AuthorName
    :param book: book title to look for
    :return: BookID of the matching book, or 0 if nothing matched
    """
    # Bind the values as parameters instead of formatting them into the SQL.
    # SQLite resolves a double-quoted token as a column name before falling back
    # to a string literal, so a title such as BookName would otherwise compare
    # the column with itself and match every row.
    match = myDB.match('SELECT BookID FROM books where AuthorName=? and BookName=?', (author, book))
    if match:
        logger.debug('Exact match [%s]' % book)
        return match['BookID']

    # Try a more complex fuzzy match against each book in the db by this author
    # Using hard-coded ratios for now, ratio high (>90), partial_ratio lower (>85)
    # These are results that work well on my library, minimal false matches and no misses
    # on books that should be matched
    # Maybe make ratios configurable in config.ini later
    books = myDB.select('SELECT BookID,BookName,BookISBN FROM books where AuthorName=?', (author,))
    best_ratio = 0
    best_partial = 0
    best_partname = 0
    ratio_name = ""
    partial_name = ""
    partname_name = ""
    ratio_id = 0
    partial_id = 0
    partname_id = 0
    partname = 0

    book_lower = unaccented(book.lower())
    book_partname, _book_sub = split_title(author, book_lower)
    if book_partname == book_lower:
        # split_title returned the title unchanged, so the separate
        # partname comparison would only repeat the partial one
        book_partname = ''

    # word count of the wanted title does not change inside the loop
    book_words = len(getList(book_lower))

    for a_book in books:
        # tidy up everything to raise fuzziness scores
        # still need to lowercase for matching against partial_name later on
        a_book_lower = unaccented(a_book['BookName'].lower())
        a_book_words = len(getList(a_book_lower))

        ratio = fuzz.ratio(book_lower, a_book_lower)
        partial = fuzz.partial_ratio(book_lower, a_book_lower)
        if book_partname:
            partname = fuzz.partial_ratio(book_partname, a_book_lower)

        # lose a point for each extra word in the fuzzy matches so we get the closest match
        penalty = abs(book_words - a_book_words)
        ratio -= penalty
        partial -= penalty

        if ratio > best_ratio:
            best_ratio = ratio
            ratio_name = a_book['BookName']
            ratio_id = a_book['BookID']
        if partial > best_partial:
            best_partial = partial
            partial_name = a_book['BookName']
            partial_id = a_book['BookID']
        if partname > best_partname:
            best_partname = partname
            partname_name = a_book['BookName']
            partname_id = a_book['BookID']

        if partial == best_partial:
            # prefer the match closest to the left, ie prefer starting with a match and ignoring the rest
            # this eliminates most false matches against omnibuses when we want a single book
            # find the position of the shortest string in the longest
            # NOTE(review): str.find() returns -1 when there is no substring match, and -1
            # sorts before a real match at position 0 in the comparison below - confirm intended
            if book_words >= a_book_words:
                match1 = book_lower.find(a_book_lower)
            else:
                match1 = a_book_lower.find(book_lower)

            partial_lower = partial_name.lower()
            if book_words >= len(getList(partial_lower)):
                match2 = book_lower.find(partial_lower)
            else:
                match2 = partial_lower.find(book_lower)

            if match1 < match2:
                logger.debug(
                    "Fuzz left change, prefer [%s] over [%s] for [%s]" %
                    (a_book['BookName'], partial_name, book))
                # best_partial already equals partial here, only the winner changes
                partial_name = a_book['BookName']
                partial_id = a_book['BookID']

    if best_ratio > 90:
        logger.debug(
            "Fuzz match   ratio [%d] [%s] [%s]" % (best_ratio, book, ratio_name))
        return ratio_id
    if best_partial > 85:
        logger.debug(
            "Fuzz match partial [%d] [%s] [%s]" % (best_partial, book, partial_name))
        return partial_id
    if best_partname > 95:
        logger.debug(
            "Fuzz match partname [%d] [%s] [%s]" % (best_partname, book, partname_name))
        return partname_id

    logger.debug(
        'Fuzz failed [%s - %s] ratio [%d,%s], partial [%d,%s], partname [%d,%s]' %
        (author, book, best_ratio, ratio_name, best_partial, partial_name, best_partname, partname_name))
    return 0
예제 #6
0
def find_book_in_db(myDB, author, book):
    """Search the library for a book by author and title, ignoring case.

    An exact (case-insensitive) author/title match is tried first. If that
    fails, the title is fuzzy-matched against every book the database holds for
    the author, using hard-coded thresholds checked in this order: ratio > 90,
    partial_ratio > 85, then partial_ratio of the first part returned by
    split_title() > 95.

    :param myDB: database connection providing match() and select()
    :param author: author name, compared against authors.AuthorName
    :param book: book title to look for
    :return: BookID of the matching book, or 0 if nothing matched
    """
    # Bind the values as parameters instead of formatting them into the SQL.
    # SQLite resolves a double-quoted token as a column name before falling back
    # to a string literal, so a title such as BookName would otherwise compare
    # the column with itself and match every row.
    cmd = 'SELECT BookID FROM books,authors where books.AuthorID = authors.AuthorID '
    cmd += 'and AuthorName=? COLLATE NOCASE and BookName=? COLLATE NOCASE'
    match = myDB.match(cmd, (author, book))
    if match:
        logger.debug('Exact match [%s]' % book)
        return match['BookID']

    # Try a more complex fuzzy match against each book in the db by this author
    # Using hard-coded ratios for now, ratio high (>90), partial_ratio lower (>85)
    # These are results that work well on my library, minimal false matches and no misses
    # on books that should be matched
    # Maybe make ratios configurable in config.ini later
    cmd = 'SELECT BookID,BookName,BookISBN FROM books,authors where books.AuthorID = authors.AuthorID '
    cmd += 'and AuthorName=? COLLATE NOCASE'
    books = myDB.select(cmd, (author,))
    best_ratio = 0
    best_partial = 0
    best_partname = 0
    ratio_name = ""
    partial_name = ""
    partname_name = ""
    ratio_id = 0
    partial_id = 0
    partname_id = 0
    partname = 0

    book_lower = unaccented(book.lower())
    book_partname, _book_sub = split_title(author, book_lower)
    if book_partname == book_lower:
        # split_title returned the title unchanged, so the separate
        # partname comparison would only repeat the partial one
        book_partname = ''

    # word count of the wanted title does not change inside the loop
    book_words = len(getList(book_lower))

    for a_book in books:
        # tidy up everything to raise fuzziness scores
        # still need to lowercase for matching against partial_name later on
        a_book_lower = unaccented(a_book['BookName'].lower())
        a_book_words = len(getList(a_book_lower))

        ratio = fuzz.ratio(book_lower, a_book_lower)
        partial = fuzz.partial_ratio(book_lower, a_book_lower)
        if book_partname:
            partname = fuzz.partial_ratio(book_partname, a_book_lower)

        # lose a point for each extra word in the fuzzy matches so we get the closest match
        penalty = abs(book_words - a_book_words)
        ratio -= penalty
        partial -= penalty

        if ratio > best_ratio:
            best_ratio = ratio
            ratio_name = a_book['BookName']
            ratio_id = a_book['BookID']
        if partial > best_partial:
            best_partial = partial
            partial_name = a_book['BookName']
            partial_id = a_book['BookID']
        if partname > best_partname:
            best_partname = partname
            partname_name = a_book['BookName']
            partname_id = a_book['BookID']

        if partial == best_partial:
            # prefer the match closest to the left, ie prefer starting with a match and ignoring the rest
            # this eliminates most false matches against omnibuses when we want a single book
            # find the position of the shortest string in the longest
            # NOTE(review): str.find() returns -1 when there is no substring match, and -1
            # sorts before a real match at position 0 in the comparison below - confirm intended
            if book_words >= a_book_words:
                match1 = book_lower.find(a_book_lower)
            else:
                match1 = a_book_lower.find(book_lower)

            partial_lower = partial_name.lower()
            if book_words >= len(getList(partial_lower)):
                match2 = book_lower.find(partial_lower)
            else:
                match2 = partial_lower.find(book_lower)

            if match1 < match2:
                logger.debug(
                    "Fuzz left change, prefer [%s] over [%s] for [%s]" %
                    (a_book['BookName'], partial_name, book))
                # best_partial already equals partial here, only the winner changes
                partial_name = a_book['BookName']
                partial_id = a_book['BookID']

    if best_ratio > 90:
        logger.debug(
            "Fuzz match ratio [%d] [%s] [%s]" % (best_ratio, book, ratio_name))
        return ratio_id
    if best_partial > 85:
        logger.debug(
            "Fuzz match partial [%d] [%s] [%s]" % (best_partial, book, partial_name))
        return partial_id
    if best_partname > 95:
        logger.debug(
            "Fuzz match partname [%d] [%s] [%s]" % (best_partname, book, partname_name))
        return partname_id

    if books:
        logger.debug(
            'Fuzz failed [%s - %s] ratio [%d,%s], partial [%d,%s], partname [%d,%s]' %
            (author, book, best_ratio, ratio_name, best_partial, partial_name, best_partname, partname_name))
    else:
        # distinguish "author has no books at all" from "no title was close enough"
        logger.debug('No books found in database for %s' % author)
    return 0
예제 #7
0
    def find_book(self, bookid=None, queue=None):
        """Fetch a single book from GoodReads and store it in the database as "Wanted".

        The book is requested from the GoodReads book/show endpoint using
        self.params, its author is resolved with GoodReads.find_author_id(), and
        the result is upserted into the books table. The cover, series and work
        page are then updated where the helper lookups return something.

        :param bookid: GoodReads book id as a string
        :param queue: not used by this method
        :return: None. The method logs and gives up early when no bookid is
                 supplied, the request fails, or no author id can be found.
        """
        if not bookid:
            # the request URL is built by string concatenation, so a missing id
            # would otherwise raise TypeError before anything could be logged
            logger.debug("No bookid supplied, unable to find book")
            return

        myDB = database.DBConnection()

        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

        try:
            rootxml, in_cache = get_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book")
                return
        except Exception as e:
            logger.error("Error finding book: %s" % str(e))
            return

        # findtext() gives None for a missing element and '' for an empty one,
        # both of which fall through to the default
        bookLanguage = rootxml.findtext('./book/language_code') or "Unknown"
        bookname = rootxml.find('./book/title').text

        # PAB user has said they want this book, don't block for unwanted language, just warn
        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if bookLanguage not in valid_langs:
            logger.debug('Book %s language does not match preference, %s' % (bookname, bookLanguage))

        bookdate = rootxml.findtext('./book/publication_year') or "0000"

        try:
            bookimg = rootxml.find('./book/img_url').text
            if 'assets/nocover' in bookimg:
                bookimg = 'images/nocover.png'
        except (KeyError, AttributeError, TypeError):
            # AttributeError: no img_url element; TypeError: element present but empty
            bookimg = 'images/nocover.png'

        authorname = rootxml.find('./book/authors/author/name').text
        bookdesc = rootxml.find('./book/description').text
        bookisbn = rootxml.find('./book/isbn').text
        bookpub = rootxml.find('./book/publisher').text
        booklink = rootxml.find('./book/link').text
        bookrate = float(rootxml.find('./book/average_rating').text)
        # path written like its siblings ('./book/...')
        bookpages = rootxml.find('./book/num_pages').text

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
        else:
            logger.warning("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
            return

        bookname = unaccented(bookname)
        bookname, booksub = split_title(authorname, bookname)
        dic = {':': '', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic).strip()
        booksub = replace_all(booksub, dic).strip()
        # look for series details in the subtitle when there is one, else in the title
        if booksub:
            series, seriesNum = bookSeries(booksub)
        else:
            series, seriesNum = bookSeries(bookname)

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": None,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": None,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": bookLanguage,
            "Status": "Wanted",
            "BookAdded": today(),
            "Series": series,
            "SeriesNum": seriesNum
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        elif bookimg and bookimg.startswith('http'):
            # remote cover: store whatever link cache_cover returns, if any
            link = cache_cover(bookid, bookimg)
            if link:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)

        if seriesNum is None:
            #  try to get series info from librarything
            series, seriesNum = getWorkSeries(bookid)
            if seriesNum:
                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                controlValueDict = {"BookID": bookid}
                newValueDict = {
                    "Series": series,
                    "SeriesNum": seriesNum
                }
                myDB.upsert("books", newValueDict, controlValueDict)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
예제 #8
0
    def get_author_books(self, authorid=None, authorname=None, refresh=False):
      """Import every book Goodreads lists for an author into the books table.

      The author row is flagged "Loading", the Goodreads author/list pages are
      fetched one page at a time, and each <book> entry is filtered (language,
      unusable title, duplicates) before being upserted.  Cover, series and
      work page details are then filled in through the helper lookups.
      Finally the author row is set to "Active" together with the book
      returned by the "order by BookDate DESC" query over the non-ignored
      books, and one row of counters is written to the stats table.

      Args:
          authorid: Goodreads author id as a string; it is concatenated into
              the request URL and used as the AuthorID key.
          authorname: Only used for log messages and the stats row.
          refresh: When true the author list pages bypass the request cache
              (useCache=not refresh) and the final log line also reports
              the number of updated books.

      Returns:
          None.  Every exception is caught and logged, never propagated.
      """
      try:
        # All counters are created up front so that the summary and the stats
        # row at the end also work when the author has no books at all.
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0  # never modified here, but still part of the stats row
        cache_hits = 0
        not_cached = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

        # Author is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        # NOTE(review): both early returns below leave the author row at
        # "Loading" - confirm whether callers reset the status themselves.
        try:
            rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
        except Exception as e:
            logger.error("Error fetching author books: %s" % str(e))
            return
        if rootxml is None:
            logger.debug("Error requesting author books")
            return
        if not in_cache:
            api_hits = api_hits + 1
        resultxml = rootxml.getiterator('book')

        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]

        if not len(resultxml):
            logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
        else:
            logger.debug("[%s] Now processing books with GoodReads API" % authorname)
            logger.debug(u"url " + URL)

            authorNameResult = rootxml.find('./author/name').text
            logger.debug(u"author name " + authorNameResult)
            loopCount = 1  # number of the result page currently being processed

            # ISBN registration-group prefixes mapped to a language code.
            # For ISBN13 the digits following the 978/979 prefix are compared,
            # for ISBN10 the leading digits.
            isbn_979_dict = {
                "10": "fre",
                "11": "kor",
                "12": "ita"
            }
            isbn_978_dict = {
                "0": "eng",
                "1": "eng",
                "2": "fre",
                "3": "ger",
                "4": "jap",
                "5": "rus"
            }

            while resultxml:
                for book in resultxml:
                    total_count = total_count + 1

                    pubyear = book.find('publication_year').text
                    if pubyear is None:
                        pubyear = "0000"

                    # A missing or empty <image_url> falls back to the placeholder
                    # instead of raising on the 'nocover' membership test.
                    image_node = book.find('image_url')
                    bookimg = image_node.text if image_node is not None else None
                    if not bookimg or 'nocover' in bookimg:
                        bookimg = 'images/nocover.png'

                    bookLanguage = "Unknown"
                    find_field = "id"
                    isbn = ""
                    isbnhead = ""
                    if "All" not in valid_langs:  # do we care about language
                        if book.find('isbn').text:
                            find_field = "isbn"
                            isbn = book.find('isbn').text
                            isbnhead = isbn[0:3]
                        elif book.find('isbn13').text:
                            find_field = "isbn13"
                            isbn = book.find('isbn13').text
                            isbnhead = isbn[3:6]

                        if isbnhead:  # isbn10 or isbn13 found
                            # Try to use shortcut of ISBN identifier codes described here...
                            # https://en.wikipedia.org/wiki/List_of_ISBN_identifier_groups
                            if find_field == "isbn13" and isbn.startswith('979'):
                                for item in isbn_979_dict:
                                    if isbnhead.startswith(item):
                                        bookLanguage = isbn_979_dict[item]
                                        break
                                if bookLanguage != "Unknown":
                                    logger.debug("ISBN979 returned %s for %s" % (bookLanguage, isbnhead))
                            elif find_field == "isbn" or isbn.startswith('978'):
                                for item in isbn_978_dict:
                                    if isbnhead.startswith(item):
                                        bookLanguage = isbn_978_dict[item]
                                        break
                                if bookLanguage != "Unknown":
                                    logger.debug("ISBN978 returned %s for %s" % (bookLanguage, isbnhead))

                        # Without an ISBN there is nothing to look up in the cache or at
                        # LibraryThing, and a reply must never be cached under an empty key.
                        if bookLanguage == "Unknown" and isbnhead:
                            # Nothing in the isbn dictionary, try any cached results
                            match = myDB.match('SELECT lang FROM languages where isbn = ?', (isbnhead,))
                            if match:
                                bookLanguage = match['lang']
                                cache_hits = cache_hits + 1
                                logger.debug("Found cached language [%s] for %s [%s]" %
                                             (bookLanguage, find_field, isbnhead))
                            else:
                                # no match in cache, try searching librarything for a language code using the isbn
                                # if no language found, librarything return value is "invalid" or "unknown"
                                # returns plain text, not xml
                                BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                try:
                                    librarything_wait()
                                    resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                    lt_lang_hits = lt_lang_hits + 1
                                    logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                    if 'invalid' in resp or 'Unknown' in resp:
                                        bookLanguage = "Unknown"
                                    else:
                                        bookLanguage = resp  # found a language code
                                        # resp is external text, so it is bound as a parameter
                                        myDB.action('insert into languages values (?, ?)',
                                                    (isbnhead, bookLanguage))
                                        logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                                except Exception as e:
                                    logger.error("Error finding LT language result for [%s], %s" % (isbn, str(e)))

                        if bookLanguage == "Unknown":
                            # still  no earlier match, we'll have to search the goodreads api
                            try:
                                if book.find(find_field).text:
                                    # NOTE(review): find_field may be "isbn"/"isbn13", in which case an
                                    # ISBN is sent as the book/show "id" parameter - confirm intended.
                                    BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                        book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                    logger.debug(u"Book URL: " + BOOK_URL)

                                    # Own flag for this request, so a failure does not reuse the
                                    # cache flag left over from the author list request.
                                    book_in_cache = False
                                    try:
                                        time_now = int(time.time())
                                        if time_now <= lazylibrarian.LAST_GOODREADS:
                                            time.sleep(1)

                                        BOOK_rootxml, book_in_cache = get_xml_request(BOOK_URL)
                                        if BOOK_rootxml is None:
                                            logger.debug('Error requesting book language code')
                                            bookLanguage = ""
                                        else:
                                            if not book_in_cache:
                                                # only update last_goodreads if the result wasn't found in the cache
                                                lazylibrarian.LAST_GOODREADS = time_now
                                            bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                    except Exception as e:
                                        logger.error("Error finding book results: %s" % str(e))
                                    if not book_in_cache:
                                        gr_lang_hits = gr_lang_hits + 1
                                    if not bookLanguage:
                                        bookLanguage = "Unknown"
                                        # At this point, give up?
                                        # WhatWork on author/title doesn't give us a language.
                                        # It might give us the "original language" of the book (but not always)
                                        # and our copy might not be in the original language anyway
                                        # eg "The Girl With the Dragon Tattoo" original language Swedish
                                        # If we have an isbn, try WhatISBN to get alternatives
                                        # in case any of them give us a language, but it seems if thinglang doesn't
                                        # have a language for the first isbn code, it doesn't for any of the
                                        # alternatives either
                                        # Goodreads search results don't include the language. Although sometimes
                                        # it's in the html page, it's not in the xml results

                                    if isbnhead != "":
                                        # if GR didn't give an isbn we can't cache it, just use language for this book
                                        myDB.action('insert into languages values (?, ?)',
                                                    (isbnhead, bookLanguage))
                                        logger.debug("GoodReads reports language [%s] for %s" %
                                                     (bookLanguage, isbnhead))
                                    else:
                                        not_cached = not_cached + 1

                                    logger.debug(u"GR language: " + bookLanguage)
                                else:
                                    logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                    # continue

                            except Exception as e:
                                logger.debug(u"Goodreads language search failed: %s" % str(e))

                        if bookLanguage not in valid_langs:
                            logger.debug('Skipped %s with language %s' % (book.find('title').text, bookLanguage))
                            ignored = ignored + 1
                            continue

                    bookname = book.find('title').text
                    bookid = book.find('id').text
                    bookdesc = book.find('description').text
                    bookisbn = book.find('isbn').text
                    bookpub = book.find('publisher').text
                    booklink = book.find('link').text
                    bookrate = float(book.find('average_rating').text)
                    bookpages = book.find('num_pages').text
                    bookname = unaccented(bookname)

                    bookname, booksub = split_title(authorNameResult, bookname)

                    dic = {':': '', '"': '', '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace
                    booksub = replace_all(booksub, dic)
                    booksub = booksub.strip()  # strip whitespace
                    if booksub:
                        series, seriesNum = bookSeries(booksub)
                    else:
                        series, seriesNum = bookSeries(bookname)

                    # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                    # and sometimes uses the same bookid if the book is the same but the title is slightly different
                    # We use bookid, then reject if another author/title has a different bookid so we just keep one...
                    find_book_status = myDB.select('SELECT * FROM books WHERE BookID = ?', (bookid,))
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                            locked = resulted['Manual']
                    else:
                        book_status = lazylibrarian.NEWBOOK_STATUS
                        locked = False

                    rejected = False

                    # re.match anchors at the start, so only the first character of the title is tested
                    if re.match(r'[^\w-]', bookname):  # reject books with bad characters in title
                        logger.debug(u"removed result [" + bookname + "] for bad characters")
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected and not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' %
                                     (bookid, authorNameResult))
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected:
                        find_books = myDB.select('SELECT * FROM books WHERE BookName = ? and AuthorName = ?',
                                                 (bookname, authorNameResult))
                        if find_books:
                            for find_book in find_books:
                                if find_book['BookID'] != bookid:
                                    # we have a book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (bookid, authorNameResult, bookname, find_book['BookID']))
                                    duplicates = duplicates + 1
                                    rejected = True

                    if not rejected:
                        # NOTE(review): any existing row for this bookid causes a rejection here,
                        # which makes the "Updated book" path below unreachable - confirm intended.
                        find_books = myDB.match('SELECT AuthorName,BookName FROM books WHERE BookID = ?', (bookid,))
                        if find_books:
                            # we have a book with this bookid already
                            if bookname != find_books['BookName'] or authorNameResult != find_books['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorNameResult, bookname,
                                              find_books['AuthorName'], find_books['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorNameResult, bookname))
                            duplicates = duplicates + 1
                            rejected = True

                    if not rejected:
                        if book_status != "Ignored":
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorName": authorNameResult,
                                    "AuthorID": authorid,
                                    "AuthorLink": None,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": None,
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": pubyear,
                                    "BookLang": bookLanguage,
                                    "Status": book_status,
                                    "BookAdded": today(),
                                    "Series": series,
                                    "SeriesNum": seriesNum
                                }

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            elif bookimg and bookimg.startswith('http'):
                                link = cache_cover(bookid, bookimg)
                                if link:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            if seriesNum is None:
                                # try to get series info from librarything
                                series, seriesNum = getWorkSeries(bookid)
                                if seriesNum:
                                    logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {
                                        "Series": series,
                                        "SeriesNum": seriesNum
                                    }
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not find_book_status:
                                logger.debug(u"[%s] Added book: %s" % (authorname, bookname))
                                added_count = added_count + 1
                            else:
                                logger.debug(u"[%s] Updated book: %s" % (authorname, bookname))
                                updated_count = updated_count + 1
                        else:
                            book_ignore_count = book_ignore_count + 1

                # Fetch the next page; the outer loop ends as soon as a page
                # cannot be retrieved or contains no <book> entries.
                loopCount = loopCount + 1
                URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                      urllib.urlencode(self.params) + '&page=' + str(loopCount)
                resultxml = None
                try:
                    rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                    if rootxml is None:
                        logger.debug('Error requesting next page of results')
                    else:
                        resultxml = rootxml.getiterator('book')
                        if not in_cache:
                            api_hits = api_hits + 1
                except Exception as e:
                    resultxml = None
                    logger.error("Error finding next page of results: %s" % str(e))

                if resultxml:
                    if all(False for _ in resultxml):  # returns True if iterator is empty
                        resultxml = None

        lastbook = myDB.match('SELECT BookName, BookLink, BookDate from books WHERE AuthorID=? '
                              'AND Status != "Ignored" order by BookDate DESC', (authorid,))
        if lastbook:
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
        else:
            lastbookname = None
            lastbooklink = None
            lastbookdate = None

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # This is here because GoodReads sometimes has several entries with the same BookID!
        modified_count = added_count + updated_count

        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
        logger.debug(
            "Removed %s bad character or no-name result%s for author" %
            (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (modified_count, plural(modified_count)))

        # Bound parameters make manual quote doubling of the author name unnecessary
        myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                     cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

      except Exception:
        # Last-resort boundary: log the full traceback instead of propagating
        logger.error('Unhandled exception in GR.get_author_books: %s' % traceback.format_exc())