Python addAuthorNameToDB Examples

Programming Language: Python

Namespace/Package Name: lazylibrarian.importer

Method/Function: addAuthorNameToDB

Examples at hotexamples.com: 7

Python addAuthorNameToDB - 7 examples found. These are the top-rated real-world Python examples of lazylibrarian.importer.addAuthorNameToDB extracted from open-source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: api.py Project: Code-Slave/LazyLibrarian

 def _addAuthor(self, **kwargs):
     """Add an author by name and store the outcome in self.data.

     Expects a 'name' keyword argument. When it is absent, self.data is set
     to an error message and nothing else happens. Otherwise the name is
     kept in self.id and handed to addAuthorNameToDB (with refresh=False);
     self.data receives either that call's result or, if it raised, a
     "<ExceptionType> <message>" string.
     """
     if 'name' in kwargs:
         self.id = kwargs['name']
     else:
         self.data = 'Missing parameter: name'
         return

     try:
         outcome = addAuthorNameToDB(author=self.id, refresh=False)
     except Exception as err:
         # Report the failure to the caller instead of propagating it
         outcome = "%s %s" % (type(err).__name__, str(err))
     self.data = outcome

Example #2

Show file

def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"

        The csv file must have a header row containing at least the columns
        'Author' and 'Title'. The first column of each data row is used as
        the key for that row, so rows sharing a first column overwrite
        each other. Blank rows are ignored.

        :param search_dir: directory to search for the csv file
        :return: a message string, either the reason nothing was imported or
                 a summary of authors added, books marked "Wanted" and books
                 skipped. Unexpected exceptions are logged and their
                 traceback is returned in the message rather than raised.
    """
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir)

        headers = None
        content = {}

        if not csvFile:
            msg = "No CSV file found in %s" % search_dir
            logger.warn(msg)
            return msg
        else:
            logger.debug(u'Reading file %s' % csvFile)
            # Read the whole file up front inside a context manager so the
            # handle is always closed, even if parsing raises.
            with open(csvFile) as csv_handle:
                reader = csv.reader(csv_handle)
                for row in reader:
                    if reader.line_num == 1:
                        # If we are on the first line, create the headers list from the first row
                        headers = row
                    elif row:
                        # Otherwise, the key in the content dictionary is the first item in the
                        # row and we can create the sub-dictionary by using the zip() function.
                        # we include the key in the dictionary as our exported csv files use
                        # bookid as the key
                        # (blank rows come back from csv.reader as [] and are skipped,
                        # they have no first item to use as a key)
                        content[row[0]] = dict(zip(headers, row))

            # We can now get to the content by using the resulting dictionary, so to see
            # the list of lines, we can do: print content.keys()  to get a list of keys
            # To see the list of fields available for each book:  print headers

            # "not headers" covers an empty file, where no header row was ever read
            if not headers or 'Author' not in headers or 'Title' not in headers:
                msg = 'Invalid CSV file found %s' % csvFile
                logger.warn(msg)
                return msg

            myDB = database.DBConnection()
            bookcount = 0
            authcount = 0
            skipcount = 0
            logger.debug(u"CSV: Found %s book%s in csv file" % (len(content), plural(len(content))))
            for item in content:
                authorname = content[item]['Author']
                # byte strings only have decode() on python2, where they need converting to unicode
                if isinstance(authorname, str) and hasattr(authorname, "decode"):
                    authorname = authorname.decode(lazylibrarian.SYS_ENCODING)
                authorname = formatAuthorName(authorname)
                title = content[item]['Title']
                if isinstance(title, str) and hasattr(title, "decode"):
                    title = title.decode(lazylibrarian.SYS_ENCODING)

                authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

                if authmatch:
                    logger.debug(u"CSV: Author %s found in database" % authorname)
                else:
                    logger.debug(u"CSV: Author %s not found" % authorname)
                    newauthor, authorid, new = addAuthorNameToDB(author=authorname,
                                                                 addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                    if len(newauthor) and newauthor != authorname:
                        logger.debug("Preferred authorname changed from [%s] to [%s]" % (authorname, newauthor))
                        authorname = newauthor
                    if new:
                        authcount += 1

                bookmatch = finditem(content[item], authorname, headers)
                # result stays '' unless the online search below returns something,
                # which lets the skip report tell "no results" from "no good match"
                result = ''
                if bookmatch:
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        bookcount += 1
                else:
                    # not in the database, search for it and import the best hit
                    # if both author and title match well enough
                    searchterm = "%s <ll> %s" % (title, authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                      result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            bookcount += 1
                            bookmatch = True

                if not bookmatch:
                    msg = "Skipping book %s by %s" % (title, authorname)
                    if not result:
                        msg += ', No results returned'
                        logger.warn(msg)
                    else:
                        msg += ', No match found'
                        logger.warn(msg)
                        msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                    result['authorname'], result['bookname'])
                        logger.warn(msg)
                    skipcount += 1
            msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
                  (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount))
            logger.info(msg)
            return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg

Example #3

Show file

File: csvfile.py Project: DobyTang/LazyLibrarian

def import_CSV(search_dir=None, library='eBook'):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
        Optionally delete the file on successful completion

        The csv file must have a header row containing at least the columns
        'Author' and 'Title'. Blank rows are ignored.

        :param search_dir: directory to search for the csv file
        :param library: 'eBook' to work with the Status column of a book,
                        anything else to work with its AudioStatus column
        :return: a message string, either the reason nothing was imported or
                 a summary of authors added, books marked "Wanted" and books
                 not found. Unexpected exceptions are logged and their
                 traceback is returned in the message rather than raised.
    """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir, library=library)

        headers = None

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        total = 0
        existing = 0

        if not csvFile:
            msg = "No %s CSV file found in %s" % (library, search_dir)
            logger.warn(msg)
            return msg
        else:
            logger.debug('Reading file %s' % csvFile)
            # 'U' (universal newlines) is needed on python2 only. On python3 it is the
            # default for text mode, and the 'U' flag itself was removed in python 3.11
            # where it raises ValueError.
            import sys
            open_mode = 'rU' if sys.version_info[0] < 3 else 'r'
            # The context manager guarantees the file is closed before we try to
            # delete or rename it further down (an open file can't be removed on windows)
            with open(csvFile, open_mode) as csv_handle:
                csvreader = reader(csv_handle)
                for row in csvreader:
                    if csvreader.line_num == 1:
                        # If we are on the first line, create the headers list from the first row
                        headers = row
                        if 'Author' not in headers or 'Title' not in headers:
                            msg = 'Invalid CSV file found %s' % csvFile
                            logger.warn(msg)
                            return msg
                    elif row:
                        # blank rows come back from the csv reader as [] and are skipped,
                        # they would otherwise abort the import with a KeyError on 'Author'
                        total += 1
                        item = dict(list(zip(headers, row)))
                        authorname = formatAuthorName(item['Author'])
                        title = makeUnicode(item['Title'])

                        authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

                        if authmatch:
                            logger.debug("CSV: Author %s found in database" % authorname)
                        else:
                            logger.debug("CSV: Author %s not found" % authorname)
                            newauthor, authorid, new = addAuthorNameToDB(
                                author=authorname, addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                            if len(newauthor) and newauthor != authorname:
                                logger.debug("Preferred authorname changed from [%s] to [%s]" %
                                             (authorname, newauthor))
                                authorname = newauthor
                            if new:
                                authcount += 1

                        bookmatch = finditem(item, authorname, library=library)
                        # result: best search hit, '' if we never got one
                        # imported: database row of the book we imported, '' if none
                        # import_tried: whether import_book was actually called, needed to
                        # tell "import failed" apart from "no hit was good enough to import"
                        result = ''
                        imported = ''
                        import_tried = False
                        if bookmatch:
                            authorname = bookmatch['AuthorName']
                            bookname = bookmatch['BookName']
                            bookid = bookmatch['BookID']
                            if library == 'eBook':
                                bookstatus = bookmatch['Status']
                            else:
                                bookstatus = bookmatch['AudioStatus']
                            if bookstatus in ['Open', 'Wanted', 'Have']:
                                existing += 1
                                logger.info('Found %s %s by %s, already marked as "%s"' %
                                            (library, bookname, authorname, bookstatus))
                            else:  # skipped/ignored
                                logger.info('Found %s %s by %s, marking as "Wanted"' %
                                            (library, bookname, authorname))
                                controlValueDict = {"BookID": bookid}
                                if library == 'eBook':
                                    newValueDict = {"Status": "Wanted"}
                                else:
                                    newValueDict = {"AudioStatus": "Wanted"}
                                myDB.upsert("books", newValueDict, controlValueDict)
                                bookcount += 1
                        else:
                            searchterm = "%s <ll> %s" % (title, authorname)
                            results = search_for(unaccented(searchterm))
                            if results:
                                result = results[0]
                                if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                        and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                    bookmatch = True
                            if not bookmatch:  # no match on full searchterm, try splitting out subtitle
                                newtitle, _ = split_title(authorname, title)
                                if newtitle != title:
                                    title = newtitle
                                    searchterm = "%s <ll> %s" % (title, authorname)
                                    results = search_for(unaccented(searchterm))
                                    if results:
                                        result = results[0]
                                        if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                                and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                            bookmatch = True
                            if bookmatch:
                                logger.info("Found (%s%% %s%%) %s: %s for %s: %s" %
                                            (result['author_fuzz'], result['book_fuzz'],
                                             result['authorname'], result['bookname'],
                                             authorname, title))
                                import_tried = True
                                if library == 'eBook':
                                    import_book(result['bookid'], ebook="Wanted", wait=True)
                                else:
                                    import_book(result['bookid'], audio="Wanted", wait=True)
                                # only count the book if the import really put it in the database
                                imported = myDB.match('select * from books where BookID=?', (result['bookid'],))
                                if imported:
                                    bookcount += 1
                                else:
                                    bookmatch = False

                        if not bookmatch:
                            msg = "Skipping book %s by %s" % (title, authorname)
                            if not result:
                                msg += ', No results found'
                                logger.warn(msg)
                            elif import_tried:
                                # we had a good enough hit but it did not end up in the database
                                msg += ', Failed to import %s' % result['bookid']
                                logger.warn(msg)
                            else:
                                msg += ', No match found'
                                logger.warn(msg)
                                msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'],
                                                                            result['book_fuzz'],
                                                                            result['authorname'],
                                                                            result['bookname'])
                                logger.warn(msg)
                            skipcount += 1

            msg = "Found %i %s%s in csv file, %i already existing or wanted" % (total, library,
                                                                                plural(total), existing)
            logger.info(msg)
            # argument order must follow the "%i %s%s" pairs: count, library, plural suffix
            msg = "Added %i new author%s, marked %i %s%s as 'Wanted', %i %s%s not found" % \
                  (authcount, plural(authcount), bookcount, library, plural(bookcount),
                   skipcount, library, plural(skipcount))
            logger.info(msg)
            if lazylibrarian.CONFIG['DELETE_CSV']:
                if skipcount == 0:
                    logger.info("Deleting %s on successful completion" % csvFile)
                    try:
                        os.remove(csvFile)
                    except OSError as why:
                        logger.warn('Unable to delete %s: %s' % (csvFile, why.strerror))
                else:
                    # keep the file for inspection, renamed so it is not picked up again
                    logger.warn("Not deleting %s as not all books found" % csvFile)
                    if os.path.isdir(csvFile + '.fail'):
                        try:
                            shutil.rmtree(csvFile + '.fail')
                        except Exception as why:
                            logger.warn("Unable to remove %s, %s %s" % (csvFile + '.fail',
                                                                        type(why).__name__, str(why)))
                    try:
                        _ = safe_move(csvFile, csvFile + '.fail')
                    except Exception as e:
                        logger.error("Unable to rename %s, %s %s" %
                                     (csvFile, type(e).__name__, str(e)))
                        # help the user diagnose why the rename failed
                        if not os.access(csvFile, os.R_OK):
                            logger.error("%s is not readable" % csvFile)
                        if not os.access(csvFile, os.W_OK):
                            logger.error("%s is not writeable" % csvFile)
                        parent = os.path.dirname(csvFile)
                        try:
                            # try creating a scratch file to see if the directory is writeable
                            with open(os.path.join(parent, 'll_temp'), 'w') as f:
                                f.write('test')
                            os.remove(os.path.join(parent, 'll_temp'))
                        except Exception as why:
                            logger.error("Directory %s is not writeable: %s" % (parent, why))
            return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg

Example #4

Show file

File: calibre.py Project: petetomasik/LazyLibrarian

def syncCalibreList(col_read=None, col_toread=None, userid=None):
    """ Get the lazylibrarian bookid for each read/toread calibre book so we can map our id to theirs,
        and sync current/supplied user's read/toread or supplied read/toread columns to calibre database.

        Works in three stages:
        1. map calibre book ids to lazylibrarian book ids (and back), adding
           authors/books to lazylibrarian where a good enough match is found
        2. compare the calibre lists and the user's lazylibrarian lists against
           the list stored in the sync table at the previous sync
        3. copy the changes each way, then store the new lists in the sync table

        :param col_read: name of the calibre custom column holding "read" status,
                         defaults to the user's CalibreRead setting
        :param col_toread: name of the calibre custom column holding "to read" status,
                           defaults to the user's CalibreToRead setting
        :param userid: lazylibrarian user to sync, defaults to the 'll_uid' cookie
                       of the current cherrypy request
        :return: message giving totals, or an error message. Note that if creating
                 a calibre custom column fails, the raw calibredb result is returned
                 instead of a string.
    """

    myDB = database.DBConnection()
    if not userid:
        # no user supplied, fall back to whoever is logged in for this request
        cookie = cherrypy.request.cookie
        if cookie and 'll_uid' in cookie.keys():
            userid = cookie['ll_uid'].value
    if userid:
        res = myDB.match(
            'SELECT UserName,ToRead,HaveRead,CalibreRead,CalibreToRead,Perms from users where UserID=?',
            (userid, ))
        if res:
            username = res['UserName']
            # explicitly supplied column names take priority over the user's settings
            if not col_read:
                col_read = res['CalibreRead']
            if not col_toread:
                col_toread = res['CalibreToRead']
            toreadlist = getList(res['ToRead'])
            readlist = getList(res['HaveRead'])
            # suppress duplicates (just in case)
            toreadlist = list(set(toreadlist))
            readlist = list(set(readlist))
        else:
            return "Error: Unable to get user column settings for %s" % userid

    if not userid:
        return "Error: Unable to find current userid"

    # from here on userid is set and username/readlist/toreadlist are populated

    if not col_read and not col_toread:
        return "User %s has no calibre columns set" % username

    # check user columns exist in calibre and create if not
    res = calibredb('custom_columns')
    columns = res[0].split('\n')
    custom_columns = []
    for column in columns:
        if column:
            # keep only the text before the first ' (' on each output line
            # presumably that is the column name - verify against calibredb output
            custom_columns.append(column.split(' (')[0])

    # NOTE(review): these checks are not guarded by "if col_read" / "if col_toread",
    # so if only one of the two columns is configured we still ask calibredb
    # to create a column for the unset one - confirm this is intended
    if col_read not in custom_columns:
        added = calibredb('add_custom_column', [col_read, col_read, 'bool'])
        if "column created" not in added[0]:
            return added
    if col_toread not in custom_columns:
        added = calibredb('add_custom_column',
                          [col_toread, col_toread, 'bool'])
        if "column created" not in added[0]:
            return added

    nomatch = 0
    readcol = ''
    toreadcol = ''
    map_ctol = {}  # calibre book id -> lazylibrarian bookid (both as strings)
    map_ltoc = {}  # lazylibrarian bookid -> calibre book id (both as strings)
    # keys used to look the columns up in each calibre_list item are the
    # column name with a leading '*'
    if col_read:
        readcol = '*' + col_read
    if col_toread:
        toreadcol = '*' + col_toread

    calibre_list = calibreList(col_read, col_toread)
    if not isinstance(calibre_list, list):
        # got an error message from calibredb
        return '"%s"' % calibre_list

    # Stage 1: for every calibre book that has one of our columns,
    # find (or add) the matching lazylibrarian author and book
    for item in calibre_list:
        if toreadcol and toreadcol in item or readcol and readcol in item:
            authorname, authorid, added = addAuthorNameToDB(item['authors'],
                                                            refresh=False,
                                                            addbooks=False)
            if authorname:
                if authorname != item['authors']:
                    logger.debug(
                        "Changed authorname for [%s] from [%s] to [%s]" %
                        (item['title'], item['authors'], authorname))
                    # update the calibre item so the later exact-match
                    # comparison uses our preferred author name
                    item['authors'] = authorname
                bookid = find_book_in_db(authorname, item['title'])
                if not bookid:
                    # not in our database, search for it and import the best hit
                    # if both author and title match well enough
                    searchterm = "%s <ll> %s" % (item['title'], authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.debug(
                                "Found (%s%% %s%%) %s: %s" %
                                (result['author_fuzz'], result['book_fuzz'],
                                 result['authorname'], result['bookname']))
                            bookid = result['bookid']
                            import_book(bookid)
                if bookid:
                    # NOTE: calibre bookid is always an integer, lazylibrarian bookid is a string
                    # (goodreads could be used as an int, but googlebooks can't as it's alphanumeric)
                    # so convert all dict items to strings for ease of matching.
                    map_ctol[str(item['id'])] = str(bookid)
                    map_ltoc[str(bookid)] = str(item['id'])
                else:
                    logger.warn(
                        'Calibre Book [%s] by [%s] is not in lazylibrarian database'
                        % (item['title'], authorname))
                    nomatch += 1
            else:
                logger.warn(
                    'Calibre Author [%s] not matched in lazylibrarian database'
                    % (item['authors']))
                nomatch += 1

    # Now check current users lazylibrarian read/toread against the calibre library, warn about missing ones
    # which might be books calibre doesn't have, or might be minor differences in author or title

    for idlist in [("Read", readlist), ("To_Read", toreadlist)]:
        # idlist is (label used in log messages, list of lazylibrarian bookids)
        booklist = idlist[1]
        for bookid in booklist:
            cmd = "SELECT AuthorID,BookName from books where BookID=?"
            book = myDB.match(cmd, (bookid, ))
            if not book:
                logger.error('Error finding bookid %s' % bookid)
            else:
                cmd = "SELECT AuthorName from authors where AuthorID=?"
                author = myDB.match(cmd, (book['AuthorID'], ))
                if not author:
                    logger.error('Error finding authorid %s' %
                                 book['AuthorID'])
                else:
                    # first try an exact author + title match
                    match = False
                    for item in calibre_list:
                        if item['authors'] == author['AuthorName'] and item[
                                'title'] == book['BookName']:
                            logger.debug("Exact match for %s [%s]" %
                                         (idlist[0], book['BookName']))
                            map_ctol[str(item['id'])] = str(bookid)
                            map_ltoc[str(bookid)] = str(item['id'])
                            match = True
                            break
                    if not match:
                        # no exact match, find the closest title by the same author
                        high = 0
                        highname = ''
                        highid = ''
                        for item in calibre_list:
                            if item['authors'] == author['AuthorName']:
                                n = fuzz.token_sort_ratio(
                                    item['title'], book['BookName'])
                                if n > high:
                                    high = n
                                    highname = item['title']
                                    highid = item['id']

                        # only accept the fuzzy match if the ratio is above 95
                        if high > 95:
                            logger.debug(
                                "Found ratio match %s%% [%s] for %s [%s]" %
                                (high, highname, idlist[0], book['BookName']))
                            map_ctol[str(highid)] = str(bookid)
                            map_ltoc[str(bookid)] = str(highid)
                            match = True

                    if not match:
                        logger.warn(
                            "No match for %s %s by %s in calibre database, closest match %s%% [%s]"
                            % (idlist[0], book['BookName'],
                               author['AuthorName'], high, highname))
                        nomatch += 1

    logger.debug("BookID mapping complete, %s match %s, nomatch %s" %
                 (username, len(map_ctol), nomatch))

    # now sync the lists
    if userid:
        last_read = []
        last_toread = []
        calibre_read = []
        calibre_toread = []

        # the sync table holds the lists as they were stored at the end of the
        # previous sync, keyed on user and column name
        cmd = 'select SyncList from sync where UserID=? and Label=?'
        res = myDB.match(cmd, (userid, col_read))
        if res:
            last_read = getList(res['SyncList'])
        res = myDB.match(cmd, (userid, col_toread))
        if res:
            last_toread = getList(res['SyncList'])

        # collect the lazylibrarian bookids of books calibre currently has flagged
        for item in calibre_list:
            if toreadcol and toreadcol in item and item[
                    toreadcol]:  # only if True
                if str(item['id']) in map_ctol:
                    calibre_toread.append(map_ctol[str(item['id'])])
                else:
                    logger.warn(
                        "Calibre to_read book %s:%s has no lazylibrarian bookid"
                        % (item['authors'], item['title']))
            if readcol and readcol in item and item[readcol]:  # only if True
                if str(item['id']) in map_ctol:
                    calibre_read.append(map_ctol[str(item['id'])])
                else:
                    logger.warn(
                        "Calibre read book %s:%s has no lazylibrarian bookid" %
                        (item['authors'], item['title']))

        logger.debug("Found %s calibre read, %s calibre toread" %
                     (len(calibre_read), len(calibre_toread)))
        logger.debug("Found %s lazylib read, %s lazylib toread" %
                     (len(readlist), len(toreadlist)))

        # work out what changed on each side since the last sync.
        # All four lazylibrarian differences are calculated here, before the
        # calibre changes below modify readlist and toreadlist.
        added_to_ll_toread = list(set(toreadlist) - set(last_toread))
        removed_from_ll_toread = list(set(last_toread) - set(toreadlist))
        added_to_ll_read = list(set(readlist) - set(last_read))
        removed_from_ll_read = list(set(last_read) - set(readlist))
        logger.debug("lazylibrarian changes to copy to calibre: %s %s %s %s" %
                     (len(added_to_ll_toread), len(removed_from_ll_toread),
                      len(added_to_ll_read), len(removed_from_ll_read)))

        added_to_calibre_toread = list(set(calibre_toread) - set(last_toread))
        removed_from_calibre_toread = list(
            set(last_toread) - set(calibre_toread))
        added_to_calibre_read = list(set(calibre_read) - set(last_read))
        removed_from_calibre_read = list(set(last_read) - set(calibre_read))
        logger.debug(
            "calibre changes to copy to lazylibrarian: %s %s %s %s" %
            (len(added_to_calibre_toread), len(removed_from_calibre_toread),
             len(added_to_calibre_read), len(removed_from_calibre_read)))

        # copy calibre changes into the user's lazylibrarian lists,
        # counting only those that actually alter a list
        calibre_changes = 0
        for item in added_to_calibre_read:
            if item not in readlist:
                readlist.append(item)
                logger.debug("Lazylibrarian marked %s as read" % item)
                calibre_changes += 1
        for item in added_to_calibre_toread:
            if item not in toreadlist:
                toreadlist.append(item)
                logger.debug("Lazylibrarian marked %s as to_read" % item)
                calibre_changes += 1
        for item in removed_from_calibre_read:
            if item in readlist:
                readlist.remove(item)
                logger.debug("Lazylibrarian removed %s from read" % item)
                calibre_changes += 1
        for item in removed_from_calibre_toread:
            if item in toreadlist:
                toreadlist.remove(item)
                logger.debug("Lazylibrarian removed %s from to_read" % item)
                calibre_changes += 1
        if calibre_changes:
            myDB.action('UPDATE users SET ToRead=?,HaveRead=? WHERE UserID=?',
                        (', '.join(toreadlist), ', '.join(readlist), userid))

        # copy lazylibrarian changes to calibre, one calibredb call per book.
        # A non-zero rc from calibredb is logged as an error and not counted.
        # Books we could not map to a calibre id can't be updated, just warn.
        ll_changes = 0
        for item in added_to_ll_toread:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_toread, map_ltoc[item], 'true'],
                                         [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to set calibre %s true for %s" %
                            (col_toread, item))
        for item in removed_from_ll_toread:
            if item in map_ltoc:
                # an empty value clears the column for this book
                res, err, rc = calibredb('set_custom',
                                         [col_toread, map_ltoc[item], ''], [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to clear calibre %s for %s" %
                            (col_toread, item))

        for item in added_to_ll_read:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_read, map_ltoc[item], 'true'],
                                         [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to set calibre %s true for %s" %
                            (col_read, item))

        for item in removed_from_ll_read:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_read, map_ltoc[item], ''], [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to clear calibre %s for %s" %
                            (col_read, item))

        # store current sync list as comparison for next sync
        controlValueDict = {"UserID": userid, "Label": col_read}
        newValueDict = {
            "Date": str(time.time()),
            "Synclist": ', '.join(readlist)
        }
        myDB.upsert("sync", newValueDict, controlValueDict)
        controlValueDict = {"UserID": userid, "Label": col_toread}
        newValueDict = {
            "Date": str(time.time()),
            "Synclist": ', '.join(toreadlist)
        }
        myDB.upsert("sync", newValueDict, controlValueDict)

        msg = "%s sync updated: %s calibre, %s lazylibrarian" % (
            username, ll_changes, calibre_changes)
    return msg

Example #5

Show file

File: csvfile.py Project: geoffg41/LazyLibrarian

def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
        Optionally delete the file on successful completion

        search_dir: directory expected to contain the csv file.

        The first line of the file is the header row and must contain
        'Author' and 'Title' columns. For every following row the author is
        looked up (and added through addAuthorNameToDB if unknown), then the
        book is looked up with finditem(); if that fails an online search is
        tried, first with the full title and then with the subtitle split off.

        Returns a message string: either the warning/error that stopped the
        import, or the final "Added ... author(s), marked ... book(s)" summary.
        Any unexpected exception is logged and returned as a message too.
    """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir)

        headers = None

        myDB = database.DBConnection()
        bookcount = 0   # books newly marked Wanted or imported
        authcount = 0   # authors newly added to the database
        skipcount = 0   # rows we could not match or import
        total = 0       # data rows seen (header excluded)
        existing = 0    # rows whose book was already Open/Wanted/Have

        if not csvFile:
            msg = "No CSV file found in %s" % search_dir
            logger.warn(msg)
            return msg

        logger.debug('Reading file %s' % csvFile)
        # Use a context manager so the handle is closed on every exit path
        # (including the early return on a bad header) and, importantly,
        # before we try to delete the file further down.
        # NOTE(review): mode 'rU' is kept from the original for universal
        # newline handling; confirm it is accepted by every python version
        # this project supports.
        with open(csvFile, 'rU') as csvhandle:
            csvreader = reader(csvhandle)
            for row in csvreader:
                if csvreader.line_num == 1:
                    # If we are on the first line, create the headers list from the first row
                    headers = row
                    if 'Author' not in headers or 'Title' not in headers:
                        msg = 'Invalid CSV file found %s' % csvFile
                        logger.warn(msg)
                        return msg
                    continue

                total += 1
                item = dict(list(zip(headers, row)))
                authorname = formatAuthorName(item['Author'])
                title = makeUnicode(item['Title'])

                authmatch = myDB.match(
                    'SELECT * FROM authors where AuthorName=?',
                    (authorname, ))

                if authmatch:
                    logger.debug("CSV: Author %s found in database" %
                                 authorname)
                else:
                    logger.debug("CSV: Author %s not found" % authorname)
                    newauthor, authorid, new = addAuthorNameToDB(
                        author=authorname,
                        addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                    if len(newauthor) and newauthor != authorname:
                        logger.debug(
                            "Preferred authorname changed from [%s] to [%s]"
                            % (authorname, newauthor))
                        authorname = newauthor
                    if new:
                        authcount += 1

                bookmatch = finditem(item, authorname)
                result = ''
                # True once import_book() has actually been called for this
                # row, so the diagnostics below can tell "import failed"
                # apart from "search results were not close enough".
                import_tried = False
                if bookmatch:
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        existing += 1
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info(
                            'Found book %s by %s, marking as "Wanted"' %
                            (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict,
                                    controlValueDict)
                        bookcount += 1
                else:
                    searchterm = "%s <ll> %s" % (title, authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                            bookmatch = True
                    if not bookmatch:  # no match on full searchterm, try splitting out subtitle
                        newtitle, _ = split_title(authorname, title)
                        if newtitle != title:
                            title = newtitle
                            searchterm = "%s <ll> %s" % (title, authorname)
                            results = search_for(unaccented(searchterm))
                            if results:
                                result = results[0]
                                if result['author_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO'] \
                                        and result['book_fuzz'] >= lazylibrarian.CONFIG['MATCH_RATIO']:
                                    bookmatch = True
                    if bookmatch:
                        logger.info(
                            "Found (%s%% %s%%) %s: %s for %s: %s" %
                            (result['author_fuzz'], result['book_fuzz'],
                             result['authorname'], result['bookname'],
                             authorname, title))
                        import_tried = True
                        import_book(result['bookid'], wait=True)
                        # confirm the import really created the book row
                        imported = myDB.match(
                            'select * from books where BookID=?',
                            (result['bookid'], ))
                        if imported:
                            bookcount += 1
                        else:
                            bookmatch = False

                if not bookmatch:
                    msg = "Skipping book %s by %s" % (title, authorname)
                    if not result:
                        msg += ', No results found'
                        logger.warn(msg)
                    elif import_tried:
                        msg += ', Failed to import %s' % result['bookid']
                        logger.warn(msg)
                    else:
                        # search returned something, but below MATCH_RATIO
                        msg += ', No match found'
                        logger.warn(msg)
                        msg = "Closest match (%s%% %s%%) %s: %s" % (
                            result['author_fuzz'], result['book_fuzz'],
                            result['authorname'], result['bookname'])
                        logger.warn(msg)
                    skipcount += 1

        # The csv handle is closed at this point.
        msg = "Found %i book%s in csv file, %i already existing or wanted" % (
            total, plural(total), existing)
        logger.info(msg)
        msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
              (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount))
        logger.info(msg)
        if lazylibrarian.CONFIG['DELETE_CSV']:
            # only remove the file when every row was dealt with
            if skipcount == 0:
                logger.info("Deleting %s on successful completion" %
                            csvFile)
                try:
                    os.remove(csvFile)
                except OSError as why:
                    logger.warn('Unable to delete %s: %s' %
                                (csvFile, why.strerror))
            else:
                logger.warn("Not deleting %s as not all books found" %
                            csvFile)
        return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg

Example #6

Show file

File: calibre.py Project: DobyTang/LazyLibrarian

def _push_flags_to_calibre(bookids, column, value, map_ltoc):
    """ Set (value='true') or clear (value='') a calibre custom column for
        each lazylibrarian bookid in bookids, using map_ltoc to translate the
        lazylibrarian bookid into the calibre id.
        Errors reported by calibredb are logged, ids with no calibre mapping
        are warned about. Return the number of successful updates """
    changed = 0
    for item in bookids:
        if item in map_ltoc:
            res, err, rc = calibredb('set_custom', [column, map_ltoc[item], value], [])
            if rc:
                msg = "calibredb set_custom error: "
                # prefer stderr, then stdout, then the bare return code
                if err:
                    logger.error(msg + err)
                elif res:
                    logger.error(msg + res)
                else:
                    logger.error(msg + str(rc))
            else:
                changed += 1
        elif value:
            logger.warn("Unable to set calibre %s true for %s" % (column, item))
        else:
            logger.warn("Unable to clear calibre %s for %s" % (column, item))
    return changed


def syncCalibreList(col_read=None, col_toread=None, userid=None):
    """ Get the lazylibrarian bookid for each read/toread calibre book so we can map our id to theirs,
        and sync current/supplied user's read/toread or supplied read/toread columns to calibre database.

        col_read, col_toread: calibre custom column names; when not supplied
            the user's CalibreRead / CalibreToRead settings are used.
        userid: lazylibrarian user; when not supplied it is taken from the
            'll_uid' cookie of the current cherrypy request.

        Return message giving totals, or an error message. If a missing
        custom column cannot be created, the raw calibredb result for that
        call is returned instead of a message. """

    myDB = database.DBConnection()
    username = ''
    readlist = []
    toreadlist = []
    if not userid:
        cookie = cherrypy.request.cookie
        if cookie and 'll_uid' in list(cookie.keys()):
            userid = cookie['ll_uid'].value
    if userid:
        res = myDB.match('SELECT UserName,ToRead,HaveRead,CalibreRead,CalibreToRead,Perms from users where UserID=?',
                         (userid,))
        if res:
            username = res['UserName']
            if not col_read:
                col_read = res['CalibreRead']
            if not col_toread:
                col_toread = res['CalibreToRead']
            toreadlist = getList(res['ToRead'])
            readlist = getList(res['HaveRead'])
            # suppress duplicates (just in case)
            toreadlist = list(set(toreadlist))
            readlist = list(set(readlist))
        else:
            return "Error: Unable to get user column settings for %s" % userid

    if not userid:
        return "Error: Unable to find current userid"

    if not col_read and not col_toread:
        return "User %s has no calibre columns set" % username

    # check user columns exist in calibre and create if not
    res = calibredb('custom_columns')
    columns = res[0].split('\n')
    custom_columns = []
    for column in columns:
        if column:
            # keep only the text before the first ' ('
            custom_columns.append(column.split(' (')[0])

    # NOTE(review): if only one of the two columns is configured, the other
    # is still falsy here and is passed to add_custom_column as-is - confirm
    # whether that is intended.
    if col_read not in custom_columns:
        added = calibredb('add_custom_column', [col_read, col_read, 'bool'])
        if "column created" not in added[0]:
            return added
    if col_toread not in custom_columns:
        added = calibredb('add_custom_column', [col_toread, col_toread, 'bool'])
        if "column created" not in added[0]:
            return added

    nomatch = 0
    readcol = ''
    toreadcol = ''
    map_ctol = {}  # calibre id -> lazylibrarian bookid
    map_ltoc = {}  # lazylibrarian bookid -> calibre id
    # the column keys in each calibre_list item carry a '*' prefix
    if col_read:
        readcol = '*' + col_read
    if col_toread:
        toreadcol = '*' + col_toread

    calibre_list = calibreList(col_read, col_toread)
    if not isinstance(calibre_list, list):
        # got an error message from calibredb
        return '"%s"' % calibre_list

    for item in calibre_list:
        if toreadcol and toreadcol in item or readcol and readcol in item:
            authorname, authorid, added = addAuthorNameToDB(item['authors'], refresh=False, addbooks=False)
            if authorname:
                if authorname != item['authors']:
                    logger.debug("Changed authorname for [%s] from [%s] to [%s]" %
                                 (item['title'], item['authors'], authorname))
                    item['authors'] = authorname
                bookid, mtype = find_book_in_db(authorname, item['title'], ignored=False, library='eBook')
                if bookid and mtype == "Ignored":
                    logger.warn("Book %s by %s is marked Ignored in database, importing anyway" %
                                (item['title'], authorname))
                if not bookid:
                    # not in our database yet, try an online search and import the best hit
                    searchterm = "%s <ll> %s" % (item['title'], authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.debug("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                       result['authorname'], result['bookname']))
                            bookid = result['bookid']
                            import_book(bookid)
                if bookid:
                    # NOTE: calibre bookid is always an integer, lazylibrarian bookid is a string
                    # (goodreads could be used as an int, but googlebooks can't as it's alphanumeric)
                    # so convert all dict items to strings for ease of matching.
                    map_ctol[str(item['id'])] = str(bookid)
                    map_ltoc[str(bookid)] = str(item['id'])
                else:
                    logger.warn('Calibre Book [%s] by [%s] is not in lazylibrarian database' %
                                (item['title'], authorname))
                    nomatch += 1
            else:
                logger.warn('Calibre Author [%s] not matched in lazylibrarian database' % (item['authors']))
                nomatch += 1

    # Now check current users lazylibrarian read/toread against the calibre library, warn about missing ones
    # which might be books calibre doesn't have, or might be minor differences in author or title

    for label, booklist in [("Read", readlist), ("To_Read", toreadlist)]:
        for bookid in booklist:
            cmd = "SELECT AuthorID,BookName from books where BookID=?"
            book = myDB.match(cmd, (bookid,))
            if not book:
                logger.error('Error finding bookid %s' % bookid)
                continue

            cmd = "SELECT AuthorName from authors where AuthorID=?"
            author = myDB.match(cmd, (book['AuthorID'],))
            if not author:
                logger.error('Error finding authorid %s' % book['AuthorID'])
                continue

            match = False
            high = 0
            highname = ''
            highid = ''
            # first pass: exact author + title
            for item in calibre_list:
                if item['authors'] == author['AuthorName'] and item['title'] == book['BookName']:
                    logger.debug("Exact match for %s [%s]" % (label, book['BookName']))
                    map_ctol[str(item['id'])] = str(bookid)
                    map_ltoc[str(bookid)] = str(item['id'])
                    match = True
                    break
            if not match:
                # second pass: same author, best fuzzy title
                for item in calibre_list:
                    if item['authors'] == author['AuthorName']:
                        n = fuzz.token_sort_ratio(item['title'], book['BookName'])
                        if n > high:
                            high = n
                            highname = item['title']
                            highid = item['id']

                if high > 95:
                    logger.debug("Found ratio match %s%% [%s] for %s [%s]" %
                                 (high, highname, label, book['BookName']))
                    map_ctol[str(highid)] = str(bookid)
                    map_ltoc[str(bookid)] = str(highid)
                    match = True

            if not match:
                logger.warn("No match for %s %s by %s in calibre database, closest match %s%% [%s]" %
                            (label, book['BookName'], author['AuthorName'], high, highname))
                nomatch += 1

    logger.debug("BookID mapping complete, %s match %s, nomatch %s" % (username, len(map_ctol), nomatch))

    # now sync the lists
    # (userid is always set here, we returned above if it could not be found)
    last_read = []
    last_toread = []
    calibre_read = []
    calibre_toread = []

    # what each list looked like at the end of the previous sync
    cmd = 'select SyncList from sync where UserID=? and Label=?'
    res = myDB.match(cmd, (userid, col_read))
    if res:
        last_read = getList(res['SyncList'])
    res = myDB.match(cmd, (userid, col_toread))
    if res:
        last_toread = getList(res['SyncList'])

    for item in calibre_list:
        if toreadcol and toreadcol in item and item[toreadcol]:  # only if True
            if str(item['id']) in map_ctol:
                calibre_toread.append(map_ctol[str(item['id'])])
            else:
                logger.warn("Calibre to_read book %s:%s has no lazylibrarian bookid" %
                            (item['authors'], item['title']))
        if readcol and readcol in item and item[readcol]:  # only if True
            if str(item['id']) in map_ctol:
                calibre_read.append(map_ctol[str(item['id'])])
            else:
                logger.warn("Calibre read book %s:%s has no lazylibrarian bookid" %
                            (item['authors'], item['title']))

    logger.debug("Found %s calibre read, %s calibre toread" % (len(calibre_read), len(calibre_toread)))
    logger.debug("Found %s lazylib read, %s lazylib toread" % (len(readlist), len(toreadlist)))

    # differences are computed against the previous sync list before either side is modified
    added_to_ll_toread = list(set(toreadlist) - set(last_toread))
    removed_from_ll_toread = list(set(last_toread) - set(toreadlist))
    added_to_ll_read = list(set(readlist) - set(last_read))
    removed_from_ll_read = list(set(last_read) - set(readlist))
    logger.debug("lazylibrarian changes to copy to calibre: %s %s %s %s" % (len(added_to_ll_toread),
                 len(removed_from_ll_toread), len(added_to_ll_read), len(removed_from_ll_read)))

    added_to_calibre_toread = list(set(calibre_toread) - set(last_toread))
    removed_from_calibre_toread = list(set(last_toread) - set(calibre_toread))
    added_to_calibre_read = list(set(calibre_read) - set(last_read))
    removed_from_calibre_read = list(set(last_read) - set(calibre_read))
    logger.debug("calibre changes to copy to lazylibrarian: %s %s %s %s" % (len(added_to_calibre_toread),
                 len(removed_from_calibre_toread), len(added_to_calibre_read), len(removed_from_calibre_read)))

    # apply calibre's changes to the user's lazylibrarian lists
    calibre_changes = 0
    for item in added_to_calibre_read:
        if item not in readlist:
            readlist.append(item)
            logger.debug("Lazylibrarian marked %s as read" % item)
            calibre_changes += 1
    for item in added_to_calibre_toread:
        if item not in toreadlist:
            toreadlist.append(item)
            logger.debug("Lazylibrarian marked %s as to_read" % item)
            calibre_changes += 1
    for item in removed_from_calibre_read:
        if item in readlist:
            readlist.remove(item)
            logger.debug("Lazylibrarian removed %s from read" % item)
            calibre_changes += 1
    for item in removed_from_calibre_toread:
        if item in toreadlist:
            toreadlist.remove(item)
            logger.debug("Lazylibrarian removed %s from to_read" % item)
            calibre_changes += 1
    if calibre_changes:
        myDB.action('UPDATE users SET ToRead=?,HaveRead=? WHERE UserID=?',
                    (', '.join(toreadlist), ', '.join(readlist), userid))

    # apply lazylibrarian's changes to calibre (same order as before:
    # toread set, toread clear, read set, read clear)
    ll_changes = 0
    ll_changes += _push_flags_to_calibre(added_to_ll_toread, col_toread, 'true', map_ltoc)
    ll_changes += _push_flags_to_calibre(removed_from_ll_toread, col_toread, '', map_ltoc)
    ll_changes += _push_flags_to_calibre(added_to_ll_read, col_read, 'true', map_ltoc)
    ll_changes += _push_flags_to_calibre(removed_from_ll_read, col_read, '', map_ltoc)

    # store current sync list as comparison for next sync
    controlValueDict = {"UserID": userid, "Label": col_read}
    newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(readlist)}
    myDB.upsert("sync", newValueDict, controlValueDict)
    controlValueDict = {"UserID": userid, "Label": col_toread}
    newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(toreadlist)}
    myDB.upsert("sync", newValueDict, controlValueDict)

    msg = "%s sync updated: %s calibre, %s lazylibrarian" % (username, ll_changes, calibre_changes)
    return msg

Example #7

Show file

def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added """
    try:
        destdir = lazylibrarian.DIRECTORY('Destination')
        if not startdir:
            if not destdir:
                logger.warn('Cannot find destination directory: %s. Not scanning' % destdir)
                return 0
            startdir = destdir

        if not os.path.isdir(startdir):
            logger.warn('Cannot find directory: %s. Not scanning' % startdir)
            return 0

        if not internet():
            logger.warn('Libraryscan: No internet connection')
            return 0

        myDB = database.DBConnection()

        # keep statistics of full library scans
        if startdir == destdir:
            myDB.action('DELETE from stats')
            try:  # remove any extra whitespace in authornames
                authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "%  %"')
                if authors:
                    logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors))))
                    for author in authors:
                        authorid = author["AuthorID"]
                        authorname = ' '.join(author['AuthorName'].split())
                        # Have we got author name both with-and-without extra spaces? If so, merge them
                        duplicate = myDB.match(
                            'Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' % authorname)
                        if duplicate:
                            myDB.action('DELETE from authors where authorname="%s"' % author['AuthorName'])
                            if author['AuthorID'] != duplicate['AuthorID']:
                                myDB.action('UPDATE books set AuthorID="%s" WHERE AuthorID="%s"' %
                                            (duplicate['AuthorID'], author['AuthorID']))
                        else:
                            myDB.action(
                                'UPDATE authors set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid))
            except Exception as e:
                logger.info('Error: ' + str(e))

        logger.info('Scanning ebook directory: %s' % startdir)

        new_book_count = 0
        modified_count = 0
        rescan_count = 0
        rescan_hits = 0
        file_count = 0
        author = ""

        if lazylibrarian.CONFIG['FULL_SCAN']:
            cmd = 'select AuthorName, BookName, BookFile, BookID from books,authors'
            cmd += ' where books.AuthorID = authors.AuthorID and books.Status="Open"'
            if not startdir == destdir:
                cmd += ' and BookFile like "' + startdir + '%"'
            books = myDB.select(cmd)
            status = lazylibrarian.CONFIG['NOTFOUND_STATUS']
            logger.info('Missing books will be marked as %s' % status)
            for book in books:
                bookID = book['BookID']
                bookfile = book['BookFile']

                if not (bookfile and os.path.isfile(bookfile)):
                    myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                    myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                    logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName']))

        # to save repeat-scans of the same directory if it contains multiple formats of the same book,
        # keep track of which directories we've already looked at
        processed_subdirectories = []
        warned = False  # have we warned about no new authors setting
        matchString = ''
        for char in lazylibrarian.CONFIG['EBOOK_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the EBOOK_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
        pattern = re.compile(matchString, re.VERBOSE)

        for r, d, f in os.walk(startdir):
            for directory in d[:]:
                # prevent magazine being scanned
                if directory.startswith("_") or directory.startswith("."):
                    d.remove(directory)

            for files in f:
                file_count += 1

                if isinstance(r, str):
                    r = r.decode(lazylibrarian.SYS_ENCODING)

                subdirectory = r.replace(startdir, '')
                # Added new code to skip if we've done this directory before.
                # Made this conditional with a switch in config.ini
                # in case user keeps multiple different books in the same subdirectory
                if lazylibrarian.CONFIG['IMP_SINGLEBOOK'] and (subdirectory in processed_subdirectories):
                    logger.debug("[%s] already scanned" % subdirectory)
                else:
                    # If this is a book, try to get author/title/isbn/language
                    # if epub or mobi, read metadata from the book
                    # If metadata.opf exists, use that allowing it to override
                    # embedded metadata. User may have edited metadata.opf
                    # to merge author aliases together
                    # If all else fails, try pattern match for author/title
                    # and look up isbn/lang from LT or GR later
                    match = 0
                    if is_valid_booktype(files):

                        logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory))

                        language = "Unknown"
                        isbn = ""
                        book = ""
                        author = ""
                        gr_id = ""
                        gb_id = ""
                        extn = os.path.splitext(files)[1]

                        # if it's an epub or a mobi we can try to read metadata from it
                        if (extn == ".epub") or (extn == ".mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)

                            try:
                                res = get_book_info(book_filename)
                            except Exception as e:
                                logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e)))
                                res = {}
                            # title and creator are the minimum we need
                            if 'title' in res and 'creator' in res:
                                book = res['title']
                                author = res['creator']
                                if book and len(book) > 2 and author and len(author) > 2:
                                    match = 1
                                if 'language' in res:
                                    language = res['language']
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                if 'type' in res:
                                    extn = res['type']
                                logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                             (isbn, language, author, book, extn))
                            if not match:
                                logger.debug("Book meta incomplete in %s" % book_filename)

                        # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                        # just look for any .opf file in the current directory since we don't know
                        # LL preferred authorname/bookname at this point.
                        # Allow metadata in file to override book contents as may be users pref

                        metafile = opf_file(r)
                        try:
                            res = get_book_info(metafile)
                        except Exception as e:
                            logger.debug('get_book_info failed for %s, %s' % (metafile, str(e)))
                            res = {}
                        # title and creator are the minimum we need
                        if 'title' in res and 'creator' in res:
                            book = res['title']
                            author = res['creator']
                            if book and len(book) > 2 and author and len(author) > 2:
                                match = 1
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'gr_id' in res:
                                gr_id = res['gr_id']
                            logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, gr_id))
                        if not match:
                            logger.debug("File meta incomplete in %s" % metafile)

                        if not match:  # no author/book from metadata file, and not embedded either
                            match = pattern.match(files)
                            if match:
                                author = match.group("author")
                                book = match.group("book")
                                if len(book) <= 2 or len(author) <= 2:
                                    match = 0
                            if not match:
                                logger.debug("Pattern match failed [%s]" % files)

                        if match:
                            # flag that we found a book in this subdirectory
                            processed_subdirectories.append(subdirectory)

                            # If we have a valid looking isbn, and language != "Unknown", add it to cache
                            if language != "Unknown" and is_valid_isbn(isbn):
                                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                                # we need to add it to language cache if not already
                                # there, is_valid_isbn has checked length is 10 or 13
                                if len(isbn) == 10:
                                    isbnhead = isbn[0:3]
                                else:
                                    isbnhead = isbn[3:6]
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead)
                                if not match:
                                    myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                                else:
                                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                            author, authorid, new = addAuthorNameToDB(author)  # get the author name as we know it...

                            if author:
                                # author exists, check if this book by this author is in our database
                                # metadata might have quotes in book name
                                # some books might be stored under a different author name
                                # eg books by multiple authors, books where author is "writing as"
                                # or books we moved to "merge" authors
                                book = book.replace("'", "")

                                # First try and find it under author and bookname
                                # as we may have it under a different bookid or isbn to goodreads/googlebooks
                                # which might have several bookid/isbn for the same book
                                bookid = find_book_in_db(myDB, author, book)

                                if not bookid:
                                    # Title or author name might not match or multiple authors
                                    # See if the gr_id, gb_id is already in our database
                                    if gr_id:
                                        bookid = gr_id
                                    elif gb_id:
                                        bookid = gb_id
                                    else:
                                        bookid = ""

                                    if bookid:
                                        match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid)
                                        if not match:
                                            msg = 'Unable to find book %s by %s in database, trying to add it using '
                                            if bookid == gr_id:
                                                msg += "GoodReads ID " + gr_id
                                            if bookid == gb_id:
                                                msg += "GoogleBooks ID " + gb_id
                                            logger.debug(msg % (book, author))
                                            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and gr_id:
                                                GR_ID = GoodReads(gr_id)
                                                GR_ID.find_book(gr_id, None)
                                            elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks" and gb_id:
                                                GB_ID = GoogleBooks(gb_id)
                                                GB_ID.find_book(gb_id, None)
                                            # see if it's there now...
                                            match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid)
                                            if not match:
                                                logger.debug("Unable to add bookid %s to database" % bookid)
                                                bookid = ""

                                if not bookid and isbn:
                                    # See if the isbn is in our database
                                    match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn)
                                    if match:
                                        bookid = match['BookID']

                                if not bookid:
                                    # get author name from parent directory of this book directory
                                    newauthor = os.path.basename(os.path.dirname(r))
                                    # calibre replaces trailing periods with _ eg Smith Jr. -> Smith Jr_
                                    if newauthor.endswith('_'):
                                        newauthor = newauthor[:-1] + '.'
                                    if author.lower() != newauthor.lower():
                                        logger.debug("Trying authorname [%s]" % newauthor)
                                        bookid = find_book_in_db(myDB, newauthor, book)
                                        if bookid:
                                            logger.warn("%s not found under [%s], found under [%s]" %
                                                        (book, author, newauthor))

                                # at this point if we still have no bookid, it looks like we
                                # have author and book title but no database entry for it
                                if not bookid:
                                    if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                                        # Either goodreads doesn't have the book or it didn't match language prefs
                                        # Since we have the book anyway, try and reload it ignoring language prefs
                                        rescan_count += 1
                                        base_url = 'http://www.goodreads.com/search.xml?q='
                                        params = {"key": lazylibrarian.CONFIG['GR_API']}
                                        if author[1] in '. ':
                                            surname = author
                                            forename = ''
                                            while surname[1] in '. ':
                                                forename = forename + surname[0] + '.'
                                                surname = surname[2:].strip()
                                            if author != forename + ' ' + surname:
                                                logger.debug('Stripped authorname [%s] to [%s %s]' %
                                                            (author, forename, surname))
                                                author = forename + ' ' + surname

                                        author = ' '.join(author.split())  # ensure no extra whitespace

                                        searchname = author + ' ' + book
                                        searchname = cleanName(unaccented(searchname))
                                        searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING))
                                        set_url = base_url + searchterm + '&' + urllib.urlencode(params)
                                        try:
                                            rootxml, in_cache = get_xml_request(set_url)
                                            if not len(rootxml):
                                                logger.debug("Error requesting results from GoodReads")
                                            else:
                                                resultxml = rootxml.getiterator('work')
                                                for item in resultxml:
                                                    booktitle = item.find('./best_book/title').text
                                                    book_fuzz = fuzz.token_set_ratio(booktitle, book)
                                                    if book_fuzz >= 98:
                                                        logger.debug("Rescan found %s : %s" % (booktitle, language))
                                                        rescan_hits += 1
                                                        bookid = item.find('./best_book/id').text
                                                        GR_ID = GoodReads(bookid)
                                                        GR_ID.find_book(bookid, None)
                                                        if language and language != "Unknown":
                                                            # set language from book metadata
                                                            logger.debug("Setting language from metadata %s : %s" % (booktitle, language))
                                                            myDB.action('UPDATE books SET BookLang="%s" WHERE BookID="%s"' %
                                                                        (language, bookid))
                                                        break
                                                if not bookid:
                                                    logger.warn("GoodReads doesn't know about %s" % book)
                                        except Exception as e:
                                            logger.error("Error finding rescan results: %s" % str(e))

                                    elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                                        # if we get here using googlebooks it's because googlebooks
                                        # doesn't have the book. No point in looking for it again.
                                        logger.warn("GoogleBooks doesn't know about %s" % book)

                                # see if it's there now...
                                if bookid:
                                    cmd = 'SELECT books.Status, BookFile, AuthorName, BookName from books,authors '
                                    cmd += 'where books.AuthorID = authors.AuthorID and BookID="%s"' % bookid
                                    check_status = myDB.match(cmd)

                                    if not check_status:
                                        logger.debug('Unable to find bookid %s in database' % bookid)
                                    else:
                                        if check_status['Status'] != 'Open':
                                            # we found a new book
                                            new_book_count += 1
                                            myDB.action(
                                                'UPDATE books set Status="Open" where BookID="%s"' % bookid)

                                        # store book location so we can check if it gets removed
                                        book_filename = os.path.join(r, files)
                                        if not check_status['BookFile']:  # no previous location
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))
                                        # location may have changed since last scan
                                        elif book_filename != check_status['BookFile']:
                                            modified_count += 1
                                            logger.warn("Updating book location for %s %s from %s to %s" %
                                                        (author, book, check_status['BookFile'], book_filename))
                                            logger.debug("%s %s matched %s BookID %s, [%s][%s]" %
                                                        (author, book, check_status['Status'], bookid,
                                                        check_status['AuthorName'], check_status['BookName']))
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))

                                        # update cover file to cover.jpg in book folder (if exists)
                                        bookdir = os.path.dirname(book_filename)
                                        coverimg = os.path.join(bookdir, 'cover.jpg')
                                        if os.path.isfile(coverimg):
                                            cachedir = lazylibrarian.CACHEDIR
                                            cacheimg = os.path.join(cachedir, 'book', bookid + '.jpg')
                                            copyfile(coverimg, cacheimg)
                                else:
                                    logger.warn(
                                        "Failed to match book [%s] by [%s] in database" % (book, author))
                            else:
                                if not warned and not lazylibrarian.CONFIG['ADD_AUTHOR']:
                                    logger.warn("Add authors to database is disabled")
                                    warned = True

        logger.info("%s/%s new/modified book%s found and added to the database" %
                    (new_book_count, modified_count, plural(new_book_count + modified_count)))
        logger.info("%s file%s processed" % (file_count, plural(file_count)))

        if startdir == destdir:
            # On full library scans, check for missing workpages
            setWorkPages()
            # and books with unknown language
            nolang = myDB.match(
                "select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'")
            nolang = nolang['counter']
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))
            # log the totals accumulated in the stats table
            stats = myDB.match(
                "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                    sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats")

            st= {'GR_book_hits': stats['sum(GR_book_hits)'], 'GB_book_hits': stats['sum(GR_book_hits)'],
                 'GR_lang_hits': stats['sum(GR_lang_hits)'], 'LT_lang_hits': stats['sum(LT_lang_hits)'],
                 'GB_lang_change': stats['sum(GB_lang_change)'], 'cache_hits': stats['sum(cache_hits)'],
                 'bad_lang': stats['sum(bad_lang)'], 'bad_char': stats['sum(bad_char)'],
                 'uncached': stats['sum(uncached)'], 'duplicates': stats['sum(duplicates)']}

            for item in st.keys():
                if st[item] is None:
                    st[item] = 0

            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (st['GB_lang_change'], plural(st['GB_lang_change'])))
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (st['GR_lang_hits'], plural(st['GR_lang_hits'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (st['LT_lang_hits'], plural(st['LT_lang_hits'])))
            logger.debug("Language cache was hit %s time%s" %
                         (st['cache_hits'], plural(st['cache_hits'])))
            logger.debug("Unwanted language removed %s book%s" %
                         (st['bad_lang'], plural(st['bad_lang'])))
            logger.debug("Unwanted characters removed %s book%s" %
                         (st['bad_char'], plural(st['bad_char'])))
            logger.debug("Unable to cache language for %s book%s with missing ISBN" %
                         (st['uncached'], plural(st['uncached'])))
            logger.debug("Found %s duplicate book%s" %
                         (st['duplicates'], plural(st['duplicates'])))
            logger.debug("Rescan %s hit%s, %s miss" %
                         (rescan_hits, plural(rescan_hits), rescan_count - rescan_hits))
            logger.debug("Cache %s hit%s, %s miss" %
                         (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.match("select count('ISBN') as counter from languages")
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])

            # Cache any covers and images
            images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
            if len(images):
                logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    bookid = item['bookid']
                    bookimg = item['bookimg']
                    # bookname = item['bookname']
                    newimg, success = cache_img("book", bookid, bookimg)
                    if success:
                        myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

            images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
            if len(images):
                logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    authorid = item['authorid']
                    authorimg = item['authorimg']
                    # authorname = item['authorname']
                    newimg, success = cache_img("author", authorid, authorimg)
                    if success:
                        myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))

            # On full scan, update bookcounts for all authors, not just new ones - refresh may have located
            # new books for existing authors especially if switched provider gb/gr or changed wanted languages
            authors = myDB.select('select AuthorID from authors')
        else:
            # On single author/book import, just update bookcount for that author
            authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author.replace('"', '""'))

        logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
        for author in authors:
            update_totals(author['AuthorID'])

        logger.info('Library scan complete')
        return new_book_count

    except Exception:
        logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())