Ejemplo n.º 1
0
def import_book(bookid, wait=False):
    """ search goodreads or googlebooks for a bookid and import the book """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        GB = GoogleBooks(bookid)
        if not wait:
            _ = threading.Thread(target=GB.find_book, name='GB-IMPORT', args=[bookid, "Wanted"]).start()
        else:
            GB.find_book(bookid, "Wanted")
    else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
        GR = GoodReads(bookid)
        if not wait:
            _ = threading.Thread(target=GR.find_book, name='GR-IMPORT', args=[bookid, "Wanted"]).start()
        else:
            GR.find_book(bookid, "Wanted")
Ejemplo n.º 2
0
def import_book(bookid, ebook=None, audio=None, wait=False):
    """ search goodreads or googlebooks for a bookid and import the book
        ebook/audio=None makes find_book use configured default """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        GB = GoogleBooks(bookid)
        if not wait:
            _ = threading.Thread(target=GB.find_book, name='GB-IMPORT', args=[bookid, ebook, audio]).start()
        else:
            GB.find_book(bookid, ebook, audio)
    else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
        GR = GoodReads(bookid)
        if not wait:
            _ = threading.Thread(target=GR.find_book, name='GR-IMPORT', args=[bookid, ebook, audio]).start()
        else:
            GR.find_book(bookid, ebook, audio)
Ejemplo n.º 3
0
def import_book(bookid, ebook=None, audio=None, wait=False):
    """ search goodreads or googlebooks for a bookid and import the book
        ebook/audio=None makes find_book use configured default """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        GB = GoogleBooks(bookid)
        if not wait:
            _ = threading.Thread(target=GB.find_book,
                                 name='GB-IMPORT',
                                 args=[bookid, ebook, audio]).start()
        else:
            GB.find_book(bookid, ebook, audio)
    else:  # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
        GR = GoodReads(bookid)
        if not wait:
            _ = threading.Thread(target=GR.find_book,
                                 name='GR-IMPORT',
                                 args=[bookid, ebook, audio]).start()
        else:
            GR.find_book(bookid, ebook, audio)
Ejemplo n.º 4
0
def grsync(status, shelf):
    try:
        logger.debug('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        GA = grauth()
        GR = None
        gr_shelf = GA.get_gr_shelf(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.debug("There are %s %s books, %s books on goodreads %s shelf" %
                     (len(ll_list), dstatus, len(gr_shelf), shelf))
        #print ll_list
        #print gr_shelf

        not_on_shelf = []
        not_in_ll = []
        for book in ll_list:
            if book not in gr_shelf:
                not_on_shelf.append(book)
        for book in gr_shelf:
            if book not in ll_list:
                not_in_ll.append(book)

        to_shelf = 0
        to_ll = 0
        moved = 0
        # these need adding to shelf
        if not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.debug('Not connected to goodreads')
        else:
            for book in not_on_shelf:
                #print "%s is not on shelf" % book
                time_now = int(time.time())
                if time_now <= lazylibrarian.LAST_GOODREADS:
                    time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                try:
                    res, content = GA.BookToList(book, shelf)
                except Exception as e:
                    logger.debug("Error in BookToList: %s" % str(e))
                    res = None

                if res:
                    logger.debug("%10s added to %s shelf" % (book, shelf))
                    to_shelf += 1
                    #print content
                else:
                    logger.debug("Failed to add %s to %s shelf" % (book, shelf))
                    #print content

        # "to-read" books need adding to lazylibrarian as "wanted" if not already Open/Have,
        # if they are already Open/Have, remove from goodreads to-read shelf, add to owned shelf
        # "owned" need adding as "Have" as librarysync will pick up "Open" ones or change Have to Open

        for book in not_in_ll:
            #print "%s is not marked %s" % (book, status)
            cmd = 'select Status from books where bookid="%s"' % book
            result = myDB.match(cmd)
            if result:
                if result['Status'] in ['Have', 'Open']:  # don't change status if we have it
                    if shelf == 'to-read':

                        time_now = int(time.time())
                        if time_now <= lazylibrarian.LAST_GOODREADS:
                            time.sleep(1)
                            lazylibrarian.LAST_GOODREADS = time_now
                        # need to move it from to-read shelf to owned shelf
                        res, content = GA.BookToList(book, 'to-read', 'remove')
                        if res:
                            logger.debug("%10s removed from to-read shelf" % book)
                            #print content
                        else:
                            logger.debug("Failed to remove %s from to-read shelf" % book)
                            #print content

                        time_now = int(time.time())
                        if time_now <= lazylibrarian.LAST_GOODREADS:
                            time.sleep(1)
                            lazylibrarian.LAST_GOODREADS = time_now
                        res, content = GA.BookToList(book, 'owned', 'add')
                        if res:
                            logger.debug("%10s added to owned shelf" % book)
                            moved += 1
                            #print content
                        else:
                            logger.debug("Failed to add %s to owned shelf" % book)
                            #print content
                    else:
                        logger.debug("%10s is already marked %s" % (book, result['Status']))
                elif shelf == 'owned':
                    myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                else:
                    myDB.action('UPDATE books SET Status=? WHERE BookID=?', (status, book))
            else:  # add book to database as wanted
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                to_ll += 1

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return to_shelf, to_ll, moved

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0,0,0
Ejemplo n.º 5
0
def grsync(status, shelf):
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        GA = grauth()
        GR = None
        shelves = GA.get_shelf_list()
        found = False
        for item in shelves:  # type: dict
            if item['name'] == shelf:
                found = True
                break
        if not found:
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # Sync method for WANTED:
        # Get results of last_sync (if any)
        # For each book in last_sync
        #    if not in ll_list, new deletion, remove from gr_shelf
        #    if not in gr_shelf, new deletion, remove from ll_list, mark Skipped
        # For each book in ll_list
        #    if not in last_sync, new addition, add to gr_shelf
        # For each book in gr_shelf
        #    if not in last sync, new addition, add to ll_list, mark Wanted
        #
        # save ll WANTED as last_sync

        # For HAVE/OPEN method is the same, but only change status if HAVE, not OPEN

        cmd = 'select SyncList from sync where UserID="%s" and Label="%s"' % (
            "goodreads", shelf)
        res = myDB.match(cmd)
        last_sync = []
        shelf_changed = 0
        ll_changed = 0
        if res:
            last_sync = getList(res['SyncList'])

        added_to_shelf = list(set(gr_shelf) - set(last_sync))
        removed_from_shelf = list(set(last_sync) - set(gr_shelf))
        added_to_ll = list(set(ll_list) - set(last_sync))
        removed_from_ll = list(set(last_sync) - set(ll_list))

        logger.info("%s missing from lazylibrarian %s" %
                    (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            # first the deletions since last sync...
            try:
                res, content = GA.BookToList(book, shelf, action='remove')
            except Exception as e:
                logger.debug("Error removing %s from %s: %s %s" %
                             (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s removed from %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to remove %s from %s shelf: %s" %
                            (book, shelf, content))

        logger.info("%s missing from goodreads %s" %
                    (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            # deleted from goodreads
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if res['Status'] in ['Have', 'Wanted']:
                    myDB.action(
                        'UPDATE books SET Status="Skipped" WHERE BookID=?',
                        (book, ))
                    ll_changed += 1
                    logger.debug("%10s set to Skipped" % book)
                else:
                    logger.warn("Not removing %s, book is marked %s" %
                                (book, res['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            try:
                res, content = GA.BookToList(book, shelf, action='add')
            except Exception as e:
                logger.debug("Error adding %s to %s: %s %s" %
                             (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s added to %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to add %s to %s shelf: %s" %
                            (book, shelf, content))

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if status == 'Open':
                    if res['Status'] == 'Open':
                        logger.warn("Book %s is already marked Open" % book)
                    else:
                        myDB.action(
                            'UPDATE books SET Status="Have" WHERE BookID=?',
                            (book, ))
                        ll_changed += 1
                        logger.debug("%10s set to Have" % book)
                elif status == 'Wanted':
                    # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                    # (depending on user prefs, to-read and wanted might not be the same thing)
                    if lazylibrarian.CONFIG['GR_UNIQUE'] and res['Status'] in [
                            'Open', 'Have'
                    ]:
                        try:
                            res, content = GA.BookToList(book,
                                                         shelf,
                                                         action='remove')
                        except Exception as e:
                            logger.debug(
                                "Error removing %s from %s: %s %s" %
                                (book, shelf, type(e).__name__, str(e)))
                            res = None
                            content = ''
                        if res:
                            logger.debug("%10s removed from %s shelf" %
                                         (book, shelf))
                            shelf_changed += 1
                        else:
                            logger.warn(
                                "Failed to remove %s from %s shelf: %s" %
                                (book, shelf, content))
                    elif res['Status'] != 'Open':
                        myDB.action(
                            'UPDATE books SET Status="Wanted" WHERE BookID=?',
                            (book, ))
                        ll_changed += 1
                        logger.debug("%10s set to Wanted" % book)
                    else:
                        logger.warn(
                            "Not setting %s as Wanted, already marked Open" %
                            book)

        # get new definitive list from ll
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {
            "Date": str(time.time()),
            "Synclist": ', '.join(ll_list)
        }
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' %
                     traceback.format_exc())
        return 0, 0
Ejemplo n.º 6
0
def addBookToDB(bookid, authorname):
    type = 'book'
    myDB = database.DBConnection()
    GR = GoodReads(authorname, type)
    GB = GoogleBooks(bookid, type)

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?',
                         [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {"BookID": "BookID: %s" % (bookid), "Status": "Loading"}
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()

    if not book:
        logger.warn("Error fetching bookinfo for BookID: " + bookid)

    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)


# process author
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName='?'",
                           [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorName": "Authorname: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()

    if not author:
        logger.warn("Error fetching authorinfo with name: " + authorname)

    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": author['authorid'],
            "AuthorLink": author['authorlink'],
            "AuthorImg": author['authorimg'],
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
Ejemplo n.º 7
0
def grsync(status, shelf):
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        GA = grauth()
        GR = None
        shelves = GA.get_shelf_list()
        found = False
        for item in shelves:  # type: dict
            if item['name'] == shelf:
                found = True
                break
        if not found:
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # Sync method for WANTED:
        # Get results of last_sync (if any)
        # For each book in last_sync
        #    if not in ll_list, new deletion, remove from gr_shelf
        #    if not in gr_shelf, new deletion, remove from ll_list, mark Skipped
        # For each book in ll_list
        #    if not in last_sync, new addition, add to gr_shelf
        # For each book in gr_shelf
        #    if not in last sync, new addition, add to ll_list, mark Wanted
        #
        # save ll WANTED as last_sync

        # For HAVE/OPEN method is the same, but only change status if HAVE, not OPEN

        cmd = 'select SyncList from sync where UserID="%s" and Label="%s"' % ("goodreads", shelf)
        res = myDB.match(cmd)
        last_sync = []
        shelf_changed = 0
        ll_changed = 0
        if res:
            last_sync = getList(res['SyncList'])

        added_to_shelf = list(set(gr_shelf) - set(last_sync))
        removed_from_shelf = list(set(last_sync) - set(gr_shelf))
        added_to_ll = list(set(ll_list) - set(last_sync))
        removed_from_ll = list(set(last_sync) - set(ll_list))

        logger.info("%s missing from lazylibrarian %s" % (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            # first the deletions since last sync...
            try:
                res, content = GA.BookToList(book, shelf, action='remove')
            except Exception as e:
                logger.error("Error removing %s from %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s removed from %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to remove %s from %s shelf: %s" % (book, shelf, content))

        logger.info("%s missing from goodreads %s" % (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            # deleted from goodreads
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if res['Status'] in ['Have', 'Wanted']:
                    myDB.action('UPDATE books SET Status="Skipped" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Skipped" % book)
                else:
                    logger.warn("Not removing %s, book is marked %s" % (book, res['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            try:
                res, content = GA.BookToList(book, shelf, action='add')
            except Exception as e:
                logger.error("Error adding %s to %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s added to %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to add %s to %s shelf: %s" % (book, shelf, content))

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if status == 'Open':
                    if res['Status'] == 'Open':
                        logger.warn("Book %s is already marked Open" % book)
                    else:
                        myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                        ll_changed += 1
                        logger.debug("%10s set to Have" % book)
                elif status == 'Wanted':
                    # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                    # (depending on user prefs, to-read and wanted might not be the same thing)
                    if lazylibrarian.CONFIG['GR_UNIQUE'] and res['Status'] in ['Open', 'Have']:
                        try:
                            res, content = GA.BookToList(book, shelf, action='remove')
                        except Exception as e:
                            logger.error("Error removing %s from %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                            res = None
                            content = ''
                        if res:
                            logger.debug("%10s removed from %s shelf" % (book, shelf))
                            shelf_changed += 1
                        else:
                            logger.warn("Failed to remove %s from %s shelf: %s" % (book, shelf, content))
                    elif res['Status'] != 'Open':
                        myDB.action('UPDATE books SET Status="Wanted" WHERE BookID=?', (book,))
                        ll_changed += 1
                        logger.debug("%10s set to Wanted" % book)
                    else:
                        logger.warn("Not setting %s as Wanted, already marked Open" % book)

        # get new definitive list from ll
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(ll_list)}
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0
Ejemplo n.º 8
0
def addBookToDB(bookid, authorname):
    type = 'book'
    myDB = database.DBConnection()
    GR = GoodReads(authorname, type)

# process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?', [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {
            "BookID":   "BookID: %s" % (bookid),
            "Status":       "Loading"
            }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()

    if not book:
        logger.warn("Error fetching bookinfo for BookID: " + bookid)

    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName":   book['authorname'],
            "BookName":     book['bookname'],
            "BookDesc":     book['bookdesc'],
            "BookIsbn":     book['bookisbn'],
            "BookImg":      book['bookimg'],
            "BookLink":     book['booklink'],
            "BookRate":     book['bookrate'],
            "BookPages":    book['bookpages'],
            "BookDate":     book['bookdate'],
            "BookLang":     book['booklang'],
            "Status":       "Skipped",
            "BookAdded":    formatter.today()
            }

        myDB.upsert("books", newValueDict, controlValueDict)

# process author
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName='?'", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorName":   "Authorname: %s" % (authorname),
            "Status":       "Loading"
            }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()

    if not author:
        logger.warn("Error fetching authorinfo with name: " + authorname)

    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID":     author['authorid'],
            "AuthorLink":   author['authorlink'],
            "AuthorImg":    author['authorimg'],
            "AuthorBorn":   author['authorborn'],
            "AuthorDeath":  author['authordeath'],
            "DateAdded":    formatter.today(),
            "Status":       "Loading"
            }
        myDB.upsert("authors", newValueDict, controlValueDict)
Ejemplo n.º 9
0
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added """
    try:
        destdir = lazylibrarian.DIRECTORY('Destination')
        if not startdir:
            if not destdir:
                return 0
            startdir = destdir

        if not os.path.isdir(startdir):
            logger.warn('Cannot find directory: %s. Not scanning' % startdir)
            return 0

        myDB = database.DBConnection()

        # keep statistics of full library scans
        if startdir == destdir:
            myDB.action('DELETE from stats')
            try:  # remove any extra whitespace in authornames
                authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "%  %"')
                if authors:
                    logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors))))
                    for author in authors:
                        authorid = author["AuthorID"]
                        authorname = ' '.join(author['AuthorName'].split())
                        # Have we got author name both with-and-without extra spaces? If so, merge them
                        duplicate = myDB.match('Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' % authorname)
                        if duplicate:
                            myDB.action('DELETE from authors where authorname="%s"' % author['AuthorName'])
                            myDB.action('UPDATE books set AuthorName="%s" WHERE AuthorName="%s"' %
                                        (duplicate['AuthorName'], author['AuthorName']))
                            if author['AuthorID'] != duplicate['AuthorID']:
                                myDB.action('UPDATE books set AuthorID="%s" WHERE AuthorID="%s"' %
                                            (duplicate['AuthorID'], author['AuthorID']))
                        else:
                            myDB.action('UPDATE authors set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid))
                            myDB.action('UPDATE books set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid))
            except Exception as e:
                logger.info('Error: ' + str(e))

        logger.info('Scanning ebook directory: %s' % startdir)

        new_book_count = 0
        modified_count = 0
        file_count = 0
        author = ""

        if lazylibrarian.FULL_SCAN:
            if startdir == destdir:
                books = myDB.select(
                    'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
            else:
                books = myDB.select(
                    'select AuthorName, BookName, BookFile, BookID from books where Status="Open"' + \
                    ' and BookFile like "' +  startdir + '%"')

            status = lazylibrarian.NOTFOUND_STATUS
            logger.info('Missing books will be marked as %s' % status)
            for book in books:
                bookID = book['BookID']
                bookfile = book['BookFile']

                if not(bookfile and os.path.isfile(bookfile)):
                    myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                    myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                    logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName']))

        # to save repeat-scans of the same directory if it contains multiple formats of the same book,
        # keep track of which directories we've already looked at
        processed_subdirectories = []

        matchString = ''
        for char in lazylibrarian.EBOOK_DEST_FILE:
            matchString = matchString + '\\' + char
        # massage the EBOOK_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.EBOOK_TYPE)
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
        pattern = re.compile(matchString, re.VERBOSE)

        for r, d, f in os.walk(startdir):
            for directory in d[:]:
                # prevent magazine being scanned
                if directory.startswith("_") or directory.startswith("."):
                    d.remove(directory)

            for files in f:
                file_count += 1

                if isinstance(r, str):
                    r = r.decode(lazylibrarian.SYS_ENCODING)

                subdirectory = r.replace(startdir, '')
                # Added new code to skip if we've done this directory before.
                # Made this conditional with a switch in config.ini
                # in case user keeps multiple different books in the same subdirectory
                if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                    logger.debug("[%s] already scanned" % subdirectory)
                else:
                    # If this is a book, try to get author/title/isbn/language
                    # if epub or mobi, read metadata from the book
                    # If metadata.opf exists, use that allowing it to override
                    # embedded metadata. User may have edited metadata.opf
                    # to merge author aliases together
                    # If all else fails, try pattern match for author/title
                    # and look up isbn/lang from LT or GR later
                    match = 0
                    if is_valid_booktype(files):

                        logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory))

                        language = "Unknown"
                        isbn = ""
                        book = ""
                        author = ""
                        gr_id = ""
                        gb_id = ""
                        extn = os.path.splitext(files)[1]

                        # if it's an epub or a mobi we can try to read metadata from it
                        if (extn == ".epub") or (extn == ".mobi"):
                            book_filename = os.path.join(
                                r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))

                            try:
                                res = get_book_info(book_filename)
                            except Exception as e:
                                logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e)))
                                res = {}
                            if 'title' in res and 'creator' in res:  # this is the minimum we need
                                match = 1
                                book = res['title']
                                author = res['creator']
                                if 'language' in res:
                                    language = res['language']
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                if 'type' in res:
                                    extn = res['type']

                                logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                             (isbn, language, author, book, extn))
                            else:

                                logger.debug("Book meta incomplete in %s" % book_filename)

                        # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                        # just look for any .opf file in the current directory since we don't know
                        # LL preferred authorname/bookname at this point.
                        # Allow metadata in file to override book contents as may be users pref

                        metafile = opf_file(r)
                        try:
                            res = get_book_info(metafile)
                        except Exception as e:
                            logger.debug('get_book_info failed for %s, %s' % (metafile, str(e)))
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'gr_id' in res:
                                gr_id = res['gr_id']
                            logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, gr_id))
                        else:
                            logger.debug("File meta incomplete in %s" % metafile)

                        if not match:  # no author/book from metadata file, and not embedded either
                            match = pattern.match(files)
                            if match:
                                author = match.group("author")
                                book = match.group("book")
                            else:
                                logger.debug("Pattern match failed [%s]" % files)

                        if match:
                            # flag that we found a book in this subdirectory
                            processed_subdirectories.append(subdirectory)

                            # If we have a valid looking isbn, and language != "Unknown", add it to cache
                            if language != "Unknown" and is_valid_isbn(isbn):
                                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                                # we need to add it to language cache if not already
                                # there, is_valid_isbn has checked length is 10 or 13
                                if len(isbn) == 10:
                                    isbnhead = isbn[0:3]
                                else:
                                    isbnhead = isbn[3:6]
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % (isbnhead))
                                if not match:
                                    myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                                else:
                                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                            # get authors name in a consistent format
                            if "," in author:
                                # guess its "surname, forename" or "surname, initial(s)" so swap them round
                                words = author.split(',')
                                forename = words[1].strip()
                                surname = words[0].strip()
                                author = forename + ' ' + surname
                            # reformat any initials, we want to end up with A.B. van Smith
                            if author[1] == ' ' or author[1] == '.':
                                forename = ''
                                while author[1] == ' ' or author[1] == '.':
                                    forename = forename + author[0] + '.'
                                    author = author[2:].strip()
                                author = forename + ' ' + author

                            author = ' '.join(author.split())  # ensure no extra whitespace

                            # Check if the author exists, and import the author if not,
                            # before starting any complicated book-name matching to save repeating the search
                            #
                            check_exist_author = myDB.match(
                                'SELECT * FROM authors where AuthorName="%s"' % author.replace('"', '""'))
                            if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                                # no match for supplied author, but we're allowed to add new ones

                                GR = GoodReads(author)
                                try:
                                    author_gr = GR.find_author_id()
                                except Exception as e:
                                    logger.warn("Error finding author id for [%s] %s" % (author, str(e)))
                                    continue

                                # only try to add if GR data matches found author data
                                if author_gr:
                                    authorname = author_gr['authorname']

                                    # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                    match_auth = author.replace('.', ' ')
                                    match_auth = ' '.join(match_auth.split())

                                    match_name = authorname.replace('.', ' ')
                                    match_name = ' '.join(match_name.split())

                                    match_name = unaccented(match_name)
                                    match_auth = unaccented(match_auth)

                                    # allow a degree of fuzziness to cater for different accented character handling.
                                    # some author names have accents,
                                    # filename may have the accented or un-accented version of the character
                                    # The currently non-configurable value of fuzziness might need to go in config
                                    # We stored GoodReads unmodified author name in
                                    # author_gr, so store in LL db under that
                                    # fuzz.ratio doesn't lowercase for us
                                    match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                    if match_fuzz < 90:
                                        logger.debug("Failed to match author [%s] to authorname [%s] fuzz [%d]" %
                                                    (author, match_name, match_fuzz))

                                    # To save loading hundreds of books by unknown authors at GR or GB, ignore unknown
                                    if (author != "Unknown") and (match_fuzz >= 90):
                                        # use "intact" name for author that we stored in
                                        # GR author_dict, not one of the various mangled versions
                                        # otherwise the books appear to be by a different author!
                                        author = author_gr['authorname']
                                        # this new authorname may already be in the
                                        # database, so check again
                                        check_exist_author = myDB.match(
                                            'SELECT * FROM authors where AuthorName="%s"' % author.replace('"', '""'))
                                        if not check_exist_author:
                                            logger.info("Adding new author [%s]" % author)
                                            try:
                                                addAuthorToDB(author, refresh=False)
                                                check_exist_author = myDB.match(
                                                    'SELECT * FROM authors where AuthorName="%s"' %
                                                    author.replace('"', '""'))
                                            except Exception:
                                                continue

                            # check author exists in db, either newly loaded or already there
                            if not check_exist_author:
                                logger.debug("Failed to match author [%s] in database" % author)
                            else:
                                # author exists, check if this book by this author is in our database
                                # metadata might have quotes in book name
                                # some books might be stored under a different author name
                                # eg books by multiple authors, books where author is "writing as"
                                # or books we moved to "merge" authors
                                book = book.replace("'", "")
                                match = False
                                # See if the gr_id, gb_id or isbn is already in our database
                                if gr_id:
                                    bookid = gr_id
                                elif gb_id:
                                    bookid = gb_id
                                else:
                                    bookid = ""

                                if bookid:
                                    match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid)
                                    if not match:
                                        logger.debug('Unable to find book %s by %s in database, trying to add it' %
                                                    (book, author))
                                        if lazylibrarian.BOOK_API == "GoodReads" and gr_id:
                                            GR_ID = GoodReads(gr_id)
                                            GR_ID.find_book(gr_id, None)
                                        elif lazylibrarian.BOOK_API == "GoogleBooks" and gb_id:
                                            GB_ID = GoogleBooks(gb_id)
                                            GB_ID.find_book(gb_id, None)
                                        # see if it's there now...
                                        match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid)
                                        if not match:
                                            logger.debug("Unable to add bookid %s to database" % bookid)
                                            bookid = ""
                                if not bookid and isbn:
                                    match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn)
                                    if match:
                                        bookid = match['BookID']
                                if not bookid:
                                    # Try and find it under metadata authorname
                                    bookid = find_book_in_db(myDB, author, book)
                                    if not bookid:
                                        # get author name from parent directory of this book directory
                                        newauthor = os.path.basename(os.path.dirname(r))
                                        if author.lower() != newauthor.lower():
                                            bookid = find_book_in_db(myDB, newauthor, book)
                                            if bookid:
                                                logger.warn("%s not found under [%s], found under [%s]" %
                                                            (book, author, newauthor))

                                if bookid:
                                    check_status = myDB.match(
                                        'SELECT Status, BookFile, AuthorName, BookName from books where BookID="%s"' %
                                        bookid)

                                    if not check_status:
                                        logger.debug('Unable to find bookid %s in database' % bookid)
                                    else:
                                        if check_status['Status'] != 'Open':
                                            # we found a new book
                                            new_book_count += 1
                                            myDB.action(
                                                'UPDATE books set Status="Open" where BookID="%s"' % bookid)

                                        # store book location so we can check if it gets removed
                                        book_filename = os.path.join(r, files)
                                        if not check_status['BookFile']:  # no previous location
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))
                                        # location may have changed since last scan
                                        elif book_filename != check_status['BookFile']:
                                            modified_count += 1
                                            logger.warn("Updating book location for %s %s from %s to %s" %
                                                        (author, book, check_status['BookFile'], book_filename))
                                            logger.debug("%s %s matched BookID %s, [%s][%s]" % (author, book, bookid,
                                                        check_status['AuthorName'], check_status['BookName']))
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))

                                        # update cover file to cover.jpg in book folder (if exists)
                                        bookdir = os.path.dirname(book_filename)
                                        coverimg = os.path.join(bookdir, 'cover.jpg')
                                        if os.path.isfile(coverimg):
                                            cachedir = lazylibrarian.CACHEDIR
                                            cacheimg = os.path.join(cachedir, bookid + '.jpg')
                                            copyfile(coverimg, cacheimg)
                                else:
                                    logger.warn(
                                        "Failed to match book [%s] by [%s] in database" % (book, author))

        logger.info("%s/%s new/modified book%s found and added to the database" %
                    (new_book_count, modified_count, plural(new_book_count + modified_count)))
        logger.info("%s file%s processed" % (file_count, plural(file_count)))


        if startdir == destdir:
            # On full library scans, check for missing workpages
            setWorkPages()
            # and books with unknown language
            nolang = myDB.match("select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'")
            nolang = nolang['counter']
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))
             # show stats if new books were added
            stats = myDB.match(
                "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                    sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats")
            if stats and stats['sum(GR_book_hits)']:
                if lazylibrarian.BOOK_API == "GoogleBooks":
                    logger.debug("GoogleBooks was hit %s time%s for books" %
                                 (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                    logger.debug("GoogleBooks language was changed %s time%s" %
                                 (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)'])))
                if lazylibrarian.BOOK_API == "GoodReads":
                    logger.debug("GoodReads was hit %s time%s for books" %
                                 (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                    logger.debug("GoodReads was hit %s time%s for languages" %
                                 (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)'])))
                logger.debug("LibraryThing was hit %s time%s for languages" %
                             (stats['sum(LT_lang_hits)'], plural(stats['sum(LT_lang_hits)'])))
                logger.debug("Language cache was hit %s time%s" %
                             (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)'])))
                logger.debug("Unwanted language removed %s book%s" %
                             (stats['sum(bad_lang)'], plural(stats['sum(bad_lang)'])))
                logger.debug("Unwanted characters removed %s book%s" %
                             (stats['sum(bad_char)'], plural(stats['sum(bad_char)'])))
                logger.debug("Unable to cache language for %s book%s with missing ISBN" %
                             (stats['sum(uncached)'], plural(stats['sum(uncached)'])))
                logger.debug("Found %s duplicate book%s" %
                             (stats['sum(duplicates)'], plural(stats['sum(duplicates)'])))
                logger.debug("Cache %s hit%s, %s miss" %
                             (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
                cachesize = myDB.match("select count('ISBN') as counter from languages")
                logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])

            # Cache any covers and images
            images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
            if len(images):
                logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    bookid = item['bookid']
                    bookimg = item['bookimg']
                    bookname = item['bookname']
                    newimg = cache_cover(bookid, bookimg)
                    if newimg:
                        myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

            images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
            if len(images):
                logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    authorid = item['authorid']
                    authorimg = item['authorimg']
                    authorname = item['authorname']
                    newimg = cache_cover(authorid, authorimg)
                    if newimg:
                        myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))

            # On full scan, update bookcounts for all authors, not just new ones - refresh may have located
            # new books for existing authors especially if switched provider gb/gr or changed wanted languages
            authors = myDB.select('select AuthorID from authors')
        else:
            # On single author/book import, just update bookcount for that author
            authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author.replace('"', '""'))

        logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
        for author in authors:
            update_totals(author['AuthorID'])

        logger.info('Library scan complete')
        return new_book_count

    except Exception as e:
        logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())
Ejemplo n.º 10
0
def grsync(status, shelf):
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        GA = grauth()
        GR = None
        shelves = GA.get_shelf_list()
        found = False
        for item in shelves:  # type: dict
            if item['name'] == shelf:
                found = True
                break
        if not found:
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))
        # print ll_list
        # print gr_shelf

        not_on_shelf = []
        not_in_ll = []
        for book in ll_list:
            if book not in gr_shelf:
                not_on_shelf.append(book)
        for book in gr_shelf:
            if book not in ll_list:
                not_in_ll.append(book)

        to_shelf = 0
        to_ll = 0
        # these need adding to shelf
        if not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.debug('Not connected to goodreads')
        else:
            for book in not_on_shelf:
                # print "%s is not on shelf" % book
                try:
                    res, content = GA.BookToList(book, shelf)
                except Exception as e:
                    logger.debug("Error in BookToList: %s %s" % (type(e).__name__, str(e)))
                    res = None

                if res:
                    if lazylibrarian.LOGLEVEL > 2:
                        logger.debug("%10s added to %s shelf" % (book, shelf))
                        to_shelf += 1
                        # print content
                else:
                    logger.debug("Failed to add %s to %s shelf" % (book, shelf))
                    # print content

        # "to-read" books need adding to lazylibrarian as "wanted" if not already Open/Have,
        # if in "to-read" and already marked "Open/Have", optionally delete from "to-read"
        # (depending on user prefs, to-read and wanted might not be the same thing)
        # "owned" need adding as "Have" as librarysync will pick up "Open" ones or change Have to Open

        for book in not_in_ll:
            # print "%s is not marked %s" % (book, status)
            cmd = 'select Status from books where bookid="%s"' % book
            result = myDB.match(cmd)
            if result:
                if result['Status'] in ['Have', 'Open']:  # don't change status if we have it
                    logger.debug("%10s is already marked %s" % (book, result['Status']))
                    if lazylibrarian.CONFIG['GR_UNIQUE']:
                        try:
                            res, content = GA.BookToList(book, shelf, action='remove')
                        except Exception as e:
                            logger.debug("Error in BookToList: %s %s" % (type(e).__name__, str(e)))
                            res = None
                        if res:
                            logger.debug("%10s removed from %s shelf" % (book, shelf))
                            # print content
                        else:
                            logger.debug("Failed to remove %s from %s shelf" % (book, shelf))
                            # print content

                elif shelf == 'owned':
                    myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                else:
                    myDB.action('UPDATE books SET Status=? WHERE BookID=?', (status, book))
            else:  # add book to database as wanted
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                to_ll += 1

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return to_shelf, to_ll

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0, 0
Ejemplo n.º 11
0
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added """
    try:
        destdir = lazylibrarian.DIRECTORY('Destination')
        if not startdir:
            if not destdir:
                logger.warn('Cannot find destination directory: %s. Not scanning' % destdir)
                return 0
            startdir = destdir

        if not os.path.isdir(startdir):
            logger.warn('Cannot find directory: %s. Not scanning' % startdir)
            return 0

        if not internet():
            logger.warn('Libraryscan: No internet connection')
            return 0

        myDB = database.DBConnection()

        # keep statistics of full library scans
        if startdir == destdir:
            myDB.action('DELETE from stats')
            try:  # remove any extra whitespace in authornames
                authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "%  %"')
                if authors:
                    logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors))))
                    for author in authors:
                        authorid = author["AuthorID"]
                        authorname = ' '.join(author['AuthorName'].split())
                        # Have we got author name both with-and-without extra spaces? If so, merge them
                        duplicate = myDB.match(
                            'Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' % authorname)
                        if duplicate:
                            myDB.action('DELETE from authors where authorname="%s"' % author['AuthorName'])
                            if author['AuthorID'] != duplicate['AuthorID']:
                                myDB.action('UPDATE books set AuthorID="%s" WHERE AuthorID="%s"' %
                                            (duplicate['AuthorID'], author['AuthorID']))
                        else:
                            myDB.action(
                                'UPDATE authors set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid))
            except Exception as e:
                logger.info('Error: ' + str(e))

        logger.info('Scanning ebook directory: %s' % startdir)

        new_book_count = 0
        modified_count = 0
        rescan_count = 0
        rescan_hits = 0
        file_count = 0
        author = ""

        if lazylibrarian.CONFIG['FULL_SCAN']:
            cmd = 'select AuthorName, BookName, BookFile, BookID from books,authors'
            cmd += ' where books.AuthorID = authors.AuthorID and books.Status="Open"'
            if not startdir == destdir:
                cmd += ' and BookFile like "' + startdir + '%"'
            books = myDB.select(cmd)
            status = lazylibrarian.CONFIG['NOTFOUND_STATUS']
            logger.info('Missing books will be marked as %s' % status)
            for book in books:
                bookID = book['BookID']
                bookfile = book['BookFile']

                if not (bookfile and os.path.isfile(bookfile)):
                    myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                    myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                    logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName']))

        # to save repeat-scans of the same directory if it contains multiple formats of the same book,
        # keep track of which directories we've already looked at
        processed_subdirectories = []
        warned = False  # have we warned about no new authors setting
        matchString = ''
        for char in lazylibrarian.CONFIG['EBOOK_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the EBOOK_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
        pattern = re.compile(matchString, re.VERBOSE)

        for r, d, f in os.walk(startdir):
            for directory in d[:]:
                # prevent magazine being scanned
                if directory.startswith("_") or directory.startswith("."):
                    d.remove(directory)

            for files in f:
                file_count += 1

                if isinstance(r, str):
                    r = r.decode(lazylibrarian.SYS_ENCODING)

                subdirectory = r.replace(startdir, '')
                # Added new code to skip if we've done this directory before.
                # Made this conditional with a switch in config.ini
                # in case user keeps multiple different books in the same subdirectory
                if lazylibrarian.CONFIG['IMP_SINGLEBOOK'] and (subdirectory in processed_subdirectories):
                    logger.debug("[%s] already scanned" % subdirectory)
                else:
                    # If this is a book, try to get author/title/isbn/language
                    # if epub or mobi, read metadata from the book
                    # If metadata.opf exists, use that allowing it to override
                    # embedded metadata. User may have edited metadata.opf
                    # to merge author aliases together
                    # If all else fails, try pattern match for author/title
                    # and look up isbn/lang from LT or GR later
                    match = 0
                    if is_valid_booktype(files):

                        logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory))

                        language = "Unknown"
                        isbn = ""
                        book = ""
                        author = ""
                        gr_id = ""
                        gb_id = ""
                        extn = os.path.splitext(files)[1]

                        # if it's an epub or a mobi we can try to read metadata from it
                        if (extn == ".epub") or (extn == ".mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)

                            try:
                                res = get_book_info(book_filename)
                            except Exception as e:
                                logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e)))
                                res = {}
                            # title and creator are the minimum we need
                            if 'title' in res and 'creator' in res:
                                book = res['title']
                                author = res['creator']
                                if book and len(book) > 2 and author and len(author) > 2:
                                    match = 1
                                if 'language' in res:
                                    language = res['language']
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                if 'type' in res:
                                    extn = res['type']
                                logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                             (isbn, language, author, book, extn))
                            if not match:
                                logger.debug("Book meta incomplete in %s" % book_filename)

                        # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                        # just look for any .opf file in the current directory since we don't know
                        # LL preferred authorname/bookname at this point.
                        # Allow metadata in file to override book contents as may be users pref

                        metafile = opf_file(r)
                        try:
                            res = get_book_info(metafile)
                        except Exception as e:
                            logger.debug('get_book_info failed for %s, %s' % (metafile, str(e)))
                            res = {}
                        # title and creator are the minimum we need
                        if 'title' in res and 'creator' in res:
                            book = res['title']
                            author = res['creator']
                            if book and len(book) > 2 and author and len(author) > 2:
                                match = 1
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'gr_id' in res:
                                gr_id = res['gr_id']
                            logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, gr_id))
                        if not match:
                            logger.debug("File meta incomplete in %s" % metafile)

                        if not match:  # no author/book from metadata file, and not embedded either
                            match = pattern.match(files)
                            if match:
                                author = match.group("author")
                                book = match.group("book")
                                if len(book) <= 2 or len(author) <= 2:
                                    match = 0
                            if not match:
                                logger.debug("Pattern match failed [%s]" % files)

                        if match:
                            # flag that we found a book in this subdirectory
                            processed_subdirectories.append(subdirectory)

                            # If we have a valid looking isbn, and language != "Unknown", add it to cache
                            if language != "Unknown" and is_valid_isbn(isbn):
                                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                                # we need to add it to language cache if not already
                                # there, is_valid_isbn has checked length is 10 or 13
                                if len(isbn) == 10:
                                    isbnhead = isbn[0:3]
                                else:
                                    isbnhead = isbn[3:6]
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead)
                                if not match:
                                    myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                                else:
                                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                            author, authorid, new = addAuthorNameToDB(author)  # get the author name as we know it...

                            if author:
                                # author exists, check if this book by this author is in our database
                                # metadata might have quotes in book name
                                # some books might be stored under a different author name
                                # eg books by multiple authors, books where author is "writing as"
                                # or books we moved to "merge" authors
                                book = book.replace("'", "")

                                # First try and find it under author and bookname
                                # as we may have it under a different bookid or isbn to goodreads/googlebooks
                                # which might have several bookid/isbn for the same book
                                bookid = find_book_in_db(myDB, author, book)

                                if not bookid:
                                    # Title or author name might not match or multiple authors
                                    # See if the gr_id, gb_id is already in our database
                                    if gr_id:
                                        bookid = gr_id
                                    elif gb_id:
                                        bookid = gb_id
                                    else:
                                        bookid = ""

                                    if bookid:
                                        match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid)
                                        if not match:
                                            msg = 'Unable to find book %s by %s in database, trying to add it using '
                                            if bookid == gr_id:
                                                msg += "GoodReads ID " + gr_id
                                            if bookid == gb_id:
                                                msg += "GoogleBooks ID " + gb_id
                                            logger.debug(msg % (book, author))
                                            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and gr_id:
                                                GR_ID = GoodReads(gr_id)
                                                GR_ID.find_book(gr_id, None)
                                            elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks" and gb_id:
                                                GB_ID = GoogleBooks(gb_id)
                                                GB_ID.find_book(gb_id, None)
                                            # see if it's there now...
                                            match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid)
                                            if not match:
                                                logger.debug("Unable to add bookid %s to database" % bookid)
                                                bookid = ""

                                if not bookid and isbn:
                                    # See if the isbn is in our database
                                    match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn)
                                    if match:
                                        bookid = match['BookID']

                                if not bookid:
                                    # get author name from parent directory of this book directory
                                    newauthor = os.path.basename(os.path.dirname(r))
                                    # calibre replaces trailing periods with _ eg Smith Jr. -> Smith Jr_
                                    if newauthor.endswith('_'):
                                        newauthor = newauthor[:-1] + '.'
                                    if author.lower() != newauthor.lower():
                                        logger.debug("Trying authorname [%s]" % newauthor)
                                        bookid = find_book_in_db(myDB, newauthor, book)
                                        if bookid:
                                            logger.warn("%s not found under [%s], found under [%s]" %
                                                        (book, author, newauthor))

                                # at this point if we still have no bookid, it looks like we
                                # have author and book title but no database entry for it
                                if not bookid:
                                    if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                                        # Either goodreads doesn't have the book or it didn't match language prefs
                                        # Since we have the book anyway, try and reload it ignoring language prefs
                                        rescan_count += 1
                                        base_url = 'http://www.goodreads.com/search.xml?q='
                                        params = {"key": lazylibrarian.CONFIG['GR_API']}
                                        if author[1] in '. ':
                                            surname = author
                                            forename = ''
                                            while surname[1] in '. ':
                                                forename = forename + surname[0] + '.'
                                                surname = surname[2:].strip()
                                            if author != forename + ' ' + surname:
                                                logger.debug('Stripped authorname [%s] to [%s %s]' %
                                                            (author, forename, surname))
                                                author = forename + ' ' + surname

                                        author = ' '.join(author.split())  # ensure no extra whitespace

                                        searchname = author + ' ' + book
                                        searchname = cleanName(unaccented(searchname))
                                        searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING))
                                        set_url = base_url + searchterm + '&' + urllib.urlencode(params)
                                        try:
                                            rootxml, in_cache = get_xml_request(set_url)
                                            if not len(rootxml):
                                                logger.debug("Error requesting results from GoodReads")
                                            else:
                                                resultxml = rootxml.getiterator('work')
                                                for item in resultxml:
                                                    booktitle = item.find('./best_book/title').text
                                                    book_fuzz = fuzz.token_set_ratio(booktitle, book)
                                                    if book_fuzz >= 98:
                                                        logger.debug("Rescan found %s : %s" % (booktitle, language))
                                                        rescan_hits += 1
                                                        bookid = item.find('./best_book/id').text
                                                        GR_ID = GoodReads(bookid)
                                                        GR_ID.find_book(bookid, None)
                                                        if language and language != "Unknown":
                                                            # set language from book metadata
                                                            logger.debug("Setting language from metadata %s : %s" % (booktitle, language))
                                                            myDB.action('UPDATE books SET BookLang="%s" WHERE BookID="%s"' %
                                                                        (language, bookid))
                                                        break
                                                if not bookid:
                                                    logger.warn("GoodReads doesn't know about %s" % book)
                                        except Exception as e:
                                            logger.error("Error finding rescan results: %s" % str(e))

                                    elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                                        # if we get here using googlebooks it's because googlebooks
                                        # doesn't have the book. No point in looking for it again.
                                        logger.warn("GoogleBooks doesn't know about %s" % book)

                                # see if it's there now...
                                if bookid:
                                    cmd = 'SELECT books.Status, BookFile, AuthorName, BookName from books,authors '
                                    cmd += 'where books.AuthorID = authors.AuthorID and BookID="%s"' % bookid
                                    check_status = myDB.match(cmd)

                                    if not check_status:
                                        logger.debug('Unable to find bookid %s in database' % bookid)
                                    else:
                                        if check_status['Status'] != 'Open':
                                            # we found a new book
                                            new_book_count += 1
                                            myDB.action(
                                                'UPDATE books set Status="Open" where BookID="%s"' % bookid)

                                        # store book location so we can check if it gets removed
                                        book_filename = os.path.join(r, files)
                                        if not check_status['BookFile']:  # no previous location
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))
                                        # location may have changed since last scan
                                        elif book_filename != check_status['BookFile']:
                                            modified_count += 1
                                            logger.warn("Updating book location for %s %s from %s to %s" %
                                                        (author, book, check_status['BookFile'], book_filename))
                                            logger.debug("%s %s matched %s BookID %s, [%s][%s]" %
                                                        (author, book, check_status['Status'], bookid,
                                                        check_status['AuthorName'], check_status['BookName']))
                                            myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' %
                                                        (book_filename, bookid))

                                        # update cover file to cover.jpg in book folder (if exists)
                                        bookdir = os.path.dirname(book_filename)
                                        coverimg = os.path.join(bookdir, 'cover.jpg')
                                        if os.path.isfile(coverimg):
                                            cachedir = lazylibrarian.CACHEDIR
                                            cacheimg = os.path.join(cachedir, 'book', bookid + '.jpg')
                                            copyfile(coverimg, cacheimg)
                                else:
                                    logger.warn(
                                        "Failed to match book [%s] by [%s] in database" % (book, author))
                            else:
                                if not warned and not lazylibrarian.CONFIG['ADD_AUTHOR']:
                                    logger.warn("Add authors to database is disabled")
                                    warned = True

        logger.info("%s/%s new/modified book%s found and added to the database" %
                    (new_book_count, modified_count, plural(new_book_count + modified_count)))
        logger.info("%s file%s processed" % (file_count, plural(file_count)))

        if startdir == destdir:
            # On full library scans, check for missing workpages
            setWorkPages()
            # and books with unknown language
            nolang = myDB.match(
                "select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'")
            nolang = nolang['counter']
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))
                # show stats if new books were added
            stats = myDB.match(
                "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                    sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats")

            st= {'GR_book_hits': stats['sum(GR_book_hits)'], 'GB_book_hits': stats['sum(GR_book_hits)'],
                 'GR_lang_hits': stats['sum(GR_lang_hits)'], 'LT_lang_hits': stats['sum(LT_lang_hits)'],
                 'GB_lang_change': stats['sum(GB_lang_change)'], 'cache_hits': stats['sum(cache_hits)'],
                 'bad_lang': stats['sum(bad_lang)'], 'bad_char': stats['sum(bad_char)'],
                 'uncached': stats['sum(uncached)'], 'duplicates': stats['sum(duplicates)']}

            for item in st.keys():
                if st[item] is None:
                    st[item] = 0

            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (st['GB_lang_change'], plural(st['GB_lang_change'])))
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (st['GR_lang_hits'], plural(st['GR_lang_hits'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (st['LT_lang_hits'], plural(st['LT_lang_hits'])))
            logger.debug("Language cache was hit %s time%s" %
                         (st['cache_hits'], plural(st['cache_hits'])))
            logger.debug("Unwanted language removed %s book%s" %
                         (st['bad_lang'], plural(st['bad_lang'])))
            logger.debug("Unwanted characters removed %s book%s" %
                         (st['bad_char'], plural(st['bad_char'])))
            logger.debug("Unable to cache language for %s book%s with missing ISBN" %
                         (st['uncached'], plural(st['uncached'])))
            logger.debug("Found %s duplicate book%s" %
                         (st['duplicates'], plural(st['duplicates'])))
            logger.debug("Rescan %s hit%s, %s miss" %
                         (rescan_hits, plural(rescan_hits), rescan_count - rescan_hits))
            logger.debug("Cache %s hit%s, %s miss" %
                         (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.match("select count('ISBN') as counter from languages")
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])

            # Cache any covers and images
            images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
            if len(images):
                logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    bookid = item['bookid']
                    bookimg = item['bookimg']
                    # bookname = item['bookname']
                    newimg, success = cache_img("book", bookid, bookimg)
                    if success:
                        myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

            images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
            if len(images):
                logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    authorid = item['authorid']
                    authorimg = item['authorimg']
                    # authorname = item['authorname']
                    newimg, success = cache_img("author", authorid, authorimg)
                    if success:
                        myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))

            # On full scan, update bookcounts for all authors, not just new ones - refresh may have located
            # new books for existing authors especially if switched provider gb/gr or changed wanted languages
            authors = myDB.select('select AuthorID from authors')
        else:
            # On single author/book import, just update bookcount for that author
            authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author.replace('"', '""'))

        logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
        for author in authors:
            update_totals(author['AuthorID'])

        logger.info('Library scan complete')
        return new_book_count

    except Exception:
        logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())