Example #1
0
    def _notify(self,
                message=None,
                event=None,
                pushbullet_token=None,
                pushbullet_deviceid=None,
                force=False):
        """
        Sends a pushbullet notification based on the provided info or LL config

        title: The title of the notification to send
        message: The message string to send
        username: The username to send the notification to (optional, defaults to the username in the config)
        force: If True then the notification will be sent even if pushbullet is disabled in the config
        """
        try:
            message = unaccented(message)
        except Exception as e:
            logger.warn("Pushbullet: could not convert  message: %s" % e)

        # suppress notifications if the notifier is disabled but the notify options are checked
        if not lazylibrarian.CONFIG['USE_PUSHBULLET'] and not force:
            return False
        logger.debug("Pushbullet: Sending notification " + str(message))

        return self._sendPushbullet(message,
                                    event,
                                    pushbullet_token,
                                    pushbullet_deviceid,
                                    force=force)
Example #2
0
def setBookAuthors(book):
    myDB = database.DBConnection()
    newauthors = 0
    newrefs = 0
    try:
        authorlist = getBookAuthors(book['bookid'])
        for author in authorlist:
            authtype = author['type']
            if authtype in ['primary author', 'main author', 'secondary author']:
                if author['role'] in ['Author', '—'] and author['work'] == 'all editions':
                    name = formatAuthorName(unaccented(author['name']))
                    exists = myDB.match('select authorid from authors where authorname=?', (name,))
                    if exists:
                        authorid = exists['authorid']
                    else:
                        # try to add new author to database by name
                        name, authorid, new = lazylibrarian.importer.addAuthorNameToDB(name, False, False)
                        if new and authorid:
                            newauthors += 1
                    if authorid:
                        # suppress duplicates in bookauthors
                        myDB.action('INSERT into bookauthors (AuthorID, BookID) VALUES (?, ?)',
                                    (authorid, book['bookid']), suppress='UNIQUE')
                        newrefs += 1
    except Exception as e:
        logger.debug("Error parsing authorlist for %s: %s" % (book['bookname'], str(e)))
    return newauthors, newrefs
Example #3
0
    def _notify(self, message=None, event=None, slack_token=None, method=None, force=False):
        """
        Sends a slack incoming-webhook notification based on the provided info or LL config

        message: The message string to send
        force: If True then the notification will be sent even if slack is disabled in the config
        """
        try:
            message = unaccented(message)
        except Exception, e:
            logger.warn("Slack: could not convert message: %s" % e)
Example #4
0
    def _notify(self,
                message=None,
                event=None,
                pushbullet_token=None,
                pushbullet_deviceid=None):
        """
        Sends a pushbullet notification based on the provided info or LL config

        title: The title of the notification to send
        message: The message string to send
        username: The username to send the notification to (optional, defaults to the username in the config)
        force: If True then the notification will be sent even if pushbullet is disabled in the config
        """
        try:
            message = unaccented(message)
        except Exception, e:
            logger.warn("Pushbullet: could not convert  message: %s" % e)
Example #5
0
    def log(message, level):

        logger = logging.getLogger('lazylibrarian')

        threadname = threading.currentThread().getName()

        # Get the frame data of the method that made the original logger call
        if len(inspect.stack()) > 2:
            frame = inspect.getframeinfo(inspect.stack()[2][0])
            program = os.path.basename(frame.filename)
            method = frame.function
            lineno = frame.lineno
        else:
            program = ""
            method = ""
            lineno = ""

        if 'windows' in platform.system().lower(
        ):  # windows cp1252 can't handle some accents
            message = formatter.unaccented(message)
        elif PY2:
            message = formatter.safe_unicode(message)
            message = message.encode(lazylibrarian.SYS_ENCODING)

        if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
            # Limit the size of the "in-memory" log, as gets slow if too long
            lazylibrarian.LOGLIST.insert(0,
                                         (formatter.now(), level, threadname,
                                          program, method, lineno, message))
            if len(lazylibrarian.LOGLIST) > formatter.check_int(
                    lazylibrarian.CONFIG['LOGLIMIT'], 500):
                del lazylibrarian.LOGLIST[-1]

        message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno,
                                          message)

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warning(message)
        else:
            logger.error(message)
Example #6
0
    def _notify(self,
                message=None,
                event=None,
                slack_token=None,
                method=None,
                force=False):
        """
        Sends a slack incoming-webhook notification based on the provided info or LL config

        message: The message string to send
        force: If True then the notification will be sent even if slack is disabled in the config
        """
        try:
            message = unaccented(message)
        except Exception as e:
            logger.warn("Slack: could not convert message: %s" % e)
        # suppress notifications if the notifier is disabled but the notify options are checked
        if not lazylibrarian.CONFIG['USE_SLACK'] and not force:
            return False

        return self._sendSlack(message, event, slack_token, method, force)
Example #7
0
def safe_move(src, dst, action='move'):
    """ Move or copy src to dst
        Retry without accents if unicode error as some file systems can't handle (some) accents
        Retry with some characters stripped if bad filename
        eg windows can't handle <>?":| (and maybe others) in filenames
        Return (new) dst if success """

    while action:  # might have more than one problem...
        try:
            if action == 'copy':
                shutil.copy(src, dst)
            else:
                shutil.move(src, dst)
            return dst

        except UnicodeEncodeError:
            newdst = unaccented(dst)
            if newdst != dst:
                dst = newdst
            else:
                raise

        except IOError as e:
            if e.errno == 22:  # bad mode or filename
                drive, path = os.path.splitdrive(dst)
                # strip some characters windows can't handle
                newpath = replace_all(path, __dic__)
                # windows filenames can't end in space or dot
                while newpath and newpath[-1] in '. ':
                    newpath = newpath[:-1]
                # anything left? has it changed?
                if newpath and newpath != path:
                    dst = os.path.join(drive, newpath)
                else:
                    raise
            else:
                raise
        except Exception:
            raise
    return dst
Example #8
0
    def find_author_id(self, refresh=False):
        author = self.name
        author = formatAuthorName(unaccented(author))
        URL = 'https://www.goodreads.com/api/author_url/' + urllib.quote(author) + '?' + urllib.urlencode(self.params)

        # googlebooks gives us author names with long form unicode characters
        if isinstance(author, str) and hasattr(author, "decode"):
            author = author.decode('utf-8')  # make unicode
        author = unicodedata.normalize('NFC', author)  # normalize to short form

        logger.debug("Searching for author with name: %s" % author)

        authorlist = []
        try:
            rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
        except Exception as e:
            logger.error("%s finding authorid: %s, %s" % (type(e).__name__, URL, str(e)))
            return authorlist
        if rootxml is None:
            logger.debug("Error requesting authorid")
            return authorlist

        resultxml = rootxml.getiterator('author')

        if resultxml is None:
            logger.warn('No authors found with name: %s' % author)
        else:
            # In spite of how this looks, goodreads only returns one result, even if there are multiple matches
            # we just have to hope we get the right one. eg search for "James Lovelock" returns "James E. Lovelock"
            # who only has one book listed under googlebooks, the rest are under "James Lovelock"
            # goodreads has all his books under "James E. Lovelock". Can't come up with a good solution yet.
            # For now we'll have to let the user handle this by selecting/adding the author manually
            for author in resultxml:
                authorid = author.attrib.get("id")
                authorlist = self.get_author_info(authorid)
        return authorlist
Example #9
0
    def find_book(self, bookid=None, queue=None):
        myDB = database.DBConnection()

        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(
            self.params)

        try:
            rootxml, in_cache = get_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book")
                return
        except Exception as e:
            logger.error("Error finding book: %s" % e)
            return

        bookLanguage = rootxml.find('./book/language_code').text
        bookname = rootxml.find('./book/title').text

        if not bookLanguage:
            bookLanguage = "Unknown"
#
# PAB user has said they want this book, don't block for bad language, just warn
#
        valid_langs = ([
            valid_lang.strip()
            for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')
        ])
        if bookLanguage not in valid_langs:
            logger.debug('Book %s language does not match preference' %
                         bookname)

        if (rootxml.find('./book/publication_year').text is None):
            bookdate = "0000"
        else:
            bookdate = rootxml.find('./book/publication_year').text

        try:
            bookimg = rootxml.find('./book/img_url').text
            if 'assets/nocover' in bookimg:
                bookimg = 'images/nocover.png'
        except (KeyError, AttributeError):
            bookimg = 'images/nocover.png'

        authorname = rootxml.find('./book/authors/author/name').text
        bookdesc = rootxml.find('./book/description').text
        bookisbn = rootxml.find('./book/isbn').text
        bookpub = rootxml.find('./book/publisher').text
        booklink = rootxml.find('./book/link').text
        bookrate = float(rootxml.find('./book/average_rating').text)
        bookpages = rootxml.find('.book/num_pages').text

        name = authorname
        GR = GoodReads(name)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']

        booksub = ''
        bookname = unaccented(bookname)
        if ': ' in bookname:
            parts = bookname.split(': ', 1)
            bookname = parts[0]
            booksub = parts[1]

        dic = {':': '', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic)
        bookname = bookname.strip()  # strip whitespace
        booksub = replace_all(booksub, dic)
        booksub = booksub.strip()  # strip whitespace
        if booksub:
            series, seriesNum = bookSeries(booksub)
        else:
            series, seriesNum = bookSeries(bookname)

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": None,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": None,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": bookLanguage,
            "Status": "Wanted",
            "BookAdded": today(),
            "Series": series,
            "SeriesNum": seriesNum
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' %
                             (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        elif bookimg and bookimg.startswith('http'):
            link = cache_cover(bookid, bookimg)
            if link is not None:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)

        if seriesNum == None:
            #  try to get series info from librarything
            series, seriesNum = getWorkSeries(bookid)
            if seriesNum:
                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"Series": series, "SeriesNum": seriesNum}
                myDB.upsert("books", newValueDict, controlValueDict)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Example #10
0
    def get_author_books(self, authorid=None, authorname=None, refresh=False):

        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(
            self.params)

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)
        books_dict = []
        try:
            rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
        except Exception as e:
            logger.error("Error fetching author books: %s" % e)
            return books_dict
        if rootxml is None:
            logger.debug("Error requesting author books")
            return books_dict
        if not in_cache:
            api_hits = api_hits + 1
        resultxml = rootxml.getiterator('book')

        valid_langs = ([
            valid_lang.strip()
            for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')
        ])

        if not len(resultxml):
            logger.warn('[%s] No books found for author with ID: %s' %
                        (authorname, authorid))
        else:
            logger.debug("[%s] Now processing books with GoodReads API" %
                         authorname)

            resultsCount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            logger.debug(u"url " + URL)

            authorNameResult = rootxml.find('./author/name').text
            logger.debug(u"author name " + authorNameResult)
            loopCount = 1

            while resultxml is not None:
                for book in resultxml:
                    total_count = total_count + 1

                    if (book.find('publication_year').text is None):
                        pubyear = "0000"
                    else:
                        pubyear = book.find('publication_year').text

                    try:
                        bookimg = book.find('image_url').text
                        if ('nocover' in bookimg):
                            bookimg = 'images/nocover.png'
                    except (KeyError, AttributeError):
                        bookimg = 'images/nocover.png'

    # PAB this next section tries to get the book language using the isbn13 to look it up. If no isbn13 we skip the
    # book entirely, rather than including it with an "Unknown" language. Changed this so we can still include the book
    # with language set to "Unknown". There is a setting in config.ini to allow or skip books with "Unknown" language
    # if you really don't want to include them.
    # Not all GR books have isbn13 filled in, but all have a GR bookid, which we've already got, so use that.
    # Also, with GR API rules we can only call the API once per second, which slows us down a lot when all we want
    # is to get the language. We sleep for one second per book that GR knows about for each author you have in your
    # library. The libraryThing API has the same 1 second restriction, and is limited to 1000 hits per day, but has
    # fewer books with unknown language. To get around this and speed up the process, see if we already have a book
    # in the database with a similar start to the ISBN. The way ISBNs work, digits 3-5 of a 13 char ISBN or digits 0-2
    # of a 10 digit ISBN indicate the region/language so if two books have the same 3 digit isbn code, they _should_
    # be the same language.
    # I ran a simple python script on my library of 1500 books, and these codes were 100% correct on matching book
    # languages, no mis-matches. It did result in a small number of books with "unknown" language being wrongly matched
    # but most "unknown" were matched to the correct language.
    # We could look up ISBNs we already know about in the database, but this only holds books in the languages we want
    # to keep, which reduces the number of cache hits, so we create a new database table, holding ALL results including
    # the ISBNs for languages we don't want and books we reject.
    # The new table is created (if not exists) in init.py so by the time we get here there is an existing table.
    # If we haven't an already matching partial ISBN, look up language code from libraryThing
    # "http://www.librarything.com/api/thingLang.php?isbn=1234567890"
    # If you find a matching language, add it to the database.  If "unknown" or "invalid", try GR as maybe GR can
    # provide a match.
    # If both LT and GR return unknown, add isbn to db as "unknown". No point in repeatedly asking LT for a code
    # it's told you it doesn't know.
    # As an extra option, if language includes "All" in config.ini, we can skip this whole section and process
    # everything much faster by not querying for language at all.
    # It does mean we include a lot of unwanted foreign translations in the database, but it's _much_ faster.

                    bookLanguage = "Unknown"
                    find_field = "id"
                    isbn = ""
                    isbnhead = ""
                    if "All" not in valid_langs:  # do we care about language
                        if (book.find('isbn').text is not None):
                            find_field = "isbn"
                            isbn = book.find('isbn').text
                            isbnhead = isbn[0:3]
                        else:
                            if (book.find('isbn13').text is not None):
                                find_field = "isbn13"
                                isbn = book.find('isbn13').text
                                isbnhead = isbn[3:6]
                        if (find_field != 'id'):  # isbn or isbn13 found

                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"'
                                % (isbnhead)).fetchone()
                            if (match):
                                bookLanguage = match['lang']
                                cache_hits = cache_hits + 1
                                logger.debug(
                                    "Found cached language [%s] for %s [%s]" %
                                    (bookLanguage, find_field, isbnhead))
                            else:
                                # no match in cache, try searching librarything for a language code using the isbn
                                # if no language found, librarything return value is "invalid" or "unknown"
                                # returns plain text, not xml
                                BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                try:
                                    librarything_wait()
                                    resp = urllib2.urlopen(BOOK_URL,
                                                           timeout=30).read()
                                    lt_lang_hits = lt_lang_hits + 1
                                    logger.debug(
                                        "LibraryThing reports language [%s] for %s"
                                        % (resp, isbnhead))

                                    if ('invalid' in resp
                                            or 'Unknown' in resp):
                                        find_field = "id"  # reset the field to force search on goodreads
                                    else:
                                        bookLanguage = resp  # found a language code
                                        myDB.action(
                                            'insert into languages values ("%s", "%s")'
                                            % (isbnhead, bookLanguage))
                                        logger.debug(u"LT language %s: %s" %
                                                     (isbnhead, bookLanguage))
                                except Exception as e:
                                    logger.error(
                                        "Error finding LT language result for [%s], %s"
                                        % (isbn, e))
                                    find_field = "id"  # reset the field to search on goodreads

                        if (find_field == 'id'):
                            # [or bookLanguage == "Unknown"] no earlier match, we'll have to search the goodreads api
                            try:
                                if (book.find(find_field).text is not None):
                                    BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                        book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                    logger.debug(u"Book URL: " + BOOK_URL)

                                    try:
                                        time_now = int(time.time())
                                        if time_now <= lazylibrarian.LAST_GOODREADS:
                                            time.sleep(1)

                                        BOOK_rootxml, in_cache = get_xml_request(
                                            BOOK_URL)
                                        if BOOK_rootxml is None:
                                            logger.debug(
                                                'Error requesting book language code'
                                            )
                                            bookLanguage = ""
                                        else:
                                            if not in_cache:
                                                # only update last_goodreads if the result wasn't found in the cache
                                                lazylibrarian.LAST_GOODREADS = time_now
                                            bookLanguage = BOOK_rootxml.find(
                                                './book/language_code').text
                                    except Exception as e:
                                        logger.error(
                                            "Error finding book results: %s" %
                                            e)
                                    if not in_cache:
                                        gr_lang_hits = gr_lang_hits + 1
                                    if not bookLanguage:
                                        bookLanguage = "Unknown"

                                    if (isbnhead != ""):
                                        # GR didn't give an isbn so we can't cache it, just use language for this book
                                        myDB.action(
                                            'insert into languages values ("%s", "%s")'
                                            % (isbnhead, bookLanguage))
                                        logger.debug(
                                            "GoodReads reports language [%s] for %s"
                                            % (bookLanguage, isbnhead))
                                    else:
                                        not_cached = not_cached + 1

                                    logger.debug(u"GR language: " +
                                                 bookLanguage)
                                else:
                                    logger.debug(
                                        "No %s provided for [%s]" %
                                        (find_field, book.find('title').text))
                                    # continue

                            except Exception as e:
                                logger.debug(u"An error has occured: %s" % e)

                        if bookLanguage not in valid_langs:
                            logger.debug('Skipped a book with language %s' %
                                         bookLanguage)
                            ignored = ignored + 1
                            continue
                    bookname = book.find('title').text
                    bookid = book.find('id').text
                    bookdesc = book.find('description').text
                    bookisbn = book.find('isbn').text
                    bookpub = book.find('publisher').text
                    booklink = book.find('link').text
                    bookrate = float(book.find('average_rating').text)
                    bookpages = book.find('num_pages').text
                    bookname = unaccented(bookname)
                    if ': ' in bookname:
                        parts = bookname.split(': ', 1)
                        bookname = parts[0]
                        booksub = parts[1]
                    else:
                        booksub = ''
                    dic = {':': '', '"': '', '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace
                    booksub = replace_all(booksub, dic)
                    booksub = booksub.strip()  # strip whitespace
                    if booksub:
                        series, seriesNum = bookSeries(booksub)
                    else:
                        series, seriesNum = bookSeries(bookname)

                    # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                    # and sometimes uses the same bookid if the book is the same but the title is slightly different
                    # We use bookid, then reject if another author/title has a different bookid so we just keep one...
                    find_book_status = myDB.select(
                        'SELECT * FROM books WHERE BookID = "%s"' % bookid)
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                            locked = resulted['Manual']
                    else:
                        book_status = lazylibrarian.NEWBOOK_STATUS
                        locked = False

                    rejected = False

                    if re.match('[^\w-]', bookname
                                ):  # reject books with bad characters in title
                        logger.debug(u"removed result [" + bookname +
                                     "] for bad characters")
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected and not bookname:
                        logger.debug(
                            'Rejecting bookid %s for %s, no bookname' %
                            (bookid, authorNameResult))
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected:
                        find_books = myDB.select(
                            'SELECT * FROM books WHERE BookName = "%s" and AuthorName = "%s"'
                            % (bookname, authorNameResult))
                        if find_books:
                            for find_book in find_books:
                                if find_book['BookID'] != bookid:
                                    # we have a book with this author/title already
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got %s'
                                        % (find_book['BookID'],
                                           authorNameResult, bookname, bookid))
                                    duplicates = duplicates + 1
                                    rejected = True
                                    break

                    if not rejected:
                        find_books = myDB.select(
                            'SELECT * FROM books WHERE BookID = "%s"' % bookid)
                        if find_books:
                            # we have a book with this bookid already
                            logger.debug(
                                'Rejecting bookid %s for [%s][%s] already got this bookid in database'
                                % (bookid, authorNameResult, bookname))
                            duplicates = duplicates + 1
                            rejected = True
                            break

                    if not rejected:
                        if book_status != "Ignored":
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorName": authorNameResult,
                                    "AuthorID": authorid,
                                    "AuthorLink": None,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": None,
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": pubyear,
                                    "BookLang": bookLanguage,
                                    "Status": book_status,
                                    "BookAdded": today(),
                                    "Series": series,
                                    "SeriesNum": seriesNum
                                }

                                resultsCount = resultsCount + 1

                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                                logger.debug(u"Book found: " +
                                             book.find('title').text + " " +
                                             pubyear)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(
                                        u'Updated cover for %s to %s' %
                                        (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)

                            elif bookimg and bookimg.startswith('http'):
                                link = cache_cover(bookid, bookimg)
                                if link is not None:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)

                            if seriesNum == None:
                                # try to get series info from librarything
                                series, seriesNum = getWorkSeries(bookid)
                                if seriesNum:
                                    logger.debug(u'Updated series: %s [%s]' %
                                                 (series, seriesNum))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {
                                        "Series": series,
                                        "SeriesNum": seriesNum
                                    }
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)

                            if not find_book_status:
                                logger.debug(u"[%s] Added book: %s" %
                                             (authorname, bookname))
                                added_count = added_count + 1
                            else:
                                logger.debug(u"[%s] Updated book: %s" %
                                             (authorname, bookname))
                                updated_count = updated_count + 1
                        else:
                            book_ignore_count = book_ignore_count + 1

                loopCount = loopCount + 1
                URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                      urllib.urlencode(self.params) + '&page=' + str(loopCount)
                resultxml = None
                try:
                    rootxml, in_cache = get_xml_request(URL,
                                                        useCache=not refresh)
                    if rootxml is None:
                        logger.debug('Error requesting next page of results')
                    else:
                        resultxml = rootxml.getiterator('book')
                        if not in_cache:
                            api_hits = api_hits + 1
                except Exception as e:
                    resultxml = None
                    logger.error("Error finding next page of results: %s" % e)

                if resultxml is not None:
                    if all(False for book in
                           resultxml):  # returns True if iterator is empty
                        resultxml = None

        lastbook = myDB.action(
            'SELECT BookName, BookLink, BookDate from books WHERE AuthorID="%s" \
                                AND Status != "Ignored" order by BookDate DESC'
            % authorid).fetchone()
        if lastbook:
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
        else:
            lastbookname = None
            lastbooklink = None
            lastbookdate = None

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # This is here because GoodReads sometimes has several entries with the same BookID!
        modified_count = added_count + updated_count

        logger.debug("Found %s total book%s for author" %
                     (total_count, plural(total_count)))
        logger.debug("Removed %s bad language result%s for author" %
                     (ignored, plural(ignored)))
        logger.debug(
            "Removed %s bad character or no-name result%s for author" %
            (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" %
                     (duplicates, plural(duplicates)))
        logger.debug("Ignored %s book%s by author marked as Ignored" %
                     (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" %
                     (modified_count, plural(modified_count)))

        myDB.action(
            'insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)'
            %
            (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
             cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info(
                "[%s] Book processing complete: Added %s book%s / Updated %s book%s"
                % (authorname, added_count, plural(added_count), updated_count,
                   plural(updated_count)))
        else:
            logger.info(
                "[%s] Book processing complete: Added %s book%s to the database"
                % (authorname, added_count, plural(added_count)))

        return books_dict
Example #11
0
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not (lazylibrarian.USE_RSS()):
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
            for book in resultlist:
                # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                    bookmatch = myDB.match('select Status,BookName from books where bookid=?', (book['rss_bookid'],))
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s, already marked as "%s"' % (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s, marking as "Wanted"' % bookname)
                            controlValueDict = {"BookID": book['rss_bookid']}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:
                        import_book(book['rss_bookid'])
                        new_books += 1
                else:
                    item = {}
                    headers = []
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                        headers.append('BookID')
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                        headers.append('ISBN')
                    bookmatch = finditem(item, book['rss_author'], headers)
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(
                                u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:  # not in database yet
                        results = ''
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]  # type: dict
                            if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%%) %s: %s" %
                                            (result['isbn_fuzz'], result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]  # type: dict
                            if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                    and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                          result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                        result['authorname'], result['bookname'])
                            logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

        searchbooks = []
        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'],))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders and not wishproviders:
            logger.warn('No rss providers are available')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    searchlist.append(
                        {"bookid": searchbook['BookID'],
                         "bookName": searchbook['BookName'],
                         "bookSub": searchbook['BookSub'],
                         "authorName": searchbook['AuthorName'],
                         "library": "eBook",
                         "searchterm": searchterm})

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    searchlist.append(
                        {"bookid": searchbook['BookID'],
                         "bookName": searchbook['BookName'],
                         "bookSub": searchbook['BookSub'],
                         "authorName": searchbook['AuthorName'],
                         "library": "AudioBook",
                         "searchterm": searchterm})

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("NZB Searches for %s %s returned no results." % (book['library'], book['searchterm']))
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Example #12
0
def addAuthorNameToDB(author=None, refresh=False, addbooks=True):
    # get authors name in a consistent format, look them up in the database
    # if not in database, try to import them.
    # return authorname,new where new=False if author already in db, new=True if added
    # authorname returned is our preferred name, or empty string if not found or unable to add
    myDB = database.DBConnection()
    new = False
    if len(author) < 2:
        logger.debug('Invalid Author Name [%s]' % author)
        return "", "", False

    author = formatAuthorName(author)
    # Check if the author exists, and import the author if not,
    check_exist_author = myDB.match(
        'SELECT AuthorID FROM authors where AuthorName="%s"' %
        author.replace('"', '""'))

    if not check_exist_author and lazylibrarian.CONFIG['ADD_AUTHOR']:
        logger.debug('Author %s not found in database, trying to add' % author)
        # no match for supplied author, but we're allowed to add new ones
        GR = GoodReads(author)
        try:
            author_gr = GR.find_author_id()
        except Exception as e:
            logger.warn("Error finding author id for [%s] %s" %
                        (author, str(e)))
            return "", "", False

        # only try to add if GR data matches found author data
        if author_gr:
            authorname = author_gr['authorname']
            #authorid = author_gr['authorid']
            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
            match_auth = author.replace('.', ' ')
            match_auth = ' '.join(match_auth.split())

            match_name = authorname.replace('.', ' ')
            match_name = ' '.join(match_name.split())

            match_name = unaccented(match_name)
            match_auth = unaccented(match_auth)

            # allow a degree of fuzziness to cater for different accented character handling.
            # some author names have accents,
            # filename may have the accented or un-accented version of the character
            # The currently non-configurable value of fuzziness might need to go in config
            # We stored GoodReads unmodified author name in
            # author_gr, so store in LL db under that
            # fuzz.ratio doesn't lowercase for us
            match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
            if match_fuzz < 90:
                logger.debug(
                    "Failed to match author [%s] to authorname [%s] fuzz [%d]"
                    % (author, match_name, match_fuzz))

            # To save loading hundreds of books by unknown authors at GR or GB, ignore unknown
            if (author != "Unknown") and (match_fuzz >= 90):
                # use "intact" name for author that we stored in
                # GR author_dict, not one of the various mangled versions
                # otherwise the books appear to be by a different author!
                author = author_gr['authorname']
                authorid = author_gr['authorid']
                # this new authorname may already be in the
                # database, so check again
                check_exist_author = myDB.match(
                    'SELECT AuthorID FROM authors where AuthorID="%s"' %
                    authorid)
                if check_exist_author:
                    logger.debug('Found goodreads authorname %s in database' %
                                 author)
                else:
                    logger.info("Adding new author [%s]" % author)
                    try:
                        addAuthorToDB(authorname=author,
                                      refresh=refresh,
                                      authorid=authorid,
                                      addbooks=addbooks)
                        check_exist_author = myDB.match(
                            'SELECT AuthorID FROM authors where AuthorID="%s"'
                            % authorid)
                        if check_exist_author:
                            new = True
                    except Exception:
                        logger.debug('Failed to add author [%s] to db' %
                                     author)
    # check author exists in db, either newly loaded or already there
    if not check_exist_author:
        logger.debug("Failed to match author [%s] in database" % author)
        return "", "", False
    return author, check_exist_author['AuthorID'], new
Example #13
0
def ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode):
    params = None
    if searchType in ["book", "shortbook"]:
        authorname, bookname = get_searchterm(book, searchType)
        if provider['BOOKSEARCH'] and provider[
                'BOOKCAT']:  # if specific booksearch, use it
            params = {
                "t": provider['BOOKSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['BOOKCAT']
            }
        elif provider['GENERALSEARCH'] and provider[
                'BOOKCAT']:  # if not, try general search
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['BOOKCAT']
            }
    elif searchType in ["audio", "shortaudio"]:
        authorname, bookname = get_searchterm(book, searchType)
        if provider['AUDIOSEARCH'] and provider[
                'AUDIOCAT']:  # if specific audiosearch, use it
            params = {
                "t": provider['AUDIOSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['AUDIOCAT']
            }
        elif provider['GENERALSEARCH'] and provider[
                'AUDIOCAT']:  # if not, try general search
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['AUDIOCAT']
            }
    elif searchType == "mag":
        if provider['MAGSEARCH'] and provider[
                'MAGCAT']:  # if specific magsearch, use it
            params = {
                "t": provider['MAGSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": unaccented(book['searchterm'].replace(':', '')),
                "extended": provider['EXTENDED'],
            }
        elif provider['GENERALSEARCH'] and provider['MAGCAT']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": unaccented(book['searchterm'].replace(':', '')),
                "extended": provider['EXTENDED'],
            }
    else:
        if provider['GENERALSEARCH']:
            if searchType == "shortgeneral":
                searchterm = unaccented(
                    book['searchterm'].split('(')[0].replace(':', ''))
            else:
                searchterm = unaccented(book['searchterm'].replace(':', ''))
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": searchterm,
                "extended": provider['EXTENDED'],
            }
    if params:
        logger.debug('[NewzNabPlus] - %s Search parameters set to %s' %
                     (searchMode, str(params)))
    else:
        logger.debug(
            '[NewzNabPlus] - %s No matching search parameters for %s' %
            (searchMode, searchType))

    return params
Example #14
0
def ZOO(book=None, test=False):
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/search")

    params = {"q": book['searchterm'], "category": "books", "fmt": "rss"}
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    mode = 'torrent'
                    if link:
                        url = link
                        mode = 'torrent'
                    if magnet:
                        if not url or (url and
                                       lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid':
                            book['bookid'],
                            'tor_prov':
                            provider,
                            'tor_title':
                            title,
                            'tor_url':
                            url,
                            'tor_size':
                            str(size),
                            'tor_type':
                            mode,
                            'priority':
                            lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #15
0
    def find_book(bookid=None, queue=None):
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, in_cache = get_json_request(URL)

        if not jsonresults:
            logger.debug('No results found for %s' % bookid)
            return

        bookname = jsonresults['volumeInfo']['title']
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        try:
            authorname = jsonresults['volumeInfo']['authors'][0]
        except KeyError:
            logger.debug('Book %s does not contain author field, skipping' %
                         bookname)
            return
        try:
            # warn if language is in ignore list, but user said they wanted this book
            booklang = jsonresults['volumeInfo']['language']
            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            if booklang not in valid_langs and 'All' not in valid_langs:
                logger.debug(
                    'Book %s googlebooks language does not match preference, %s'
                    % (bookname, booklang))
        except KeyError:
            logger.debug('Book does not have language field')
            booklang = "Unknown"

        try:
            bookpub = jsonresults['volumeInfo']['publisher']
        except KeyError:
            bookpub = ""

        series = ""
        seriesNum = ""
        try:
            booksub = jsonresults['volumeInfo']['subtitle']
            try:
                series = booksub.split('(')[1].split(' Series ')[0]
            except IndexError:
                series = ""
            try:
                seriesNum = booksub.split('(')[1].split(' Series ')[1].split(
                    ')')[0]
                if seriesNum[0] == '#':
                    seriesNum = seriesNum[1:]
            except IndexError:
                seriesNum = ""
        except KeyError:
            booksub = ""

        try:
            bookdate = jsonresults['volumeInfo']['publishedDate']
        except KeyError:
            bookdate = '0000-00-00'

        try:
            bookimg = jsonresults['volumeInfo']['imageLinks']['thumbnail']
        except KeyError:
            bookimg = 'images/nocover.png'

        try:
            bookrate = jsonresults['volumeInfo']['averageRating']
        except KeyError:
            bookrate = 0

        try:
            bookpages = jsonresults['volumeInfo']['pageCount']
        except KeyError:
            bookpages = 0

        try:
            bookgenre = jsonresults['volumeInfo']['categories'][0]
        except KeyError:
            bookgenre = ""

        try:
            bookdesc = jsonresults['volumeInfo']['description']
        except KeyError:
            bookdesc = ""

        try:
            if jsonresults['volumeInfo']['industryIdentifiers'][0][
                    'type'] == 'ISBN_10':
                bookisbn = jsonresults['volumeInfo']['industryIdentifiers'][0][
                    'identifier']
            else:
                bookisbn = ""
        except KeyError:
            bookisbn = ""

        booklink = jsonresults['volumeInfo']['canonicalVolumeLink']
        bookrate = float(bookrate)

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            match = myDB.match(
                'SELECT AuthorID from authors WHERE AuthorID="%s"' % AuthorID)
            if not match:
                match = myDB.match(
                    'SELECT AuthorID from authors WHERE AuthorName="%s"' %
                    author['authorname'])
                if match:
                    logger.debug(
                        '%s: Changing authorid from %s to %s' %
                        (author['authorname'], AuthorID, match['AuthorID']))
                    AuthorID = match[
                        'AuthorID']  # we have a different authorid for that authorname
                else:  # no author but request to add book, add author as "ignored"
                    # User hit "add book" button from a search
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": "Ignored"
                    }
                    myDB.upsert("authors", newValueDict, controlValueDict)
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" %
                        (authorname, bookname))
            return

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' %
                             (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

            elif bookimg and bookimg.startswith('http'):
                link, success = cache_img("book", bookid, bookimg)
                if success:
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"BookImg": link}
                    myDB.upsert("books", newValueDict, controlValueDict)
                else:
                    logger.debug('Failed to cache image for %s' % bookimg)

        if lazylibrarian.CONFIG['ADD_SERIES']:
            # prefer series info from librarything
            seriesdict = getWorkSeries(bookid)
            if seriesdict:
                logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
            else:
                if series:
                    seriesdict = {cleanName(unaccented(series)): seriesNum}
            setSeries(seriesdict, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Example #16
0
def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found
        and marking the books as "Wanted"
    """
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir)

        headers = None
        content = {}

        if not csvFile:
            msg = "No CSV file found in %s" % search_dir
            logger.warn(msg)
            return msg
        else:
            logger.debug(u'Reading file %s' % csvFile)
            reader = csv.reader(open(csvFile))
            for row in reader:
                if reader.line_num == 1:
                    # If we are on the first line, create the headers list from the first row
                    headers = row
                else:
                    # Otherwise, the key in the content dictionary is the first item in the
                    # row and we can create the sub-dictionary by using the zip() function.
                    # we include the key in the dictionary as our exported csv files use
                    # bookid as the key
                    content[row[0]] = dict(zip(headers, row))

            # We can now get to the content by using the resulting dictionary, so to see
            # the list of lines, we can do: print content.keys()  to get a list of keys
            # To see the list of fields available for each book:  print headers

            if 'Author' not in headers or 'Title' not in headers:
                msg = 'Invalid CSV file found %s' % csvFile
                logger.warn(msg)
                return msg

            myDB = database.DBConnection()
            bookcount = 0
            authcount = 0
            skipcount = 0
            logger.debug(u"CSV: Found %s book%s in csv file" % (len(content.keys()), plural(len(content.keys()))))
            for item in content.keys():
                authorname = content[item]['Author']
                if isinstance(authorname, str) and hasattr(authorname, "decode"):
                    authorname = authorname.decode(lazylibrarian.SYS_ENCODING)
                authorname = formatAuthorName(authorname)
                title = content[item]['Title']
                if isinstance(title, str) and hasattr(title, "decode"):
                    title = title.decode(lazylibrarian.SYS_ENCODING)

                authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

                if authmatch:
                    logger.debug(u"CSV: Author %s found in database" % authorname)
                else:
                    logger.debug(u"CSV: Author %s not found" % authorname)
                    newauthor, authorid, new = addAuthorNameToDB(author=authorname,
                                                                 addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                    if len(newauthor) and newauthor != authorname:
                        logger.debug("Preferred authorname changed from [%s] to [%s]" % (authorname, newauthor))
                        authorname = newauthor
                    if new:
                        authcount += 1

                bookmatch = finditem(content[item], authorname, headers)
                result = ''
                if bookmatch:
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info(u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        bookcount += 1
                else:
                    searchterm = "%s <ll> %s" % (title, authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                      result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            bookcount += 1
                            bookmatch = True

                if not bookmatch:
                    msg = "Skipping book %s by %s" % (title, authorname)
                    if not result:
                        msg += ', No results returned'
                        logger.warn(msg)
                    else:
                        msg += ', No match found'
                        logger.warn(msg)
                        msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                    result['authorname'], result['bookname'])
                        logger.warn(msg)
                    skipcount += 1
            msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
                  (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount))
            logger.info(msg)
            return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
Example #17
0
def WWT(book=None, test=False):
    errmsg = ''
    provider = "WorldWideTorrents"
    host = lazylibrarian.CONFIG['WWT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/torrents-search.php")

    sterm = makeUnicode(book['searchterm'])

    cat = 0  # 0=all, 36=ebooks, 52=mags, 56=audiobooks
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 56
        elif book['library'] == 'eBook':
            cat = 36
        elif book['library'] == 'magazine':
            cat = 52

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {"search": book['searchterm'], "page": page, "cat": cat}
        searchURL = providerurl + "/?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # might return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" %
                             (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')

            try:
                tables = soup.find_all('table')  # un-named table
                table = tables[2]
                if table:
                    rows = table.find_all('tr')
            except IndexError:  # no results table in result page
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 3:
                    try:
                        title = unaccented(td[0].text)
                        # can return magnet or torrent or both.
                        magnet = ''
                        url = ''
                        mode = 'torrent'
                        try:
                            magnet = 'magnet' + str(
                                td[0]).split('href="magnet')[1].split('"')[0]
                            mode = 'magnet'
                        except IndexError:
                            pass
                        try:
                            url = url_fix(host + '/download.php') + \
                                          str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent'
                            mode = 'torrent'
                        except IndexError:
                            pass

                        if not url or (magnet and url and
                                       lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                        try:
                            size = str(td[1].text).replace('&nbsp;',
                                                           '').upper()
                            mult = 1
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0
                        try:
                            seeders = int(td[2].text)
                        except ValueError:
                            seeders = 0

                        if not url or not title:
                            logger.debug('Missing url or title')
                        elif minimumseeders < int(seeders):
                            results.append({
                                'bookid':
                                book['bookid'],
                                'tor_prov':
                                provider,
                                'tor_title':
                                title,
                                'tor_url':
                                url,
                                'tor_size':
                                str(size),
                                'tor_type':
                                mode,
                                'priority':
                                lazylibrarian.CONFIG['WWT_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                            next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' %
                                         (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))
        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
Example #18
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
                                     )
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"',
                    (magazine['bookid'], ))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn(
                'There is nothing to search for.  Mark some magazines as active.'
            )

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No nzb providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG,
                                 0) + 1200 < timenow:
                        logger.warn(
                            'No direct providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No tor providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No rss providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item[
                                'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace(
                        "'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(
                        nzbsize_temp, 1000
                    )  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {
                                '.': ' ',
                                '-': ' ',
                                '/': ' ',
                                '+': ' ',
                                '_': ' ',
                                '(': '',
                                ')': ''
                            }
                            nzbtitle_formatted = replace_all(nzbtitle,
                                                             dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA)
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[
                                    0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(
                                nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(
                                ' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    wlist.append(unaccented(word).lower())
                                for word in bookid_exploded:
                                    if unaccented(word).lower() not in wlist:
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        u"Magazine title match failed " +
                                        bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(u"Magazine matched " +
                                                 bookid + " for " +
                                                 nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
                            newdatish = "none"
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos -
                                                                        1])
                                    if month:
                                        if pos - 1:
                                            day = check_int(
                                                nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (
                                            year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and (pos - 1):
                                        month = month2num(
                                            nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(
                                                nzbtitle_exploded[
                                                    pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(
                                            nzbtitle_exploded):
                                        month = check_int(
                                            nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(
                                                    nzbtitle_exploded):
                                                day = check_int(
                                                    nzbtitle_exploded[pos + 2],
                                                    1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in [
                                            "issue", "no", "nr", "vol"
                                    ]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(
                                                nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(
                                                    issue)  # 4 == 04 == 004
                                                if pos + 2 < len(
                                                        nzbtitle_exploded):
                                                    year = check_year(
                                                        nzbtitle_exploded[pos +
                                                                          2])
                                                    if year and year < int(
                                                            datetime.date.
                                                            today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(
                                            nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(
                                                issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today(
                                            ).year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(
                                            issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(
                                                datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" %
                                             (regex_pass, nzbtitle_formatted,
                                              newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero

                                if str(newdatish).isdigit():
                                    logger.debug(
                                        'Magazine comparing issue numbers (%s)'
                                        % newdatish)
                                    control_date = 0
                                elif re.match('\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(
                                        lazylibrarian.CONFIG['MAG_AGE']
                                    ) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime(
                                        "%Y-%m-%d", time.localtime(start_time))
                                    logger.debug(
                                        'Magazine date comparing to %s' %
                                        control_date)
                                else:
                                    logger.debug(
                                        'Magazine unable to find comparison type [%s]'
                                        % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(
                                    newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(
                                    4)  # pad so we sort correctly
                            elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match('\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(
                                    newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match('\d+-\d\d-\d\d',
                                            str(control_date)):
                                    logger.debug(
                                        'Magazine %s failed: Expecting a date'
                                        % nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        'Magazine %s failed: Expecting issue number'
                                        % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT * from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Example #19
0
    def find_book(self, bookid=None, queue=None):
        myDB = database.DBConnection()
        if not lazylibrarian.GB_API:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
            str(bookid) + "?key=" + lazylibrarian.GB_API
        jsonresults, in_cache = get_json_request(URL)

        if jsonresults is None:
            logger.debug('No results found for %s' % bookname)
            return

        bookname = jsonresults['volumeInfo']['title']
        dic = {':': '', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        try:
            authorname = jsonresults['volumeInfo']['authors'][0]
        except KeyError:
            logger.debug(
                'Book %s does not contain author field, skipping' %
                bookname)
            return
        try:
            # warn if language is in ignore list, but user said they wanted
            # this book
            booklang = jsonresults['volumeInfo']['language']
            valid_langs = ([valid_lang.strip()
                           for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])
            if booklang not in valid_langs:
                logger.debug(
                    'Book %s language does not match preference' %
                    bookname)
        except KeyError:
            logger.debug('Book does not have language field')
            booklang = "Unknown"

        try:
            bookpub = jsonresults['volumeInfo']['publisher']
        except KeyError:
            bookpub = None

        series = None
        seriesNum = None
        try:
            booksub = jsonresults['volumeInfo']['subtitle']
            try:
                series = booksub.split('(')[1].split(' Series ')[0]
            except IndexError:
                series = None
            try:
                seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
                if seriesNum[0] == '#':
                    seriesNum = seriesNum[1:]
            except IndexError:
                seriesNum = None
        except KeyError:
            booksub = None

        try:
            bookdate = jsonresults['volumeInfo']['publishedDate']
        except KeyError:
            bookdate = '0000-00-00'

        try:
            bookimg = jsonresults['volumeInfo']['imageLinks']['thumbnail']
        except KeyError:
            bookimg = 'images/nocover.png'

        try:
            bookrate = jsonresults['volumeInfo']['averageRating']
        except KeyError:
            bookrate = 0

        try:
            bookpages = jsonresults['volumeInfo']['pageCount']
        except KeyError:
            bookpages = 0

        try:
            bookgenre = jsonresults['volumeInfo']['categories'][0]
        except KeyError:
            bookgenre = None

        try:
            bookdesc = jsonresults['volumeInfo']['description']
        except KeyError:
            bookdesc = None

        try:
            if jsonresults['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                bookisbn = jsonresults['volumeInfo'][
                    'industryIdentifiers'][0]['identifier']
            else:
                bookisbn = None
        except KeyError:
            bookisbn = None

        booklink = jsonresults['volumeInfo']['canonicalVolumeLink']
        bookrate = float(bookrate)

        name = jsonresults['volumeInfo']['authors'][0]
        GR = GoodReads(name)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": "",
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": today(),
            "Series": series,
            "SeriesNum": seriesNum
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

            elif bookimg and bookimg.startswith('http'):
                link = cache_cover(bookid, bookimg)
                if link is not None:
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"BookImg": link}
                    myDB.upsert("books", newValueDict, controlValueDict)

        if seriesNum is None:
            # try to get series info from librarything
            series, seriesNum = getWorkSeries(bookid)
            if seriesNum:
                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                controlValueDict = {"BookID": bookid}
                newValueDict = {
                    "Series": series,
                    "SeriesNum": seriesNum
                }
                myDB.upsert("books", newValueDict, controlValueDict)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Example #20
0
    def find_results(self, authorname=None, queue=None):

        myDB = database.DBConnection()
        resultlist = []
        # See if we should check ISBN field, otherwise ignore it
        api_strings = ['inauthor:', 'intitle:']
        if is_valid_isbn(authorname):
            api_strings = ['isbn:']

        api_hits = 0
        logger.debug(
            'Now searching Google Books API with keyword: ' +
            self.name)

        for api_value in api_strings:
            startindex = 0
            if api_value == "isbn:":
                set_url = self.url + urllib.quote(api_value + self.name.encode(lazylibrarian.SYS_ENCODING))
            else:
                set_url = self.url + \
                    urllib.quote(api_value + '"' + self.name.encode(lazylibrarian.SYS_ENCODING) + '"')

            try:
                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0

                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urllib.urlencode(self.params)

                    try:
                        jsonresults, in_cache = get_json_request(URL)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits = api_hits + 1
                            number_results = jsonresults['totalItems']
                            logger.debug('Searching url: ' + URL)
                        if number_results == 0:
                            logger.warn(
                                'Found no results for %s with value: %s' %
                                (api_value, self.name))
                            break
                        else:
                            pass
                    except HTTPError as err:
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while' %
                            err.reason)
                        break

                    startindex = startindex + 40

                    for item in jsonresults['items']:

                        total_count = total_count + 1

                        # skip if no author, no author is no book.
                        try:
                            Author = item['volumeInfo']['authors'][0]
                        except KeyError:
                            logger.debug(
                                'Skipped a result without authorfield.')
                            no_author_count = no_author_count + 1
                            continue

                        valid_langs = ([valid_lang.strip()
                                       for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])

                        if "All" not in valid_langs:  # don't care about languages, accept all
                            try:
                                # skip if language is not in valid list -
                                booklang = item['volumeInfo']['language']
                                if booklang not in valid_langs:
                                    logger.debug(
                                        'Skipped a book with language %s' %
                                        booklang)
                                    ignored = ignored + 1
                                    continue
                            except KeyError:
                                ignored = ignored + 1
                                logger.debug(
                                    'Skipped a result where no language is found')
                                continue

                        try:
                            bookpub = item['volumeInfo']['publisher']
                        except KeyError:
                            bookpub = None

                        try:
                            booksub = item['volumeInfo']['subtitle']
                        except KeyError:
                            booksub = None

                        try:
                            bookdate = item['volumeInfo']['publishedDate']
                        except KeyError:
                            bookdate = '0000-00-00'
                        bookdate = bookdate[:4]

                        try:
                            bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                        except KeyError:
                            bookimg = 'images/nocover.png'

                        try:
                            bookrate = item['volumeInfo']['averageRating']
                        except KeyError:
                            bookrate = 0

                        try:
                            bookpages = item['volumeInfo']['pageCount']
                        except KeyError:
                            bookpages = '0'

                        try:
                            bookgenre = item['volumeInfo']['categories'][0]
                        except KeyError:
                            bookgenre = None

                        try:
                            bookdesc = item['volumeInfo']['description']
                        except KeyError:
                            bookdesc = 'Not available'

                        try:
                            num_reviews = item['volumeInfo']['ratingsCount']
                        except KeyError:
                            num_reviews = 0

                        try:
                            if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                                bookisbn = item['volumeInfo'][
                                    'industryIdentifiers'][0]['identifier']
                            else:
                                bookisbn = 0
                        except KeyError:
                            bookisbn = 0

                        author_fuzz = fuzz.token_set_ratio(Author, authorname)
                        book_fuzz = fuzz.token_set_ratio(
                            item['volumeInfo']['title'],
                            authorname)

                        isbn_fuzz = 0
                        if is_valid_isbn(authorname):
                            isbn_fuzz = 100

                        highest_fuzz = max(author_fuzz, book_fuzz, isbn_fuzz)

                        bookname = item['volumeInfo']['title']
                        dic = {':': '', '"': '', '\'': ''}
                        bookname = replace_all(bookname, dic)

                        bookname = unaccented(bookname)
                        bookname = bookname.strip()  # strip whitespace
                        bookid = item['id']

                        author = myDB.select(
                            'SELECT AuthorID FROM authors WHERE AuthorName = "%s"' %
                            Author.replace('"', '""'))
                        if author:
                            AuthorID = author[0]['authorid']
                        else:
                            AuthorID = ''

                        resultlist.append({
                            'authorname': Author,
                            'authorid': AuthorID,
                            'bookid': bookid,
                            'bookname': bookname,
                            'booksub': booksub,
                            'bookisbn': bookisbn,
                            'bookpub': bookpub,
                            'bookdate': bookdate,
                            'booklang': booklang,
                            'booklink': item['volumeInfo']['canonicalVolumeLink'],
                            'bookrate': float(bookrate),
                            'bookimg': bookimg,
                            'bookpages': bookpages,
                            'bookgenre': bookgenre,
                            'bookdesc': bookdesc,
                            'author_fuzz': author_fuzz,
                            'book_fuzz': book_fuzz,
                            'isbn_fuzz': isbn_fuzz,
                            'highest_fuzz': highest_fuzz,
                            'num_reviews': num_reviews
                        })

                        resultcount = resultcount + 1

            except KeyError:
                break

        logger.debug("Found %s total result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s bad language result%s" % (ignored, plural(ignored)))
        logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
        logger.debug(
            "Showing %s result%s for (%s) with keyword: %s" %
            (resultcount, plural(resultcount), api_value, authorname))
        logger.debug(
            'The Google Books API was hit %s time%s for keyword %s' %
            (api_hits, plural(api_hits), self.name))
        queue.put(resultlist)
Example #21
0
    def get_author_books(self, authorid=None, authorname=None, refresh=False):

        logger.debug('[%s] Now processing books with Google Books API' % authorname)
        # google doesnt like accents in author names
        set_url = self.url + urllib.quote('inauthor:"%s"' % unaccented_str(authorname))
        URL = set_url + '&' + urllib.urlencode(self.params)

        books_dict = []
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = ([valid_lang.strip()
                           for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])

            while startindex < number_results:

                self.params['startIndex'] = startindex
                URL = set_url + '&' + urllib.urlencode(self.params)

                try:
                    jsonresults, in_cache = get_json_request(URL, useCache=not refresh)
                    if jsonresults is None:
                        number_results = 0
                    else:
                        if not in_cache:
                            api_hits = api_hits + 1
                        number_results = jsonresults['totalItems']
                except HTTPError as err:
                    logger.warn(
                        'Google Books API Error [%s]: Check your API key or wait a while' %
                        err.reason)
                    break

                if number_results == 0:
                    logger.warn('Found no results for %s' % authorname)
                    break
                else:
                    logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                startindex = startindex + 40

                for item in jsonresults['items']:

                    total_count = total_count + 1

                    # skip if no author, no author is no book.
                    try:
                        Author = item['volumeInfo']['authors'][0]
                    except KeyError:
                        logger.debug('Skipped a result without authorfield.')
                        continue

                    try:
                        if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                            bookisbn = item['volumeInfo'][
                                'industryIdentifiers'][0]['identifier']
                        else:
                            bookisbn = ""
                    except KeyError:
                        bookisbn = ""

                    isbnhead = ""
                    if len(bookisbn) == 10:
                        isbnhead = bookisbn[0:3]

                    try:
                        booklang = item['volumeInfo']['language']
                    except KeyError:
                        booklang = "Unknown"

                    # do we care about language?
                    if "All" not in valid_langs:
                        if bookisbn != "":
                            # seems google lies to us, sometimes tells us books
                            # are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' %
                                                   (isbnhead))
                                if (match):
                                    booklang = match['lang']
                                    cache_hits = cache_hits + 1
                                    logger.debug(
                                        "Found cached language [%s] for [%s]" %
                                        (booklang, isbnhead))

                                else:
                                    # no match in cache, try searching librarything for a language code using the isbn
                                    # if no language found, librarything return value is "invalid" or "unknown"
                                    # librarything returns plain text, not xml
                                    BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + \
                                        bookisbn
                                    try:
                                        librarything_wait()
                                        resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                        lt_lang_hits = lt_lang_hits + 1
                                        logger.debug(
                                            "LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                        if (resp != 'invalid' and resp != 'unknown'):
                                            booklang = resp  # found a language code
                                            myDB.action('insert into languages values ("%s", "%s")' %
                                                        (isbnhead, booklang))
                                            logger.debug(u"LT language: " + booklang)
                                    except Exception as e:
                                        booklang = ""
                                        logger.error("Error finding language: %s" % str(e))

                                if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                    # these are all english, may need to expand
                                    # this list
                                    booknamealt = item['volumeInfo']['title']
                                    logger.debug("%s Google thinks [%s], we think [%s]" %
                                                 (booknamealt, googlelang, booklang))
                                    gb_lang_change = gb_lang_change + 1
                            else:
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' %
                                                   (isbnhead))
                                if (not match):
                                    myDB.action(
                                        'insert into languages values ("%s", "%s")' %
                                        (isbnhead, booklang))
                                    logger.debug(u"GB language: " + booklang)

                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            booknamealt = item['volumeInfo']['title']
                            logger.debug(
                                'Skipped [%s] with language %s' %
                                (booknamealt, booklang))
                            ignored = ignored + 1
                            continue

                    try:
                        bookpub = item['volumeInfo']['publisher']
                    except KeyError:
                        bookpub = None

                    try:
                        booksub = item['volumeInfo']['subtitle']
                    except KeyError:
                        booksub = None

                    if booksub is None:
                        series = None
                        seriesNum = None
                    else:
                        try:
                            series = booksub.split('(')[1].split(' Series ')[0]
                        except IndexError:
                            series = None
                        try:
                            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
                            if seriesNum[0] == '#':
                                seriesNum = seriesNum[1:]
                        except IndexError:
                            seriesNum = None

                    try:
                        bookdate = item['volumeInfo']['publishedDate']
                    except KeyError:
                        bookdate = '0000-00-00'

                    try:
                        bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                    except KeyError:
                        bookimg = 'images/nocover.png'

                    try:
                        bookrate = item['volumeInfo']['averageRating']
                    except KeyError:
                        bookrate = 0

                    try:
                        bookpages = item['volumeInfo']['pageCount']
                    except KeyError:
                        bookpages = 0

                    try:
                        bookgenre = item['volumeInfo']['categories'][0]
                    except KeyError:
                        bookgenre = None

                    try:
                        bookdesc = item['volumeInfo']['description']
                    except KeyError:
                        bookdesc = None

                    bookname = item['volumeInfo']['title']
                    bookname = unaccented(bookname)
                    dic = {':': '', '"': '', '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace

                    booklink = item['volumeInfo']['canonicalVolumeLink']
                    bookrate = float(bookrate)
                    bookid = item['id']

                    # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                    # and sometimes uses the same bookid if the book is the same but the title is slightly different
                    #
                    # Not sure if googlebooks does too, but we only want one...
                    find_book_status = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid)
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                            locked = resulted['Manual']
                    else:
                        book_status = lazylibrarian.NEWBOOK_STATUS
                        locked = False

                    rejected = False
                    if re.match('[^\w-]', bookname):  # remove books with bad characters in title
                        logger.debug("[%s] removed book for bad characters" % bookname)
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected and not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' %
                                     (bookid, authorname))
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected:
                        find_books = myDB.select('SELECT * FROM books WHERE BookName = "%s" and AuthorName = "%s"' %
                                                 (bookname.replace('"', '""'), authorname.replace('"', '""')))
                        if find_books:
                            for find_book in find_books:
                                if find_book['BookID'] != bookid:
                                    # we have a book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (find_book['BookID'], authorname, bookname, bookid))
                                    rejected = True
                                    duplicates = duplicates + 1

                    if not rejected:
                        find_books = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid)
                        if find_books:
                            # we have a book with this bookid already
                            logger.debug('Rejecting bookid %s for [%s][%s] already got this bookid in database' %
                                         (bookid, authorname, bookname))
                            duplicates = duplicates + 1
                            rejected = True

                    if not rejected:
                        if book_status != "Ignored" and not locked:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorName": authorname,
                                "AuthorID": authorid,
                                "AuthorLink": "",
                                "BookName": bookname,
                                "BookSub": booksub,
                                "BookDesc": bookdesc,
                                "BookIsbn": bookisbn,
                                "BookPub": bookpub,
                                "BookGenre": bookgenre,
                                "BookImg": bookimg,
                                "BookLink": booklink,
                                "BookRate": bookrate,
                                "BookPages": bookpages,
                                "BookDate": bookdate,
                                "BookLang": booklang,
                                "Status": book_status,
                                "BookAdded": today(),
                                "Series": series,
                                "SeriesNum": seriesNum
                            }
                            resultcount = resultcount + 1

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug(u"Book found: " + bookname + " " + bookdate)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            elif bookimg and bookimg.startswith('http'):
                                link = cache_cover(bookid, bookimg)
                                if link is not None:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            if seriesNum is None:
                                # try to get series info from librarything
                                series, seriesNum = getWorkSeries(bookid)
                                if seriesNum:
                                    logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {
                                        "Series": series,
                                        "SeriesNum": seriesNum
                                    }
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not find_book_status:
                                logger.debug("[%s] Added book: %s [%s]" % (authorname, bookname, booklang))
                                added_count = added_count + 1
                            else:
                                updated_count = updated_count + 1
                                logger.debug("[%s] Updated book: %s" % (authorname, bookname))
                        else:
                            book_ignore_count = book_ignore_count + 1
        except KeyError:
            pass

        logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                     (authorname, api_hits, plural(api_hits)))

        lastbook = myDB.match('SELECT BookName, BookLink, BookDate from books WHERE AuthorID="%s" \
                               AND Status != "Ignored" order by BookDate DESC' % authorid)

        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
        else:
            lastbookname = None
            lastbooklink = None
            lastbookdate = None

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }

        myDB.upsert("authors", newValueDict, controlValueDict)

        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s bad language result%s for author" % (ignored, plural(ignored)))
        logger.debug(
            "Removed %s bad character or no-name result%s for author" %
            (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Ignored %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

        myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' %
                    (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change, cache_hits,
                     ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))
        return books_dict
Example #22
0
def search_rss_book(books=None, reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not (lazylibrarian.USE_RSS()):
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        if not internet():
            logger.warn('Search RSS Book: No internet connection')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverGoodReads()
        if not wishproviders:
            logger.debug('No rss wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            for book in resultlist:
                # we get rss_author, rss_title, rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                logger.debug('Processing %s item%s in wishlists' %
                             (len(resultlist), plural(len(resultlist))))
                if book['rss_bookid'] and lazylibrarian.CONFIG[
                        'BOOK_API'] == "GoodReads":
                    bookmatch = myDB.match(
                        'select Status,BookName from books where bookid="%s"' %
                        book['rss_bookid'])
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(
                                u'Found book %s, already marked as "%s"' %
                                (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s, marking as "Wanted"' %
                                        bookname)
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    else:
                        import_book(book['rss_bookid'])
                else:
                    item = {}
                    headers = []
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                        headers.append('BookID')
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                        headers.append('ISBN')
                    bookmatch = finditem(item, book['rss_author'], headers)
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(
                                u'Found book %s by %s, already marked as "%s"'
                                % (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(
                                u'Found book %s by %s, marking as "Wanted"' %
                                (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    else:  # not in database yet
                        results = ''
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]
                            if result['isbn_fuzz'] > lazylibrarian.CONFIG[
                                    'MATCH_RATIO']:
                                logger.info(
                                    "Found (%s%%) %s: %s" %
                                    (result['isbn_fuzz'], result['authorname'],
                                     result['bookname']))
                                import_book(result['bookid'])
                                bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (
                                item['Title'],
                                formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]
                            if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                                logger.info(
                                    "Found (%s%% %s%%) %s: %s" %
                                    (result['author_fuzz'],
                                     result['book_fuzz'], result['authorname'],
                                     result['bookname']))
                                import_book(result['bookid'])
                                bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'],
                                                          book['rss_author'])
                        # noinspection PyUnboundLocalVariable
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            msg = "Closest match (%s%% %s%%) %s: %s" % (
                                result['author_fuzz'], result['book_fuzz'],
                                result['authorname'], result['bookname'])
                            logger.warn(msg)

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books,authors '
            cmd += 'WHERE books.AuthorID = authors.AuthorID and books.Status="Wanted" order by BookAdded desc'
            searchbooks = myDB.select(cmd)

        else:
            # The user has added a new book
            searchbooks = []
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub from books,authors '
                cmd += 'WHERE books.AuthorID = authors.AuthorID and BookID="%s" ' % book[
                    'bookid']
                cmd += 'AND books.Status="Wanted"'
                searchbook = myDB.select(cmd)
                for terms in searchbook:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders:
            if not wishproviders:
                logger.warn('No rss providers are set, check config')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

        rss_count = 0
        for book in searchbooks:
            authorname, bookname = get_searchterm(book, "book")
            found = processResultList(resultlist, authorname, bookname, book,
                                      'book')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in bookname:  # anything to shorten?
                authorname, bookname = get_searchterm(book, "shortbook")
                found = processResultList(resultlist, authorname, bookname,
                                          book, 'shortbook')

            if not found:
                logger.debug(
                    "Searches returned no results. Adding book %s - %s to queue."
                    % (authorname, bookname))
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" %
                    (rss_count, plural(rss_count)))

        if reset:
            scheduleJob(action='Restart', target='search_rss_book')

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' %
                     traceback.format_exc())
Example #23
0
def GEN(book=None, prov=None):
    errmsg = ''
    provider = "libgen.io"
    if prov is None:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urllib.urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug(u"No results found from %s for %s" %
                             (provider, book['searchterm']))
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error(
                    'Access forbidden. Please wait a while before trying %s again.'
                    % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if result:
            logger.debug(u'Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            try:
                soup = BeautifulSoup(result)
                try:
                    table = soup.findAll('table')[2]  # un-named table
                    if table:
                        rows = table.findAll('tr')
                except IndexError:  # no results table in result page
                    rows = []

                if 'search.php' in search and len(rows) > 1:
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.findAll('td')
                    if 'index.php' in search and len(td) > 3:
                        try:
                            res = str(
                                BeautifulStoneSoup(
                                    td[0].text,
                                    convertEntities=BeautifulStoneSoup.
                                    HTML_ENTITIES))
                            author = formatAuthorName(res)
                            title = str(
                                BeautifulStoneSoup(
                                    td[2].text,
                                    convertEntities=BeautifulStoneSoup.
                                    HTML_ENTITIES))
                            temp = str(td[4])
                            temp = temp.split('onmouseout')[1]
                            extn = temp.split('">')[1].split('(')[0]
                            size = temp.split('">')[1].split('(')[1].split(
                                ')')[0]
                            size = size.upper()
                            link = temp.split('href=')[1].split('"')[1]
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen index.php results: %s' %
                                str(e))

                    elif 'search.php' in search and len(td) > 8:
                        try:
                            res = str(
                                BeautifulStoneSoup(
                                    td[1].text,
                                    convertEntities=BeautifulStoneSoup.
                                    HTML_ENTITIES))
                            author = formatAuthorName(res)
                            title = str(
                                td[2]).split('>')[2].split('<')[0].strip()
                            title = str(
                                BeautifulStoneSoup(
                                    title,
                                    convertEntities=BeautifulStoneSoup.
                                    HTML_ENTITIES))
                            link = str(td[2]).split('href="')[1].split(
                                '?')[1].split('"')[0]
                            size = unaccented(td[7].text).upper()
                            extn = td[8].text
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen search.php results; %s' %
                                str(e))

                    if not size:
                        size = 0
                    else:
                        try:
                            mult = 1
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if not link.startswith('http'):
                            if "/ads.php?" in link:
                                url = url_fix(host + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)
                        else:
                            url = redirect_url(host, link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            # may return 404 if no results, not really an error
                            if '404' in bookresult:
                                logger.debug(
                                    u"No results found from %s for %s" %
                                    (provider, book['searchterm']))
                            else:
                                logger.debug(url)
                                logger.debug(
                                    'Error fetching link data from %s: %s' %
                                    (provider, bookresult))
                                errmsg = bookresult
                            bookresult = False

                        if bookresult:
                            url = None
                            try:
                                new_soup = BeautifulSoup(bookresult)
                                for link in new_soup.findAll('a'):
                                    output = link.get('href')
                                    if output:
                                        if output.startswith(
                                                'http'
                                        ) and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split(
                                                '/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split(
                                                '/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                else:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.debug(
                                    'Error parsing bookresult for %s: %s' %
                                    (link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid':
                                book['bookid'],
                                'tor_prov':
                                provider + '/' + search,
                                'tor_title':
                                title,
                                'tor_url':
                                url,
                                'tor_size':
                                str(size),
                                'tor_type':
                                'direct',
                                'priority':
                                lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" %
                             (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug(
        u"Found %i result%s from %s for %s" %
        (len(results), plural(len(results)), provider, book['searchterm']))
    return results, errmsg
Example #24
0
def dbupgrade(db_current_version):
    try:
        myDB = database.DBConnection()
        db_version = 0
        result = myDB.match('PRAGMA user_version')
        if result and result[0]:
            value = str(result[0])
            if value.isdigit():
                db_version = int(value)

        check = myDB.match('PRAGMA integrity_check')
        if check and check[0]:
            result = check[0]
            if result == 'ok':
                logger.debug('Database integrity check: %s' % result)
            else:
                logger.error('Database integrity check: %s' % result)
                # should probably abort now

        if db_version < db_current_version:
            myDB = database.DBConnection()

            if db_version < 1:
                if not has_column(myDB, "authors", "AuthorID"):
                    # it's a new database. Create tables but no need for any upgrading
                    db_version = db_current_version
                    lazylibrarian.UPDATE_MSG = 'Creating new database, version %s' % db_version
                else:
                    lazylibrarian.UPDATE_MSG = 'Updating database to version %s, current version is %s' % (
                        db_current_version, db_version)
                logger.info(lazylibrarian.UPDATE_MSG)
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS authors (AuthorID TEXT UNIQUE, AuthorName TEXT UNIQUE, \
                AuthorImg TEXT, AuthorLink TEXT, DateAdded TEXT, Status TEXT, LastBook TEXT, LastBookImg TEXT, \
                LastLink Text, LastDate TEXT,  HaveBooks INTEGER, TotalBooks INTEGER, AuthorBorn TEXT, \
                AuthorDeath TEXT, UnignoredBooks INTEGER, Manual TEXT)')
                myDB.action('CREATE TABLE IF NOT EXISTS books (AuthorID TEXT, \
                BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \
                BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, BookFile TEXT, \
                BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, WorkPage TEXT, Manual TEXT)'
                            )
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS wanted (BookID TEXT, NZBurl TEXT, NZBtitle TEXT, NZBdate TEXT, \
                NZBprov TEXT, Status TEXT, NZBsize TEXT, AuxInfo TEXT, NZBmode TEXT, Source TEXT, DownloadID TEXT)'
                )
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS pastissues AS SELECT * FROM wanted WHERE 0'
                )  # same columns
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS magazines (Title TEXT UNIQUE, Regex TEXT, Status TEXT, \
                MagazineAdded TEXT, LastAcquired TEXT, IssueDate TEXT, IssueStatus TEXT, Reject TEXT, \
                LatestCover TEXT)')
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS languages (isbn TEXT, lang TEXT)'
                )
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS issues (Title TEXT, IssueID TEXT UNIQUE, IssueAcquired TEXT, \
                IssueDate TEXT, IssueFile TEXT)')
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS stats (authorname text, GR_book_hits int, GR_lang_hits int, \
                LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int, \
                duplicates int)')
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS series (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \
                Status TEXT)')
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS member (SeriesID INTEGER, BookID TEXT, SeriesNum TEXT)'
                )
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS seriesauthors (SeriesID INTEGER, AuthorID TEXT)'
                )

            # These are the incremental changes before database versioning was introduced.
            # Old database tables might already have these incorporated depending on version, so we need to check...
            if db_version < 1:

                if not has_column(myDB, "books", "BookSub"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold book subtitles.'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN BookSub TEXT')

                if not has_column(myDB, "books", "BookSub"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold book publisher'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN BookPub TEXT')

                if not has_column(myDB, "books", "BookGenre"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold bookgenre'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN BookGenre TEXT')

                if not has_column(myDB, "books", "BookFile"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold book filename'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN BookFile TEXT')

                if not has_column(myDB, "wanted", "AuxInfo"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold AuxInfo'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE wanted ADD COLUMN AuxInfo TEXT')

                if not has_column(myDB, "wanted", "NZBsize"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold NZBsize'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE wanted ADD COLUMN NZBsize TEXT')

                if not has_column(myDB, "wanted", "NZBmode"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold NZBmode'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE wanted ADD COLUMN NZBmode TEXT')

                if not has_column(myDB, "authors", "UnignoredBooks"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold UnignoredBooks'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'ALTER TABLE authors ADD COLUMN UnignoredBooks INTEGER'
                    )

                if not has_column(myDB, "magazines", "IssueStatus"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold IssueStatus'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'ALTER TABLE magazines ADD COLUMN IssueStatus TEXT')

                addedWorkPage = False
                if not has_column(myDB, "books", "WorkPage"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold WorkPage'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN WorkPage TEXT')
                    addedWorkPage = True

                addedSeries = False
                if not has_column(myDB, "series",
                                  "SeriesID") and not has_column(
                                      myDB, "books", "Series"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold Series'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN Series TEXT')
                    addedSeries = True

                # SeriesOrder shouldn't be an integer, some later written books
                # and novellas logically go inbetween books of the main series,
                # and their SeriesOrder is not an integer, eg 1.5
                # so we need to update SeriesOrder to store as text.
                # Because sqlite can't drop columns we create a new column SeriesNum,
                # inherit the old column values, and use SeriesNum instead
                if not has_column(myDB, "books", "SeriesNum") and has_column(
                        myDB, "books", "SeriesOrder"):
                    # no SeriesNum column, so create one
                    lazylibrarian.UPDATE_MSG = 'Updating books to hold SeriesNum'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN SeriesNum TEXT')
                    myDB.action('UPDATE books SET SeriesNum = SeriesOrder')
                    myDB.action('UPDATE books SET SeriesOrder = Null')

                addedIssues = False
                if not has_column(myDB, "issues", "Title"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold Issues table'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'CREATE TABLE issues (Title TEXT, IssueID TEXT, IssueAcquired TEXT, IssueDate TEXT, IssueFile TEXT)'
                    )
                    addedIssues = True

                if not has_column(myDB, "issues", "IssueID"):
                    lazylibrarian.UPDATE_MSG = 'Updating Issues table to hold IssueID'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE issues ADD COLUMN IssueID TEXT')
                    addedIssues = True

                myDB.action('DROP TABLE if exists capabilities')

                if addedIssues:
                    try:
                        magazinescan.magazineScan()
                    except Exception as e:
                        logger.debug("Failed to scan magazines, %s" % str(e))

                if addedWorkPage:
                    try:
                        lazylibrarian.UPDATE_MSG = 'Adding WorkPage to existing books'
                        logger.debug(lazylibrarian.UPDATE_MSG)
                        threading.Thread(target=bookwork.setWorkPages,
                                         name="ADDWORKPAGE",
                                         args=[]).start()
                    except Exception as e:
                        logger.debug("Failed to update WorkPages, %s" % str(e))

                if addedSeries:
                    try:
                        books = myDB.select(
                            'SELECT BookID, BookName FROM books')
                        if books:
                            lazylibrarian.UPDATE_MSG = 'Adding series to existing books'
                            logger.debug(lazylibrarian.UPDATE_MSG)
                            tot = len(books)
                            cnt = 0
                            for book in books:
                                cnt += 1
                                lazylibrarian.UPDATE_MSG = 'Adding series to existing books: %s of %s' % (
                                    cnt, tot)
                                series, seriesNum = bookSeries(
                                    book["BookName"])
                                if series:
                                    controlValueDict = {
                                        "BookID": book["BookID"]
                                    }
                                    newValueDict = {
                                        "series": series,
                                        "seriesNum": seriesNum
                                    }
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)
                    except Exception as e:
                        logger.error('Error: ' + str(e))

            if db_version < 2:
                try:
                    results = myDB.select(
                        'SELECT BookID,NZBsize FROM wanted WHERE NZBsize LIKE "% MB"'
                    )
                    if results:
                        lazylibrarian.UPDATE_MSG = 'Removing units from wanted table'
                        logger.debug(lazylibrarian.UPDATE_MSG)
                        tot = len(results)
                        cnt = 0
                        for units in results:
                            cnt += 1
                            lazylibrarian.UPDATE_MSG = 'Removing units from wanted table: %s of %s' % (
                                cnt, tot)
                            nzbsize = units["NZBsize"]
                            nzbsize = nzbsize.split(' ')[0]
                            myDB.action(
                                'UPDATE wanted SET NZBsize = "%s" WHERE BookID = "%s"'
                                % (nzbsize, units["BookID"]))

                except Exception as e:
                    logger.error('Error: ' + str(e))

            if db_version < 3:
                if has_column(myDB, "books", "SeriesOrder"):
                    lazylibrarian.UPDATE_MSG = 'Removing SeriesOrder from books table'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'CREATE TABLE IF NOT EXISTS temp_books (AuthorID TEXT, AuthorName TEXT, AuthorLink TEXT, \
                    BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \
                    BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \
                    BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, Series TEXT, \
                    SeriesNum TEXT, WorkPage TEXT)')
                    myDB.action(
                        'INSERT INTO temp_books SELECT AuthorID,AuthorName,AuthorLink,BookName,BookSub, \
                    BookDesc,BookGenre,BookIsbn,BookPub,BookRate,BookImg,BookPages,BookLink,BookID, \
                    BookFile,BookDate,BookLang,BookAdded,Status,Series,SeriesNum,WorkPage FROM books'
                    )
                    myDB.action('DROP TABLE books')
                    myDB.action('ALTER TABLE temp_books RENAME TO books')

                if not has_column(myDB, "pastissues", "BookID"):
                    lazylibrarian.UPDATE_MSG = 'Moving magazine past issues into new table'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'CREATE TABLE pastissues AS SELECT * FROM wanted WHERE Status="Skipped" AND length(AuxInfo) > 0'
                    )
                    myDB.action(
                        'DELETE FROM wanted WHERE Status="Skipped" AND length(AuxInfo) > 0'
                    )

            if db_version < 4:
                if not has_column(myDB, "stats", "duplicates"):
                    lazylibrarian.UPDATE_MSG = 'Updating stats table to hold duplicates'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE stats ADD COLUMN duplicates INT')

            if db_version < 5:
                issues = myDB.select(
                    'SELECT IssueID,IssueDate from issues WHERE length(IssueDate) < 4 and length(IssueDate) > 0'
                )
                if issues:
                    lazylibrarian.UPDATE_MSG = 'Updating issues table to hold 4 digit issue numbers'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    tot = len(issues)
                    cnt = 0
                    for issue in issues:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = 'Updating issues table 4 digits: %s of %s' % (
                            cnt, tot)
                        issueid = issue['IssueID']
                        issuedate = str(issue['IssueDate'])
                        issuedate = issuedate.zfill(4)
                        myDB.action(
                            'UPDATE issues SET IssueDate="%s" WHERE IssueID="%s"'
                            % (issuedate, issueid))

                mags = myDB.select(
                    'SELECT Title,IssueDate from magazines WHERE length(IssueDate) < 4 and length(IssueDate) > 0'
                )
                if mags:
                    lazylibrarian.UPDATE_MSG = 'Updating magazines table to 4 digits'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    tot = len(mags)
                    cnt = 0
                    for mag in mags:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = 'Updating magazines table to 4 digits: %s of %s' % (
                            cnt, tot)
                        title = mag['Title']
                        issuedate = str(mag['IssueDate'])
                        issuedate = issuedate.zfill(4)
                        myDB.action(
                            'UPDATE magazines SET IssueDate="%s" WHERE Title="%s"'
                            % (issuedate, title))

            if db_version < 6:
                if not has_column(myDB, "books", "Manual"):
                    lazylibrarian.UPDATE_MSG = 'Updating books table to hold Manual setting'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN Manual TEXT')

            if db_version < 7:
                if not has_column(myDB, "wanted", "Source"):
                    lazylibrarian.UPDATE_MSG = 'Updating wanted table to hold Source and DownloadID'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE wanted ADD COLUMN Source TEXT')
                    myDB.action(
                        'ALTER TABLE wanted ADD COLUMN DownloadID TEXT')

            if db_version < 8:
                src = os.path.join(lazylibrarian.PROG_DIR,
                                   'data/images/cache/')
                dst = lazylibrarian.CACHEDIR
                images = myDB.select(
                    'SELECT AuthorID, AuthorImg FROM authors WHERE AuthorImg LIKE "images/cache/%"'
                )
                if images:
                    logger.debug('Moving author images to new location')
                    tot = len(images)
                    cnt = 0
                    for image in images:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Moving author images to new location: %s of %s" % (
                            cnt, tot)
                        img = image['AuthorImg']
                        img = img[7:]
                        myDB.action(
                            'UPDATE authors SET AuthorImg="%s" WHERE AuthorID="%s"'
                            % (img, image['AuthorID']))
                        img = img[6:]
                        srcfile = os.path.join(src, img)
                        if os.path.isfile(srcfile):
                            try:
                                shutil.move(os.path.join(src, img),
                                            os.path.join(dst, img))
                            except Exception as e:
                                logger.warn("dbupgrade: %s" % str(e))
                    logger.debug("Author Image cache updated")

                images = myDB.select(
                    'SELECT BookID, BookImg FROM books WHERE BookImg LIKE "images/cache/%"'
                )
                if images:
                    logger.debug('Moving book images to new location')
                    tot = len(images)
                    cnt = 0
                    for image in images:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Moving book images to new location: %s of %s" % (
                            cnt, tot)
                        img = image['BookImg']
                        img = img[7:]
                        myDB.action(
                            'UPDATE books SET BookImg="%s" WHERE BookID="%s"' %
                            (img, image['BookID']))
                        img = img[6:]
                        srcfile = os.path.join(src, img)
                        if os.path.isfile(srcfile):
                            try:
                                shutil.move(srcfile, os.path.join(dst, img))
                            except Exception as e:
                                logger.warn("dbupgrade: %s" % str(e))
                    logger.debug("Book Image cache updated")

            if db_version < 9:
                if not has_column(myDB, "magazines", "Reject"):
                    # remove frequency column, rename regex to reject, add new regex column for searches
                    lazylibrarian.UPDATE_MSG = 'Updating magazines table'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'CREATE TABLE IF NOT EXISTS temp_table (Title TEXT, Regex TEXT, Status TEXT, \
                            MagazineAdded TEXT, LastAcquired TEXT, IssueDate TEXT, IssueStatus TEXT, Reject TEXT)'
                    )
                    myDB.action(
                        'INSERT INTO temp_table SELECT Title, Regex, Status, MagazineAdded, LastAcquired, \
                            IssueDate, IssueStatus, Regex FROM magazines')
                    myDB.action('DROP TABLE magazines')
                    myDB.action('ALTER TABLE temp_table RENAME TO magazines')
                    myDB.action('UPDATE magazines SET Regex = Null')

            if db_version < 10:
                # make sure columns in pastissues match those in wanted table
                # needed when upgrading from old 3rd party packages (eg freenas)
                myDB.action('DROP TABLE pastissues')
                myDB.action(
                    'CREATE TABLE pastissues AS SELECT * FROM wanted WHERE 0'
                )  # same columns, but empty table

            if db_version < 11:
                # keep last book image
                if not has_column(myDB, "authors", "LastBookImg"):
                    lazylibrarian.UPDATE_MSG = 'Updating author table to hold last book image'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'ALTER TABLE authors ADD COLUMN LastBookImg TEXT')
                    books = myDB.select(
                        'SELECT AuthorID, AuthorName, LastBook from authors')

                    if books:
                        for book in books:
                            lazylibrarian.UPDATE_MSG = 'Updating last book image for %s' % book[
                                'AuthorName']
                            if book['LastBook']:
                                match = myDB.match(
                                    'SELECT BookImg from books WHERE AuthorID="%s" AND BookName="%s"'
                                    % (book['AuthorID'], book['LastBook']))
                                if match:
                                    myDB.action(
                                        'UPDATE authors SET LastBookImg="%s" WHERE AuthorID=%s'
                                        % (match['BookImg'], book['AuthorID']))

            if db_version < 12:
                # keep last magazine issue image
                if not has_column(myDB, "Magazines", "LatestCover"):
                    lazylibrarian.UPDATE_MSG = 'Updating magazine table to hold last issue image'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'ALTER TABLE magazines ADD COLUMN LatestCover TEXT')
                    mags = myDB.select(
                        'SELECT Title, LastAcquired from magazines')

                    if mags:
                        for mag in mags:
                            lazylibrarian.UPDATE_MSG = 'Updating last issue image for %s' % mag[
                                'Title']
                            match = myDB.match(
                                'SELECT IssueFile from issues WHERE IssueAcquired="%s" AND Title="%s"'
                                % (mag['LastAcquired'], mag['Title']))
                            if match:
                                coverfile = os.path.splitext(
                                    match['IssueFile'])[0] + '.jpg'
                                if os.path.exists(coverfile):
                                    myDB.action(
                                        'UPDATE magazines SET LatestCover="%s" WHERE Title="%s"'
                                        % (coverfile, mag['Title']))

            if db_version < 13:
                if not has_column(myDB, "authors", "Manual"):
                    lazylibrarian.UPDATE_MSG = 'Updating authors table to hold Manual setting'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE authors ADD COLUMN Manual TEXT')

            if db_version < 14:
                src = lazylibrarian.CACHEDIR
                try:
                    os.mkdir(os.path.join(src, 'author'))
                except OSError as e:
                    if e.errno is not 17:  # already exists is ok
                        logger.debug('mkdir author cache reports: %s' % str(e))

                query = 'SELECT AuthorName, AuthorID, AuthorImg FROM authors '
                query += 'WHERE AuthorImg LIKE "cache/%" '
                query += 'AND AuthorImg NOT LIKE "cache/author/%"'

                images = myDB.select(query)
                if images:
                    tot = len(images)
                    logger.debug('Moving %s author images to new location' %
                                 tot)
                    cnt = 0
                    for image in images:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Moving author images to new location: %s of %s" % (
                            cnt, tot)
                        try:
                            img = image['AuthorImg']
                            img = img.rsplit('/', 1)[1]
                            srcfile = os.path.join(src, img)
                            if os.path.isfile(srcfile):
                                try:
                                    shutil.move(
                                        srcfile,
                                        os.path.join(src, "author", img))
                                    myDB.action(
                                        'UPDATE authors SET AuthorImg="cache/author/%s" WHERE AuthorID="%s"'
                                        % (img, image['AuthorID']))
                                except Exception as e:
                                    logger.warn("dbupgrade: %s" % str(e))
                        except Exception as e:
                            logger.warn(
                                'Failed to update author image for %s: %s' %
                                (image['AuthorName'], str(e)))
                    logger.debug("Author Image cache updated")

                try:
                    os.mkdir(os.path.join(src, 'book'))
                except OSError as e:
                    if e.errno is not 17:  # already exists is ok
                        logger.debug('mkdir book cache reports: %s' % str(e))

                query = 'SELECT BookName, BookID, BookImg FROM books '
                query += 'WHERE BookImg LIKE "cache/%" '
                query += 'AND BookImg NOT LIKE "cache/book/%"'
                images = myDB.select(query)

                if images:
                    tot = len(images)
                    logger.debug('Moving %s book images to new location' % tot)
                    cnt = 0
                    for image in images:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Moving book images to new location: %s of %s" % (
                            cnt, tot)
                        try:
                            img = image['BookImg']
                            img = img.rsplit('/', 1)[1]
                            srcfile = os.path.join(src, img)
                            if os.path.isfile(srcfile):
                                try:
                                    shutil.move(srcfile,
                                                os.path.join(src, "book", img))
                                    myDB.action(
                                        'UPDATE books SET BookImg="cache/book/%s" WHERE BookID="%s"'
                                        % (img, image['BookID']))
                                except Exception as e:
                                    logger.warn("dbupgrade: %s" % str(e))
                        except Exception as e:
                            logger.warn(
                                'Failed to update book image for %s: %s' %
                                (image['BookName'], str(e)))
                    logger.debug("Book Image cache updated")

                # at this point there should be no more .jpg files in the root of the cachedir
                # any that are still there are for books/authors deleted from database
                # or magazine latest issue cover files that get copied as required
                for image in os.listdir(src):
                    if image.endswith('.jpg'):
                        os.remove(os.path.join(src, image))

            if db_version < 15:
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS series (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \
                        AuthorID TEXT, Status TEXT)')
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS member (SeriesID INTEGER, BookID TEXT, SeriesNum TEXT)'
                )
                if has_column(myDB, "books", "SeriesNum"):
                    lazylibrarian.UPDATE_MSG = 'Populating series and member tables'
                    books = myDB.select(
                        'SELECT BookID, Series, SeriesNum from books')
                    if books:
                        tot = len(books)
                        logger.debug("Updating book series for %s book%s" %
                                     (tot, plural(tot)))
                        cnt = 0
                        for book in books:
                            cnt += 1
                            lazylibrarian.UPDATE_MSG = "Updating book series: %s of %s" % (
                                cnt, tot)
                            seriesdict = getWorkSeries(book['BookID'])
                            if not seriesdict:  # no workpage series, use the current values if present
                                if book['Series'] and book['SeriesNum']:
                                    seriesdict = {
                                        cleanName(unaccented(book['Series'])):
                                        book['SeriesNum']
                                    }
                            setSeries(seriesdict,
                                      book['BookID'],
                                      seriesauthors=False)
                        # deleteEmptySeries  # shouldn't be any on first run?
                        lazylibrarian.UPDATE_MSG = "Book series update complete"
                        logger.debug(lazylibrarian.UPDATE_MSG)

                    lazylibrarian.UPDATE_MSG = 'Removing seriesnum from books table'
                    myDB.action(
                        'CREATE TABLE IF NOT EXISTS temp_table (AuthorID TEXT, AuthorName TEXT, AuthorLink TEXT, \
                    BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \
                    BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \
                    BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, Series TEXT, \
                    WorkPage TEXT, Manual TEXT)')
                    myDB.action(
                        'INSERT INTO temp_table SELECT AuthorID, AuthorName, AuthorLink, BookName, BookSub, \
                    BookDesc, BookGenre, BookIsbn, BookPub, BookRate, BookImg, BookPages, BookLink, BookID, \
                    BookFile, BookDate, BookLang, BookAdded, Status, Series, WorkPage, Manual from books'
                    )
                    myDB.action('DROP TABLE books')
                    myDB.action('ALTER TABLE temp_table RENAME TO books')
                    lazylibrarian.UPDATE_MSG = 'Reorganisation of books table complete'

            if db_version < 16:
                if has_column(myDB, "books", "AuthorLink"):
                    lazylibrarian.UPDATE_MSG = 'Removing series, authorlink and authorname from books table'
                    myDB.action(
                        'CREATE TABLE IF NOT EXISTS temp_table (AuthorID TEXT, \
                    BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \
                    BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \
                    BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, WorkPage TEXT, \
                    Manual TEXT)')
                    myDB.action(
                        'INSERT INTO temp_table SELECT AuthorID, BookName, BookSub, \
                    BookDesc, BookGenre, BookIsbn, BookPub, BookRate, BookImg, BookPages, BookLink, BookID, \
                    BookFile, BookDate, BookLang, BookAdded, Status, WorkPage, Manual from books'
                    )
                    myDB.action('DROP TABLE books')
                    myDB.action('ALTER TABLE temp_table RENAME TO books')
                    lazylibrarian.UPDATE_MSG = 'Reorganisation of books table complete'

            if db_version < 17:
                if has_column(myDB, "series", "AuthorID"):
                    lazylibrarian.UPDATE_MSG = 'Creating seriesauthors table'
                    # In this version of the database there is only one author per series so use that as starting point
                    myDB.action(
                        'CREATE TABLE seriesauthors (SeriesID INTEGER, AuthorID TEXT, UNIQUE (SeriesID,AuthorID))'
                    )
                    series = myDB.select(
                        'SELECT SeriesID,AuthorID from series')
                    cnt = 0
                    tot = len(series)
                    for item in series:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Updating seriesauthors: %s of %s" % (
                            cnt, tot)
                        myDB.action(
                            'insert into seriesauthors (SeriesID, AuthorID) values (%s, %s)'
                            % (item['SeriesID'], item['AuthorID']),
                            suppress='UNIQUE')

                    myDB.action('DROP TABLE temp_table')
                    myDB.action(
                        'CREATE TABLE temp_table (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \
                    Status TEXT)')
                    myDB.action(
                        'INSERT INTO temp_table SELECT  SeriesID, SeriesName, Status FROM series'
                    )
                    myDB.action('DROP TABLE series')
                    myDB.action('ALTER TABLE temp_table RENAME TO series')
                    lazylibrarian.UPDATE_MSG = 'Reorganisation of series table complete'

            if db_version < 18:
                data = myDB.match('pragma index_list(seriesauthors)')
                if not data:
                    lazylibrarian.UPDATE_MSG = 'Adding unique constraint to seriesauthors table'
                    myDB.action('DROP TABLE IF EXISTS temp_table')
                    myDB.action(
                        'ALTER TABLE seriesauthors RENAME to temp_table')
                    myDB.action(
                        'CREATE TABLE seriesauthors (SeriesID INTEGER, AuthorID TEXT, UNIQUE (SeriesID,AuthorID))'
                    )
                    series = myDB.select(
                        'SELECT SeriesID,AuthorID from temp_table')
                    cnt = 0
                    tot = len(series)
                    for item in series:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Updating seriesauthors: %s of %s" % (
                            cnt, tot)
                        myDB.action(
                            'insert into seriesauthors (SeriesID, AuthorID) values (%s, %s)'
                            % (item['SeriesID'], item['AuthorID']),
                            suppress='UNIQUE')
                    myDB.action('DROP TABLE temp_table')
                    lazylibrarian.UPDATE_MSG = 'Reorganisation of seriesauthors complete'

            # Now do any non-version-specific tidying
            try:
                authors = myDB.select(
                    'SELECT AuthorID FROM authors WHERE AuthorName IS NULL')
                if authors:
                    logger.debug(
                        'Removing %s un-named author%s from database' %
                        (len(authors), plural(len(authors))))
                    for author in authors:
                        authorid = author["AuthorID"]
                        myDB.action('DELETE from authors WHERE AuthorID="%s"' %
                                    authorid)
                        myDB.action('DELETE from books WHERE AuthorID="%s"' %
                                    authorid)
            except Exception as e:
                logger.error('Error: ' + str(e))

            myDB.action('PRAGMA user_version = %s' % db_current_version)
            lazylibrarian.UPDATE_MSG = 'Cleaning Database after upgrade'
            myDB.action('vacuum')
            lazylibrarian.UPDATE_MSG = 'Database updated to version %s' % db_current_version
            logger.info(lazylibrarian.UPDATE_MSG)

            restartJobs(start='Start')

        lazylibrarian.UPDATE_MSG = ''

    except Exception:
        logger.error('Unhandled exception in database update: %s' %
                     traceback.format_exc())
        lazylibrarian.UPDATE_MSG = ''
Example #25
0
def KAT(book=None, test=False):
    errmsg = ''
    provider = "KAT"
    host = lazylibrarian.CONFIG['KAT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + quote(book['searchterm']))

    params = {"category": "books", "field": "seeders", "sorder": "desc"}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, result))
            errmsg = result
        result = False

    if test:
        return success

    results = []

    if result:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
        soup = BeautifulSoup(result, 'html5lib')
        rows = []
        try:
            table = soup.find_all('table')[1]  # un-named table
            if table:
                rows = table.find_all('tr')
        except IndexError:  # no results table in result page
            rows = []

        if len(rows) > 1:
            rows = rows[1:]  # first row is headers

        for row in rows:
            td = row.find_all('td')
            if len(td) > 3:
                try:
                    title = unaccented(td[0].text)
                    # kat can return magnet or torrent or both.
                    magnet = ''
                    url = ''
                    mode = 'torrent'
                    try:
                        magnet = 'magnet' + str(
                            td[0]).split('href="magnet')[1].split('"')[0]
                        mode = 'magnet'
                    except IndexError:
                        pass
                    try:
                        url = 'http' + str(td[0]).split('href="http')[1].split(
                            '.torrent?')[0] + '.torrent'
                        mode = 'torrent'
                    except IndexError:
                        pass

                    if not url or (magnet and url
                                   and lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'

                    try:
                        size = str(td[1].text).replace('&nbsp;', '').upper()
                        mult = 1
                        if 'K' in size:
                            size = size.split('K')[0]
                            mult = 1024
                        elif 'M' in size:
                            size = size.split('M')[0]
                            mult = 1024 * 1024
                        elif 'G' in size:
                            size = size.split('G')[0]
                            mult = 1024 * 1024 * 1024
                        size = int(float(size) * mult)
                    except (ValueError, IndexError):
                        size = 0
                    try:
                        seeders = int(td[3].text)
                    except ValueError:
                        seeders = 0

                    if not url or not title:
                        logger.debug('Missing url or title')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid':
                            book['bookid'],
                            'tor_prov':
                            provider,
                            'tor_title':
                            title,
                            'tor_url':
                            url,
                            'tor_size':
                            str(size),
                            'tor_type':
                            mode,
                            'priority':
                            lazylibrarian.CONFIG['KAT_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
Example #26
0
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?',
                        (seriesid, ))
    seriesname = result['SeriesName']
    members, api_hits = getSeriesMembers(seriesid, seriesname)
    dic = {
        u'\u2018': "",
        u'\u2019': "",
        u'\u201c': '',
        u'\u201d': '',
        "'": "",
        '"': ''
    }

    if members:
        myDB = database.DBConnection()
        for member in members:
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            # pubyear = member[5]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if not in_cache:
                        api_hits += 1
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(
                                booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find(
                                        './best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find(
                                        './best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug(
                                    "Author Search found %s %s, authorid %s" %
                                    (author, booktitle, authorid))
                                break
                    if not authorid:  # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(
                                lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(
                            params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if not in_cache:
                            api_hits += 1
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' %
                                        searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(
                                    booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find(
                                            './best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find(
                                            './best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug(
                                        "Title Search found %s %s, authorid %s"
                                        % (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" %
                                    (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" %
                                 (type(e).__name__, str(e)))

            if authorid:
                lazylibrarian.importer.addAuthorToDB(refresh=False,
                                                     authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" %
                (newauth, plural(newauth), seriesname))
    return newauth
Example #27
0
def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {"type": "search", "s_cat": "2", "search": book['searchterm']}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'])
                    except ValueError:
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid':
                            book['bookid'],
                            'tor_prov':
                            provider,
                            'tor_title':
                            title,
                            'tor_url':
                            url,
                            'tor_size':
                            str(size),
                            'tor_type':
                            'torrent',
                            'priority':
                            lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #28
0
def getWorkSeries(bookID=None):
    """ Return the series names and numbers in series for the given id as a list of tuples
        For goodreads the id is a WorkID, for librarything it's a BookID """
    myDB = database.DBConnection()
    serieslist = []
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return serieslist

    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        URL = "https://www.goodreads.com/work/"
        seriesurl = URL + bookID + "/series?format=xml&key=" + lazylibrarian.CONFIG[
            'GR_API']

        rootxml, in_cache = gr_xml_request(seriesurl)
        if rootxml is None:
            logger.warn('Error getting XML for %s' % seriesurl)
        else:
            resultxml = rootxml.getiterator('series_work')
            for item in resultxml:
                try:
                    seriesname = item.find('./series/title').text
                    seriesname = seriesname.strip('\n').strip('\n').strip()
                    seriesid = item.find('./series/id').text
                    seriesnum = item.find('./user_position').text
                except (KeyError, AttributeError):
                    continue
                if seriesname and seriesid:
                    seriesname = cleanName(unaccented(seriesname), '&/')
                    if seriesname:
                        seriesnum = cleanName(unaccented(seriesnum))
                        serieslist.append((seriesid, seriesnum, seriesname))
                        match = myDB.match(
                            'SELECT SeriesID from series WHERE SeriesName=?',
                            (seriesname, ))
                        if not match:
                            match = myDB.match(
                                'SELECT SeriesName from series WHERE SeriesID=?',
                                (seriesid, ))
                            if not match:
                                myDB.action(
                                    'INSERT INTO series VALUES (?, ?, ?, ?, ?)',
                                    (seriesid, seriesname, "Active", 0, 0))
                            else:
                                logger.warn(
                                    "Name mismatch for series %s, [%s][%s]" %
                                    (seriesid, seriesname,
                                     match['SeriesName']))
                        elif match['SeriesID'] != seriesid:
                            myDB.action(
                                'UPDATE series SET SeriesID=? WHERE SeriesName=?',
                                (seriesid, seriesname))
    else:
        work = getBookWork(bookID, "Series")
        if work:
            try:
                slist = work.split('<h3><b>Series:')[1].split(
                    '</h3>')[0].split('<a href="/series/')
                for item in slist[1:]:
                    try:
                        series = item.split('">')[1].split('</a>')[0]
                        if series and '(' in series:
                            seriesnum = series.split('(')[1].split(
                                ')')[0].strip()
                            series = series.split(' (')[0].strip()
                        else:
                            seriesnum = ''
                            series = series.strip()
                        seriesname = cleanName(unaccented(series), '&/')
                        seriesnum = cleanName(unaccented(seriesnum))
                        if seriesname:
                            serieslist.append(('', seriesnum, seriesname))
                    except IndexError:
                        pass
            except IndexError:
                pass

    return serieslist
Example #29
0
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {"q": book['searchterm']}
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(
                            seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid':
                            book['bookid'],
                            'tor_prov':
                            provider,
                            'tor_title':
                            title,
                            'tor_url':
                            url,
                            'tor_size':
                            str(size),
                            'tor_type':
                            'torrent',
                            'priority':
                            lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #30
0
def syncCalibreList(col_read=None, col_toread=None, userid=None):
    """ Get the lazylibrarian bookid for each read/toread calibre book so we can map our id to theirs,
        and sync current/supplied user's read/toread or supplied read/toread columns to calibre database.
        Return message giving totals """

    myDB = database.DBConnection()
    if not userid:
        cookie = cherrypy.request.cookie
        if cookie and 'll_uid' in cookie.keys():
            userid = cookie['ll_uid'].value
    if userid:
        res = myDB.match(
            'SELECT UserName,ToRead,HaveRead,CalibreRead,CalibreToRead,Perms from users where UserID=?',
            (userid, ))
        if res:
            username = res['UserName']
            if not col_read:
                col_read = res['CalibreRead']
            if not col_toread:
                col_toread = res['CalibreToRead']
            toreadlist = getList(res['ToRead'])
            readlist = getList(res['HaveRead'])
            # suppress duplicates (just in case)
            toreadlist = list(set(toreadlist))
            readlist = list(set(readlist))
        else:
            return "Error: Unable to get user column settings for %s" % userid

    if not userid:
        return "Error: Unable to find current userid"

    if not col_read and not col_toread:
        return "User %s has no calibre columns set" % username

    # check user columns exist in calibre and create if not
    res = calibredb('custom_columns')
    columns = res[0].split('\n')
    custom_columns = []
    for column in columns:
        if column:
            custom_columns.append(column.split(' (')[0])

    if col_read not in custom_columns:
        added = calibredb('add_custom_column', [col_read, col_read, 'bool'])
        if "column created" not in added[0]:
            return added
    if col_toread not in custom_columns:
        added = calibredb('add_custom_column',
                          [col_toread, col_toread, 'bool'])
        if "column created" not in added[0]:
            return added

    nomatch = 0
    readcol = ''
    toreadcol = ''
    map_ctol = {}
    map_ltoc = {}
    if col_read:
        readcol = '*' + col_read
    if col_toread:
        toreadcol = '*' + col_toread

    calibre_list = calibreList(col_read, col_toread)
    if not isinstance(calibre_list, list):
        # got an error message from calibredb
        return '"%s"' % calibre_list

    for item in calibre_list:
        if toreadcol and toreadcol in item or readcol and readcol in item:
            authorname, authorid, added = addAuthorNameToDB(item['authors'],
                                                            refresh=False,
                                                            addbooks=False)
            if authorname:
                if authorname != item['authors']:
                    logger.debug(
                        "Changed authorname for [%s] from [%s] to [%s]" %
                        (item['title'], item['authors'], authorname))
                    item['authors'] = authorname
                bookid = find_book_in_db(authorname, item['title'])
                if not bookid:
                    searchterm = "%s <ll> %s" % (item['title'], authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.debug(
                                "Found (%s%% %s%%) %s: %s" %
                                (result['author_fuzz'], result['book_fuzz'],
                                 result['authorname'], result['bookname']))
                            bookid = result['bookid']
                            import_book(bookid)
                if bookid:
                    # NOTE: calibre bookid is always an integer, lazylibrarian bookid is a string
                    # (goodreads could be used as an int, but googlebooks can't as it's alphanumeric)
                    # so convert all dict items to strings for ease of matching.
                    map_ctol[str(item['id'])] = str(bookid)
                    map_ltoc[str(bookid)] = str(item['id'])
                else:
                    logger.warn(
                        'Calibre Book [%s] by [%s] is not in lazylibrarian database'
                        % (item['title'], authorname))
                    nomatch += 1
            else:
                logger.warn(
                    'Calibre Author [%s] not matched in lazylibrarian database'
                    % (item['authors']))
                nomatch += 1

    # Now check current users lazylibrarian read/toread against the calibre library, warn about missing ones
    # which might be books calibre doesn't have, or might be minor differences in author or title

    for idlist in [("Read", readlist), ("To_Read", toreadlist)]:
        booklist = idlist[1]
        for bookid in booklist:
            cmd = "SELECT AuthorID,BookName from books where BookID=?"
            book = myDB.match(cmd, (bookid, ))
            if not book:
                logger.error('Error finding bookid %s' % bookid)
            else:
                cmd = "SELECT AuthorName from authors where AuthorID=?"
                author = myDB.match(cmd, (book['AuthorID'], ))
                if not author:
                    logger.error('Error finding authorid %s' %
                                 book['AuthorID'])
                else:
                    match = False
                    for item in calibre_list:
                        if item['authors'] == author['AuthorName'] and item[
                                'title'] == book['BookName']:
                            logger.debug("Exact match for %s [%s]" %
                                         (idlist[0], book['BookName']))
                            map_ctol[str(item['id'])] = str(bookid)
                            map_ltoc[str(bookid)] = str(item['id'])
                            match = True
                            break
                    if not match:
                        high = 0
                        highname = ''
                        highid = ''
                        for item in calibre_list:
                            if item['authors'] == author['AuthorName']:
                                n = fuzz.token_sort_ratio(
                                    item['title'], book['BookName'])
                                if n > high:
                                    high = n
                                    highname = item['title']
                                    highid = item['id']

                        if high > 95:
                            logger.debug(
                                "Found ratio match %s%% [%s] for %s [%s]" %
                                (high, highname, idlist[0], book['BookName']))
                            map_ctol[str(highid)] = str(bookid)
                            map_ltoc[str(bookid)] = str(highid)
                            match = True

                    if not match:
                        logger.warn(
                            "No match for %s %s by %s in calibre database, closest match %s%% [%s]"
                            % (idlist[0], book['BookName'],
                               author['AuthorName'], high, highname))
                        nomatch += 1

    logger.debug("BookID mapping complete, %s match %s, nomatch %s" %
                 (username, len(map_ctol), nomatch))

    # now sync the lists
    if userid:
        last_read = []
        last_toread = []
        calibre_read = []
        calibre_toread = []

        cmd = 'select SyncList from sync where UserID=? and Label=?'
        res = myDB.match(cmd, (userid, col_read))
        if res:
            last_read = getList(res['SyncList'])
        res = myDB.match(cmd, (userid, col_toread))
        if res:
            last_toread = getList(res['SyncList'])

        for item in calibre_list:
            if toreadcol and toreadcol in item and item[
                    toreadcol]:  # only if True
                if str(item['id']) in map_ctol:
                    calibre_toread.append(map_ctol[str(item['id'])])
                else:
                    logger.warn(
                        "Calibre to_read book %s:%s has no lazylibrarian bookid"
                        % (item['authors'], item['title']))
            if readcol and readcol in item and item[readcol]:  # only if True
                if str(item['id']) in map_ctol:
                    calibre_read.append(map_ctol[str(item['id'])])
                else:
                    logger.warn(
                        "Calibre read book %s:%s has no lazylibrarian bookid" %
                        (item['authors'], item['title']))

        logger.debug("Found %s calibre read, %s calibre toread" %
                     (len(calibre_read), len(calibre_toread)))
        logger.debug("Found %s lazylib read, %s lazylib toread" %
                     (len(readlist), len(toreadlist)))

        added_to_ll_toread = list(set(toreadlist) - set(last_toread))
        removed_from_ll_toread = list(set(last_toread) - set(toreadlist))
        added_to_ll_read = list(set(readlist) - set(last_read))
        removed_from_ll_read = list(set(last_read) - set(readlist))
        logger.debug("lazylibrarian changes to copy to calibre: %s %s %s %s" %
                     (len(added_to_ll_toread), len(removed_from_ll_toread),
                      len(added_to_ll_read), len(removed_from_ll_read)))

        added_to_calibre_toread = list(set(calibre_toread) - set(last_toread))
        removed_from_calibre_toread = list(
            set(last_toread) - set(calibre_toread))
        added_to_calibre_read = list(set(calibre_read) - set(last_read))
        removed_from_calibre_read = list(set(last_read) - set(calibre_read))
        logger.debug(
            "calibre changes to copy to lazylibrarian: %s %s %s %s" %
            (len(added_to_calibre_toread), len(removed_from_calibre_toread),
             len(added_to_calibre_read), len(removed_from_calibre_read)))

        calibre_changes = 0
        for item in added_to_calibre_read:
            if item not in readlist:
                readlist.append(item)
                logger.debug("Lazylibrarian marked %s as read" % item)
                calibre_changes += 1
        for item in added_to_calibre_toread:
            if item not in toreadlist:
                toreadlist.append(item)
                logger.debug("Lazylibrarian marked %s as to_read" % item)
                calibre_changes += 1
        for item in removed_from_calibre_read:
            if item in readlist:
                readlist.remove(item)
                logger.debug("Lazylibrarian removed %s from read" % item)
                calibre_changes += 1
        for item in removed_from_calibre_toread:
            if item in toreadlist:
                toreadlist.remove(item)
                logger.debug("Lazylibrarian removed %s from to_read" % item)
                calibre_changes += 1
        if calibre_changes:
            myDB.action('UPDATE users SET ToRead=?,HaveRead=? WHERE UserID=?',
                        (', '.join(toreadlist), ', '.join(readlist), userid))
        ll_changes = 0
        for item in added_to_ll_toread:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_toread, map_ltoc[item], 'true'],
                                         [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to set calibre %s true for %s" %
                            (col_toread, item))
        for item in removed_from_ll_toread:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_toread, map_ltoc[item], ''], [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to clear calibre %s for %s" %
                            (col_toread, item))

        for item in added_to_ll_read:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_read, map_ltoc[item], 'true'],
                                         [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to set calibre %s true for %s" %
                            (col_read, item))

        for item in removed_from_ll_read:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_read, map_ltoc[item], ''], [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to clear calibre %s for %s" %
                            (col_read, item))

        # store current sync list as comparison for next sync
        controlValueDict = {"UserID": userid, "Label": col_read}
        newValueDict = {
            "Date": str(time.time()),
            "Synclist": ', '.join(readlist)
        }
        myDB.upsert("sync", newValueDict, controlValueDict)
        controlValueDict = {"UserID": userid, "Label": col_toread}
        newValueDict = {
            "Date": str(time.time()),
            "Synclist": ', '.join(toreadlist)
        }
        myDB.upsert("sync", newValueDict, controlValueDict)

        msg = "%s sync updated: %s calibre, %s lazylibrarian" % (
            username, ll_changes, calibre_changes)
    return msg