Beispiel #1
def search_rss_book(books=None, reset=False):
    """Search the RSS providers for books whose Status is "Wanted".

    NOTE(review): this function arrived with lines missing (dangling
    assignments, a lost ``else:``, unbalanced brackets, log messages without
    their call).  The ``myDB.select`` calls, the logger calls wrapped around
    the surviving message strings, and the ``return`` after stopping the job
    were reconstructed from the surrounding code - confirm against upstream.

    books: None for a backlog search of every wanted book, otherwise an
           iterable of dicts carrying a 'bookid' key (newly added books).
    reset: when true, restart the scheduled 'search_rss_book' job at the end.
    return: None
    """
    current = threading.current_thread()
    if "Thread-" in current.name:
        # give anonymous worker threads a recognisable name
        current.name = "SEARCHRSS"

    if not lazylibrarian.USE_RSS():
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        # nothing to search with once the job has been stopped
        return

    myDB = database.DBConnection()

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books '
            'WHERE Status="Wanted" order by BookAdded desc')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            # NOTE(review): the id is interpolated into the SQL text; bind it
            # as a parameter instead if select() accepts arguments - confirm.
            searchbook = myDB.select(
                'SELECT BookID, AuthorName, BookName, BookSub from books '
                'WHERE BookID="%s" AND Status="Wanted"' % book['bookid'])
            searchbooks.extend(searchbook)

    if not searchbooks:
        return

    logger.info('RSS Searching for %i book%s' %
                (len(searchbooks), plural(len(searchbooks))))

    resultlist, nproviders = IterateOverRSSSites()
    if not nproviders:
        logger.warn('No rss providers are set, check config')
        return  # No point in continuing

    rss_count = 0
    for book in searchbooks:
        authorname, bookname = get_searchterm(book, "book")
        found = processResultList(resultlist, authorname, bookname, book,
                                  'book')

        # if you can't find the book, try title without any "(extended details, series etc)"
        if not found and '(' in bookname:  # anything to shorten?
            authorname, bookname = get_searchterm(book, "shortbook")
            found = processResultList(resultlist, authorname, bookname,
                                      book, 'shortbook')

        if not found:
            logger.debug(
                "Searches returned no results. Adding book %s - %s to queue." %
                (authorname, bookname))
        # only results greater than True are counted
        # NOTE(review): presumably processResultList returns a value above
        # True for a fresh download - confirm against its definition.
        if found > True:
            rss_count += 1

    logger.info("RSS Search for Wanted items complete, found %s book%s" %
                (rss_count, plural(rss_count)))

    if reset:
        scheduleJob(action='Restart', target='search_rss_book')
Beispiel #2
def findBestResult(resultlist, book, searchtype, source):
    """ resultlist: collated results from search providers
        book:       the book we want to find
        searchtype: book, magazine, shortbook, audiobook etc.
        source:     nzb, tor, rss, direct
        return:     highest scoring match, or None if no match

        The match is the list [score, newValueDict, controlValueDict, priority]
        built below.  It is returned even when its score is below MATCH_RATIO
        (only the log message differs).  Any exception is logged and None is
        returned.

        NOTE(review): this function arrived with lines missing (the closing
        docstring quotes, ``try:``, several ``else:`` branches, a closing
        brace and the logger calls for the nearest/best match messages).
        They were reconstructed from the surrounding code - confirm against
        upstream.
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        # replacements applied to each result title before fuzzy matching
        dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                    ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                    '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                    ':': '', '!': '', '-': ' ', '\\s\\s': ' '}

        # replacements applied to the wanted author/title
        dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
               ',': '', '*': '', ':': '.', ';': '', '\'': ''}

        if source == 'rss':
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'
            typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'
            typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])

        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        # Loop invariants, computed once instead of once per word per result.
        # NOTE(review): assumes getList has no side effects - confirm.
        author_words = getList(author.lower())
        title_words = getList(title.lower())
        # reversed so that types nearer the left of the configured list
        # receive the larger bonus
        ranked_types = list(reversed(typelist))

        logger.debug('Searching %s %s results for best %s match' % (len(resultlist), source, auxinfo))

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ", resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz:
                logger.debug("%s author/book Match: %s/%s %s at %s" %
                             (source.upper(), Author_match, Book_match, resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=?', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and not url.startswith(('http', 'magnet')):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" % (resultTitle, url))

            if not rejected:
                result_words = getList(resultTitle.lower())
                for word in reject_list:
                    # a reject word is tolerated when it is part of the wanted author or title
                    if word in result_words and word not in author_words and word not in title_words:
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" % (resultTitle, word))

            size_temp = check_int(res[prefix + 'size'], 1000)  # Need to cater for when this is NONE (Issue 35)
            size = round(float(size_temp) / 1048576, 2)

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % resultTitle)

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % resultTitle)

            if rejected:
                continue

            if source == 'nzb':
                mode = res['nzbmode']  # nzb, torznab
            else:
                mode = res['tor_type']  # torrent, magnet, nzb(from rss), direct

            controlValueDict = {"NZBurl": url}
            newValueDict = {
                "NZBprov": res[prefix + 'prov'],
                "BookID": book['bookid'],
                "NZBdate": now(),  # when we asked for it
                "NZBsize": size,
                "NZBtitle": resultTitle,
                "NZBmode": mode,
                "AuxInfo": auxinfo,
                "Status": "Skipped"
            }

            score = (Book_match + Author_match) / 2  # as a percentage
            # lose a point for each unwanted word in the title so we get the closest match
            # but for RSS ignore anything at the end in square braces [keywords, genres etc]
            if source == 'rss':
                wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
            else:
                wordlist = getList(resultTitle.lower())
            words = [x for x in wordlist
                     if x not in author_words and x not in title_words and x not in typelist]
            score -= len(words)
            # prioritise titles that include the ebook types we want
            # add more points for booktypes nearer the left in the list
            # eg if epub, mobi, pdf  add 3 points if epub found, 2 for mobi, 1 for pdf
            for item in wordlist:
                for i, x in enumerate(ranked_types):
                    if x == item:
                        score += i + 1

            matches.append([score, newValueDict, controlValueDict, res['priority']])

        if matches:
            # best score wins, provider priority breaks ties
            highest = max(matches, key=lambda s: (s[0], s[3]))
            score = highest[0]
            newValueDict = highest[1]
            dlpriority = highest[3]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                logger.info('Nearest match (%s%%): %s using %s search for %s %s' %
                            (score, newValueDict['NZBtitle'], searchtype, book['authorName'], book['bookName']))
            else:
                logger.info('Best match (%s%%): %s using %s search, %s priority %s' %
                            (score, newValueDict['NZBtitle'], searchtype, newValueDict['NZBprov'], dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" % (source, book["searchterm"], searchtype))
        return None
    except Exception:
        logger.error('Unhandled exception in findBestResult: %s' % traceback.format_exc())
        return None
Beispiel #4
def _mark_book_wanted(myDB, bookid):
    """Set Status to "Wanted" on the books row identified by bookid."""
    myDB.upsert("books", {"Status": "Wanted"}, {"BookID": bookid})


def _process_wishlist_item(myDB, book):
    """Match one wishlist entry to a book and mark it "Wanted" where needed.

    book supplies rss_author, rss_title, rss_isbn and rss_bookid (goodreads
    bookid).  We can just use bookid if goodreads, or try isbn and name
    matching on author/title if googlebooks.
    """
    if book['rss_bookid'] and lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
        # the id comes from an external feed, so bind it rather than format it in
        bookmatch = myDB.match('select Status,BookName from books where bookid=?',
                               (book['rss_bookid'],))
        if bookmatch:
            bookstatus = bookmatch['Status']
            bookname = bookmatch['BookName']
            if bookstatus in ['Open', 'Wanted', 'Have']:
                logger.info(u'Found book %s, already marked as "%s"' %
                            (bookname, bookstatus))
            else:  # skipped/ignored
                logger.info('Found book %s, marking as "Wanted"' % bookname)
                _mark_book_wanted(myDB, book['rss_bookid'])
        # NOTE(review): nothing visible handles an id that is not in the
        # database yet - a step may have been lost here, confirm upstream.
        return

    item = {'Title': book['rss_title']}
    headers = []
    if book['rss_bookid']:
        item['BookID'] = book['rss_bookid']
    if book['rss_isbn']:
        item['ISBN'] = book['rss_isbn']
    bookmatch = finditem(item, book['rss_author'], headers)
    if bookmatch:  # it's already in the database
        authorname = bookmatch['AuthorName']
        bookname = bookmatch['BookName']
        bookstatus = bookmatch['Status']
        if bookstatus in ['Open', 'Wanted', 'Have']:
            logger.info(u'Found book %s by %s, already marked as "%s"'
                        % (bookname, authorname, bookstatus))
        else:  # skipped/ignored
            logger.info(u'Found book %s by %s, marking as "Wanted"' %
                        (bookname, authorname))
            _mark_book_wanted(myDB, bookmatch['BookID'])
        return

    # not in database yet: try the isbn first, then a title/author search
    # NOTE(review): the visible code only logs a successful match; whatever
    # adds the matched book to the database is not shown - confirm upstream.
    results = ''
    result = None
    if book['rss_isbn']:
        results = search_for(book['rss_isbn'])
    if results:
        result = results[0]
        if result['isbn_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
            logger.info("Found (%s%%) %s: %s" %
                        (result['isbn_fuzz'], result['authorname'],
                         result['bookname']))
            bookmatch = True
    if not results:
        # NOTE(review): the arguments of this format were lost; title then
        # author is assumed - confirm upstream.
        searchterm = "%s <ll> %s" % (item['Title'], book['rss_author'])
        results = search_for(unaccented(searchterm))
    if results:
        result = results[0]
        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
            logger.info("Found (%s%% %s%%) %s: %s" %
                        (result['author_fuzz'], result['book_fuzz'],
                         result['authorname'], result['bookname']))
            bookmatch = True

    if not bookmatch:
        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
        if not results:
            msg += ', No results returned'
            logger.warn(msg)
        else:
            msg += ', No match found'
            logger.warn(msg)
            msg = "Closest match (%s%% %s%%) %s: %s" % (
                result['author_fuzz'], result['book_fuzz'],
                result['authorname'], result['bookname'])
            logger.warn(msg)


def search_rss_book(books=None, reset=False):
    """Process the RSS wishlists, then search RSS providers for wanted books.

    NOTE(review): this function arrived with many lines missing (``try:``,
    ``else:`` branches, closing brackets, logger calls, early returns and the
    tail of the final error message).  The gaps were filled from the
    surrounding code only; places where a step may have been lost entirely
    are marked with review notes - confirm against upstream.

    books: None for a backlog search of every wanted book, otherwise an
           iterable of dicts carrying a 'bookid' key (newly added books).
    reset: when true, restart the scheduled 'search_rss_book' job at the end.
    return: None; any exception is logged rather than raised.
    """
    # noinspection PyBroadException
    try:
        current = threading.current_thread()
        if "Thread-" in current.name:
            # give anonymous worker threads a recognisable name
            current.name = "SEARCHALLRSS" if books is None else "SEARCHRSS"

        if not lazylibrarian.USE_RSS():
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        if not internet():
            logger.warn('Search RSS Book: No internet connection')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverGoodReads()
        if not wishproviders:
            logger.debug('No rss wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            logger.debug('Processing %s item%s in wishlists' %
                         (len(resultlist), plural(len(resultlist))))
            for book in resultlist:
                _process_wishlist_item(myDB, book)

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books,authors '
            cmd += 'WHERE books.AuthorID = authors.AuthorID and books.Status="Wanted" order by BookAdded desc'
            searchbooks = myDB.select(cmd)
        else:
            # The user has added a new book
            searchbooks = []
            for book in books:
                # NOTE(review): the id is interpolated into the SQL text; bind
                # it as a parameter if select() accepts arguments - confirm.
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub from books,authors '
                cmd += 'WHERE books.AuthorID = authors.AuthorID and BookID="%s" ' % book['bookid']
                cmd += 'AND books.Status="Wanted"'
                searchbooks.extend(myDB.select(cmd))

        if not searchbooks:
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders:
            # stay quiet when wishlists are configured: they were already handled above
            if not wishproviders:
                logger.warn('No rss providers are set, check config')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

        rss_count = 0
        for book in searchbooks:
            authorname, bookname = get_searchterm(book, "book")
            found = processResultList(resultlist, authorname, bookname, book,
                                      'book')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in bookname:  # anything to shorten?
                authorname, bookname = get_searchterm(book, "shortbook")
                found = processResultList(resultlist, authorname, bookname,
                                          book, 'shortbook')

            if not found:
                logger.debug(
                    "Searches returned no results. Adding book %s - %s to queue."
                    % (authorname, bookname))
            # only results greater than True are counted
            # NOTE(review): presumably processResultList returns a value above
            # True for a fresh download - confirm against its definition.
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" %
                    (rss_count, plural(rss_count)))

        if reset:
            scheduleJob(action='Restart', target='search_rss_book')

    except Exception:
        import traceback
        logger.error('Unhandled exception in search_rss_book: %s' %
                     traceback.format_exc())
Beispiel #5
