예제 #1
0
def downloadResult(match, book):
    """ match:  best result from search providers
        book:   book we are downloading
        return: True if already snatched, False if failed to snatch, >True if we snatched it
    """
    try:
        myDB = database.DBConnection()

        resultTitle = match[1]
        newValueDict = match[2]
        controlValueDict = match[3]

        if book['library'] == 'AudioBook':
            auxinfo = 'AudioBook'
        else:  # elif book['library'] == 'eBook':
            auxinfo = 'eBook'

        if auxinfo == 'eBook':
            snatchedbooks = myDB.match('SELECT BookID from books WHERE BookID=? and Status="Snatched"',
                                       (newValueDict["BookID"],))
        else:
            snatchedbooks = myDB.match('SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"',
                                       (newValueDict["BookID"],))

        if snatchedbooks:
            logger.debug('%s %s already marked snatched' % (book['authorName'], book['bookName']))
            return True  # someone else already found it
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if 'libgen' in newValueDict["NZBprov"]:  # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                              controlValueDict["NZBurl"], resultTitle, auxinfo)
            elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]:
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"], auxinfo)
            elif newValueDict['NZBmode'] == 'nzb':
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"], auxinfo)
            else:
                logger.error('Unhandled NZBmode [%s] for %s' % (newValueDict['NZBmode'], controlValueDict["NZBurl"]))
                snatch = False

            if snatch:
                logger.info('Downloading %s %s from %s' %
                            (auxinfo, newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch("%s %s from %s at %s" %
                              (auxinfo, newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                custom_notify_snatch(newValueDict["BookID"])
                # at this point we could add NZBprov to the blocklist with a short timeout, a second or two?
                # This would implement a round-robin search system. Blocklist with an incremental counter.
                # If number of active providers == number blocklisted, so no unblocked providers are left,
                # either sleep for a while, or unblock the one with the lowest counter.
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
        return False
    except Exception:
        logger.error('Unhandled exception in downloadResult: %s' % traceback.format_exc())
예제 #2
0
def search_wishlist():
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if not
            # eg NYTimes wishlist
            if 'E' in book['types']:
                ebook_status = "Wanted"
            else:
                ebook_status = "Skipped"
            if 'A' in book['types']:
                audio_status = "Wanted"
            else:
                audio_status = "Skipped"
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                cmd = 'select books.Status as Status,AudioStatus,authors.Status as AuthorStatus,'
                cmd += 'AuthorName,BookName,Requester,AudioRequester from books,authors '
                cmd += 'where books.AuthorID = authors.AuthorID and bookid=?'
                bookmatch = myDB.match(cmd, (book['rss_bookid'],))
                if bookmatch:
                    cmd = 'SELECT SeriesName,Status from series,member '
                    cmd += 'where series.SeriesID=member.SeriesID and member.BookID=?'
                    series = myDB.select(cmd, (book['rss_bookid'],))
                    reject_series = None
                    for ser in series:
                        if ser['Status'] in ['Paused', 'Ignored']:
                            reject_series = {"Name": ser['SeriesName'], "Status": ser['Status']}
                            break
                    bookname = bookmatch['BookName']
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked %s' % (bookname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif bookmatch['AuthorStatus'] in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is %s' % (bookname, bookmatch['AuthorStatus']))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif ebook_status == "Wanted":  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info('Found audiobook %s, already marked %s' % (bookname, bookmatch['AudioStatus']))
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif bookmatch['AuthorStatus'] in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is %s' % (bookname, bookmatch['AuthorStatus']))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif audio_status == "Wanted":  # skipped/ignored
                        logger.info('Found audiobook %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": book['rss_bookid']}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                else:
                    import_book(book['rss_bookid'], ebook_status, audio_status)
                    new_books += 1
                    newValueDict = {"Requester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    newValueDict = {"AudioRequester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
            else:
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    auth_res = myDB.match('SELECT Status from authors WHERE authorname=?', (authorname,))
                    if auth_res:
                        auth_status = auth_res['Status']
                    else:
                        auth_status = 'Unknown'
                    cmd = 'SELECT SeriesName,Status from series,member '
                    cmd += 'where series.SeriesID=member.SeriesID and member.BookID=?'
                    series = myDB.select(cmd, (book['rss_bookid'],))
                    reject_series = None
                    for ser in series:
                        if ser['Status'] in ['Paused', 'Ignored']:
                            reject_series = {"Name": ser['SeriesName'], "Status": ser['Status']}
                            break
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif auth_status in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is "%s"' % (bookname, auth_status))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif ebook_status == 'Wanted':  # skipped/ignored
                        logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found audiobook %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookmatch['AudioStatus']))
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    elif auth_status in ['Paused', 'Ignored']:
                        logger.info('Found book %s, but author is "%s"' % (bookname, auth_status))
                    elif reject_series:
                        logger.info('Found book %s, but series "%s" is %s' %
                                    (bookname, reject_series['Name'], reject_series['Status']))
                    elif audio_status == 'Wanted':  # skipped/ignored
                        logger.info('Found audiobook %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["AudioRequester"]:
                                newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '}
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        else:
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                else:  # not in database yet
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                    if results:
                        result = results[0]  # type: dict
                        if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%%) %s: %s" %
                                        (result['isbn_fuzz'], result['authorname'], result['bookname']))
                            import_book(result['bookid'], ebook_status, audio_status)
                            new_books += 1
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            bookmatch = True
                    if not results:
                        searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                        results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]  # type: dict
                        if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                      result['authorname'], result['bookname']))
                            import_book(result['bookid'], ebook_status, audio_status)
                            new_books += 1
                            newValueDict = {"Requester": book["dispname"] + ' '}
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            newValueDict = {"AudioRequester": book["dispname"] + ' '}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                        result['authorname'], result['bookname'])
                        logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
예제 #3
0
def downloadResult(match, book):
    """ match:  best result from search providers
        book:   book we are downloading (needed for reporting author name)
        return: 0 if failed to snatch
                1 if already snatched
                2 if we snatched it
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()

        newValueDict = match[1]
        controlValueDict = match[2]

        # It's possible to get book and wanted tables "Snatched" status out of sync
        # for example if a user marks a book as "Wanted" after a search task snatches it and before postprocessor runs
        # so check status in both tables here
        snatched = myDB.match(
            'SELECT BookID from wanted WHERE BookID=? and AuxInfo=? and Status="Snatched"',
            (newValueDict["BookID"], newValueDict["AuxInfo"]))
        if snatched:
            logger.debug('%s %s %s already marked snatched in wanted table' %
                         (newValueDict["AuxInfo"], book['authorName'],
                          book['bookName']))
            return 1  # someone else already found it

        if newValueDict["AuxInfo"] == 'eBook':
            snatched = myDB.match(
                'SELECT BookID from books WHERE BookID=? and Status="Snatched"',
                (newValueDict["BookID"], ))
        else:
            snatched = myDB.match(
                'SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"',
                (newValueDict["BookID"], ))
        if snatched:
            logger.debug('%s %s %s already marked snatched in book table' %
                         (newValueDict["AuxInfo"], book['authorName'],
                          book['bookName']))
            return 1  # someone else already found it

        myDB.upsert("wanted", newValueDict, controlValueDict)
        if newValueDict['NZBmode'] == 'direct':
            snatch = DirectDownloadMethod(newValueDict["BookID"],
                                          newValueDict["NZBtitle"],
                                          controlValueDict["NZBurl"],
                                          newValueDict["AuxInfo"])
        elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]:
            snatch = TORDownloadMethod(newValueDict["BookID"],
                                       newValueDict["NZBtitle"],
                                       controlValueDict["NZBurl"],
                                       newValueDict["AuxInfo"])
        elif newValueDict['NZBmode'] == 'nzb':
            snatch = NZBDownloadMethod(newValueDict["BookID"],
                                       newValueDict["NZBtitle"],
                                       controlValueDict["NZBurl"],
                                       newValueDict["AuxInfo"])
        else:
            logger.error('Unhandled NZBmode [%s] for %s' %
                         (newValueDict['NZBmode'], controlValueDict["NZBurl"]))
            snatch = 0

        if snatch:
            logger.info('Downloading %s %s from %s' %
                        (newValueDict["AuxInfo"], newValueDict["NZBtitle"],
                         newValueDict["NZBprov"]))
            custom_notify_snatch(
                "%s %s" % (newValueDict["BookID"], newValueDict['AuxInfo']))
            notify_snatch("%s %s from %s at %s" %
                          (newValueDict["AuxInfo"], newValueDict["NZBtitle"],
                           newValueDict["NZBprov"], now()))
            # at this point we could add NZBprov to the blocklist with a short timeout, a second or two?
            # This would implement a round-robin search system. Blocklist with an incremental counter.
            # If number of active providers == number blocklisted, so no unblocked providers are left,
            # either sleep for a while, or unblock the one with the lowest counter.
            scheduleJob(action='Start', target='processDir')
            return 2  # we found it
        return 0
    except Exception:
        logger.error('Unhandled exception in downloadResult: %s' %
                     traceback.format_exc())
        return 0
예제 #4
0
def processDir(reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "POSTPROCESS"
        processpath = lazylibrarian.DIRECTORY('Download')

        logger.debug('Checking [%s] for files to post process' % processpath)

        try:
            downloads = os.listdir(processpath)
        except OSError as why:
            logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
            return

        myDB = database.DBConnection()
        snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

        if len(snatched) == 0:
            logger.info('Nothing marked as snatched.')
            scheduleJob(action='Stop', target='processDir')
            return

        if len(downloads) == 0:
            logger.info('No downloads are found. Nothing to process yet.')
            return

        logger.info("Checking %s download%s for %s snatched file%s" %
                    (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
        ppcount = 0
        for book in snatched:
            # if torrent, see if we can get current status from the downloader as the name
            # may have been changed once magnet resolved, or download started or completed
            # depending on torrent downloader. Usenet doesn't change the name. We like usenet.
            torrentname = ''
            try:
                logger.debug("%s was sent to %s" % (book['NZBtitle'], book['Source']))
                if book['Source'] == 'TRANSMISSION':
                    torrentname = transmission.getTorrentFolder(book['DownloadID'])
                elif book['Source'] == 'UTORRENT':
                    torrentname = utorrent.nameTorrent(book['DownloadID'])
                elif book['Source'] == 'RTORRENT':
                    torrentname = rtorrent.getName(book['DownloadID'])
                elif book['Source'] == 'QBITTORRENT':
                    torrentname = qbittorrent.getName(book['DownloadID'])
                elif book['Source'] == 'SYNOLOGY_TOR':
                    torrentname = synology.getName(book['DownloadID'])
                elif book['Source'] == 'DELUGEWEBUI':
                    torrentname = deluge.getTorrentFolder(book['DownloadID'])
                elif book['Source'] == 'DELUGERPC':
                    client = DelugeRPCClient(lazylibrarian.DELUGE_HOST,
                                             int(lazylibrarian.DELUGE_PORT),
                                             lazylibrarian.DELUGE_USER,
                                             lazylibrarian.DELUGE_PASS)
                    try:
                        client.connect()
                        result = client.call('core.get_torrent_status', book['DownloadID'], {})
                        #    for item in result:
                        #        logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                        if 'name' in result:
                            torrentname = unaccented_str(result['name'])
                    except Exception as e:
                        logger.debug('DelugeRPC failed %s' % str(e))
            except Exception as e:
                logger.debug("Failed to get updated torrent name from %s for %s: %s" %
                            (book['Source'], book['DownloadID'], str(e)))

            matchtitle = unaccented_str(book['NZBtitle'])
            if torrentname and torrentname != matchtitle:
                logger.debug("%s Changing [%s] to [%s]" % (book['Source'], matchtitle, torrentname))
                myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl = "%s"' % (torrentname, book['NZBurl']))
                matchtitle = torrentname

            # here we could also check percentage downloaded or eta or status?
            # If downloader says it hasn't completed, no need to look for it.

            matches = []
            logger.info('Looking for %s in %s' % (matchtitle, processpath))
            for fname in downloads:
                # skip if failed before or incomplete torrents, or incomplete btsync
                extn = os.path.splitext(fname)[1]
                if extn not in ['.fail', '.part', '.bts', '.!ut']:
                    # This is to get round differences in torrent filenames.
                    # Usenet is ok, but Torrents aren't always returned with the name we searched for
                    # We ask the torrent downloader for the torrent name, but don't always get an answer
                    # so we try to do a "best match" on the name, there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]

                    match = 0
                    if matchtitle:
                        if ' LL.(' in matchtitle:
                            matchtitle = matchtitle.split(' LL.(')[0]
                        match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match and match >= lazylibrarian.DLOAD_RATIO:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here. Book/mag file in download root.
                            # move the file into it's own subdirectory so we don't move/delete things that aren't ours
                            logger.debug('filename [%s] is a file' % os.path.join(processpath, fname))
                            if is_valid_booktype(fname, booktype="book") \
                                    or is_valid_booktype(fname, booktype="mag"):
                                logger.debug('filename [%s] is a valid book/mag' % os.path.join(processpath, fname))
                                if bts_file(processpath):
                                    logger.debug("Skipping %s, found a .bts file" % processpath)
                                else:
                                    fname = os.path.splitext(fname)[0]
                                    dirname = os.path.join(processpath, fname)
                                    if not os.path.exists(dirname):
                                        try:
                                            os.makedirs(dirname)
                                            setperm(dirname)
                                        except OSError as why:
                                            logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                                    if os.path.exists(dirname):
                                        # move the book and any related files too
                                        # ie other book formats, or opf, jpg with same title
                                        # can't move metadata.opf or cover.jpg or similar
                                        # as can't be sure they are ours
                                        # not sure if we need a new listdir here, or whether we can use the old one
                                        list_dir = os.listdir(processpath)
                                        for ourfile in list_dir:
                                            if ourfile.startswith(fname):
                                                if is_valid_booktype(ourfile, booktype="book") \
                                                    or is_valid_booktype(ourfile, booktype="mag") \
                                                        or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']:
                                                    try:
                                                        if lazylibrarian.DESTINATION_COPY:
                                                            shutil.copyfile(os.path.join(processpath, ourfile),
                                                                            os.path.join(dirname, ourfile))
                                                            setperm(os.path.join(dirname, ourfile))
                                                        else:
                                                            shutil.move(os.path.join(processpath, ourfile),
                                                                        os.path.join(dirname, ourfile))
                                                            setperm(os.path.join(dirname, ourfile))
                                                    except Exception as why:
                                                        logger.debug("Failed to copy/move file %s to %s, %s" %
                                                                    (ourfile, dirname, str(why)))

                        pp_path = os.path.join(processpath, fname)
                        if os.path.isdir(pp_path):
                            logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, matchtitle))
                            if not os.listdir(pp_path):
                                logger.debug("Skipping %s, folder is empty" % pp_path)
                            elif bts_file(pp_path):
                                logger.debug("Skipping %s, found a .bts file" % pp_path)
                            else:
                                matches.append([match, pp_path, book])
                    else:
                        pp_path = os.path.join(processpath, fname)
                        matches.append([match, pp_path, book])  # so we can report closest match
                else:
                    logger.debug('Skipping %s' % fname)

            match = 0
            if matches:
                highest = max(matches, key=lambda x: x[0])
                match = highest[0]
                pp_path = highest[1]
                book = highest[2]
            if match and match >= lazylibrarian.DLOAD_RATIO:
                logger.debug(u'Found match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle']))
                data = myDB.match('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:  # it's a book
                    logger.debug(u'Processing book %s' % book['BookID'])
                    authorname = data['AuthorName']
                    bookname = data['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')
                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = unaccented(global_name)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = unaccented_str(replace_all(dest_path, dic))
                    dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING)
                else:
                    data = myDB.match('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:  # it's a magazine
                        logger.debug(u'Processing magazine %s' % book['BookID'])
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = unaccented_str(replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)

                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(processpath, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = unaccented(global_name)
                    else:  # not recognised
                        logger.debug('Nothing in database matching "%s"' % book['BookID'])
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                if match:
                    logger.debug(u'Closest match (%s%%): %s' % (match, pp_path))
                    #for match in matches:
                    #    logger.info('Match: %s%%  %s' % (match[0], match[1]))
                continue

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"BookID": book['BookID'], "NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname:
                    # it's a book, if None it's a magazine
                    if len(lazylibrarian.IMP_CALIBREDB):
                        logger.debug('Calibre should have created the extras for us')
                    else:
                        processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue:
                        if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                            older = int(mostrecentissue) > int(book['AuxInfo'])  # issuenumber
                        else:
                            older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                    else:
                        older = False
                    if older:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": today(),
                                    "IssueFile": dest_file,
                                    "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    create_cover(dest_file)

                # calibre or ll copied/moved the files we want, now delete source files

                to_delete = True
                if book['NZBmode'] in ['torrent', 'magnet']:
                    # Only delete torrents if we don't want to keep seeding
                    if lazylibrarian.KEEP_SEEDING:
                        logger.warn('%s is seeding %s %s' % (book['Source'], book['NZBmode'], book['NZBtitle']))
                        to_delete = False
                    else:
                        # ask downloader to delete the torrent, but not the files
                        # we may delete them later, depending on other settings
                        if book['DownloadID'] != "unknown":
                            logger.debug('Removing %s from %s' % (book['NZBtitle'], book['Source'].lower()))
                            delete_task(book['Source'], book['DownloadID'], False)
                        else:
                            logger.warn("Unable to remove %s from %s, no DownloadID" %
                                (book['NZBtitle'], book['Source'].lower()))

                if to_delete:
                    # only delete the files if not in download root dir and if DESTINATION_COPY not set
                    if not lazylibrarian.DESTINATION_COPY and (pp_path != processpath):
                        if os.path.isdir(pp_path):
                            # calibre might have already deleted it?
                            try:
                                shutil.rmtree(pp_path)
                            except Exception as why:
                                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now()))
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Failed", "NZBDate": now()}
                myDB.upsert("wanted", newValueDict, controlValueDict)
                # if it's a book, reset status so we try for a different version
                # if it's a magazine, user can select a different one from pastissues table
                if bookname:
                    myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID'])

                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception as e:
                    logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            dname, extn = os.path.splitext(directory)
            if "LL.(" in dname and extn not in ['.fail', '.part', '.bts', '.!ut']:
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " found in download directory")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    if import_book(pp_path, bookID):
                        ppcount = ppcount + 1

        if ppcount == 0:
            logger.info('No snatched books/mags have been found')
        else:
            logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

        # Now check for any that are still marked snatched...
        if lazylibrarian.TASK_AGE:
            snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')
            if len(snatched) > 0:
                for snatch in snatched:
                    # FUTURE: we could check percentage downloaded or eta?
                    # if percentage is increasing, it's just slow
                    try:
                        when_snatched = time.strptime(snatch['NZBdate'], '%Y-%m-%d %H:%M:%S')
                        when_snatched = time.mktime(when_snatched)
                        diff = time.time() - when_snatched  # time difference in seconds
                    except:
                        diff = 0
                    hours = int(diff / 3600)
                    if hours >= lazylibrarian.TASK_AGE:
                        logger.warn('%s was sent to %s %s hours ago, deleting failed task' %
                                    (snatch['NZBtitle'], snatch['Source'].lower(), hours))
                        # change status to "Failed", and ask downloader to delete task and files
                        if snatch['BookID'] != 'unknown':
                            myDB.action('UPDATE wanted SET Status="Failed" WHERE BookID="%s"' % snatch['BookID'])
                            myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % snatch['BookID'])
                            delete_task(snatch['Source'], snatch['DownloadID'], True)
        if reset:
            scheduleJob(action='Restart', target='processDir')

    except Exception as e:
        logger.error('Unhandled exception in processDir: %s' % traceback.format_exc())
예제 #5
0
def search_tor_book(books=None, reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHTOR"

        if not lazylibrarian.USE_TOR():
            logger.warn('No Torrent providers set, check config')
            return

        myDB = database.DBConnection()
        searchlist = []

        if books is None:
            # We are performing a backlog search
            searchbooks = myDB.select(
                'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books WHERE Status="Wanted" order by BookAdded desc')
        else:
            # The user has added a new book
            searchbooks = []
            for book in books:
                searchbook = myDB.select('SELECT BookID, AuthorName, BookName, BookSub from books WHERE BookID="%s" \
                                         AND Status="Wanted"' % book['bookid'])
                for terms in searchbook:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            return

        logger.info('TOR Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            searchlist.append(
                {"bookid": searchbook['BookID'],
                 "bookName": searchbook['BookName'],
                 "bookSub": searchbook['BookSub'],
                 "authorName": searchbook['AuthorName'],
                 "searchterm": searchterm})

        tor_count = 0
        for book in searchlist:

            resultlist, nproviders = IterateOverTorrentSites(book, 'book')
            if not nproviders:
                logger.warn('No torrent providers are set, check config')
                return  # No point in continuing

            found = processResultList(resultlist, book, "book")

            # if you can't find the book, try author/title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:
                resultlist, nproviders = IterateOverTorrentSites(book, 'shortbook')
                found = processResultList(resultlist, book, "shortbook")

            # general search is the same as booksearch for torrents
            # if not found:
            #    resultlist, nproviders = IterateOverTorrentSites(book, 'general')
            #    found = processResultList(resultlist, book, "general")

            if not found:
                logger.debug("Searches for %s returned no results." % book['searchterm'])
            if found > True:
                tor_count = tor_count + 1

        logger.info("TORSearch for Wanted items complete, found %s book%s" % (tor_count, plural(tor_count)))

        if reset:
            scheduleJob(action='Restart', target='search_tor_book')

    except Exception as e:
        logger.error('Unhandled exception in search_tor_book: %s' % traceback.format_exc())
예제 #6
0
def search_wishlist():
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' %
                     (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if not
            # eg NYTimes wishlist
            if 'E' in book['types']:
                ebook_status = "Wanted"
            else:
                ebook_status = "Skipped"
            if 'A' in book['types']:
                audio_status = "Wanted"
            else:
                audio_status = "Skipped"
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book[
                    'rss_bookid']:
                cmd = 'select Status,AudioStatus,BookName,Requester,AudioRequester from books where bookid=?'
                bookmatch = myDB.match(cmd, (book['rss_bookid'], ))
                if bookmatch:
                    bookname = bookmatch['BookName']
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked as "%s"' %
                                    (bookname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {
                                    "Requester":
                                    bookmatch["Requester"] + book["dispname"] +
                                    ' '
                                }
                                controlValueDict = {
                                    "BookID": book['rss_bookid']
                                }
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    elif ebook_status == "Wanted":  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' %
                                    bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {
                                    "Requester":
                                    bookmatch["Requester"] + book["dispname"] +
                                    ' '
                                }
                                controlValueDict = {
                                    "BookID": book['rss_bookid']
                                }
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found audiobook %s, already marked as "%s"' %
                            (bookname, bookmatch['AudioStatus']))
                        if bookmatch[
                                "AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch[
                                    "AudioRequester"]:
                                newValueDict = {
                                    "AudioRequester":
                                    bookmatch["AudioRequester"] +
                                    book["dispname"] + ' '
                                }
                                controlValueDict = {
                                    "BookID": book['rss_bookid']
                                }
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    elif audio_status == "Wanted":  # skipped/ignored
                        logger.info('Found audiobook %s, marking as "Wanted"' %
                                    bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch[
                                "AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch[
                                    "AudioRequester"]:
                                newValueDict = {
                                    "AudioRequester":
                                    bookmatch["AudioRequester"] +
                                    book["dispname"] + ' '
                                }
                                controlValueDict = {
                                    "BookID": book['rss_bookid']
                                }
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": book['rss_bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                else:
                    import_book(book['rss_bookid'], ebook_status, audio_status)
                    new_books += 1
                    newValueDict = {"Requester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    newValueDict = {"AudioRequester": book["dispname"] + ' '}
                    controlValueDict = {"BookID": book['rss_bookid']}
                    myDB.upsert("books", newValueDict, controlValueDict)
            else:
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    if bookmatch['Status'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' %
                            (bookname, authorname, bookmatch['Status']))
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {
                                    "Requester":
                                    bookmatch["Requester"] + book["dispname"] +
                                    ' '
                                }
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    elif ebook_status == 'Wanted':  # skipped/ignored
                        logger.info(
                            'Found book %s by %s, marking as "Wanted"' %
                            (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch["Requester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch["Requester"]:
                                newValueDict = {
                                    "Requester":
                                    bookmatch["Requester"] + book["dispname"] +
                                    ' '
                                }
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found audiobook %s by %s, already marked as "%s"'
                            % (bookname, authorname, bookmatch['AudioStatus']))
                        if bookmatch[
                                "AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch[
                                    "AudioRequester"]:
                                newValueDict = {
                                    "AudioRequester":
                                    bookmatch["AudioRequester"] +
                                    book["dispname"] + ' '
                                }
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    elif audio_status == 'Wanted':  # skipped/ignored
                        logger.info(
                            'Found audiobook %s by %s, marking as "Wanted"' %
                            (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"AudioStatus": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                        if bookmatch[
                                "AudioRequester"]:  # Already on a wishlist
                            if book["dispname"] not in bookmatch[
                                    "AudioRequester"]:
                                newValueDict = {
                                    "AudioRequester":
                                    bookmatch["AudioRequester"] +
                                    book["dispname"] + ' '
                                }
                                controlValueDict = {"BookID": bookid}
                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                        else:
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": bookid}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                else:  # not in database yet
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                    if results:
                        result = results[0]  # type: dict
                        if result['isbn_fuzz'] > check_int(
                                lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info(
                                "Found (%s%%) %s: %s" %
                                (result['isbn_fuzz'], result['authorname'],
                                 result['bookname']))
                            import_book(result['bookid'], ebook_status,
                                        audio_status)
                            new_books += 1
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            bookmatch = True
                    if not results:
                        searchterm = "%s <ll> %s" % (item['Title'],
                                                     formatAuthorName(
                                                         book['rss_author']))
                        results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]  # type: dict
                        if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info(
                                "Found (%s%% %s%%) %s: %s" %
                                (result['author_fuzz'], result['book_fuzz'],
                                 result['authorname'], result['bookname']))
                            import_book(result['bookid'], ebook_status,
                                        audio_status)
                            new_books += 1
                            newValueDict = {
                                "Requester": book["dispname"] + ' '
                            }
                            controlValueDict = {"BookID": result['bookid']}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            newValueDict = {
                                "AudioRequester": book["dispname"] + ' '
                            }
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                            bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'],
                                                          book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (
                                result['author_fuzz'], result['book_fuzz'],
                                result['authorname'], result['bookname'])
                        logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" %
                        (new_books, plural(new_books)))

    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
예제 #7
0
def processDir(reset=False):

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
        (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # handle single file downloads here...
                        if is_valid_booktype(fname, booktype="book") \
                            or is_valid_booktype(fname, booktype="mag"):
                            dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                            if os.path.exists(dirname):
                                try:
                                    shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                    fname = os.path.splitext(fname)[0]
                                except Exception as why:
                                    logger.debug("Failed to move file %s to %s, %s" %
                                        (fname, dirname, str(why)))
                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.info(u'Best match (%s%%): %s for %s' %
                    (match, pp_path, book['NZBtitle']))

            data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
            if data:
                authorname = data[0]['AuthorName']
                bookname = data[0]['BookName']
                if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                       ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                    lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                if data:
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                            lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                        '$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug('Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(book['AuxInfo']) # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                else:
                    newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                    "IssueStatus": "Open"}
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                newValueDict = {"IssueAcquired": today(),
                                "IssueFile": dest_file,
                                "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                }
                myDB.upsert("issues", newValueDict, controlValueDict)

                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s at %s" % (global_name, now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID'])

            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except:
                logger.debug("Unable to rename %s" % pp_path)

    downloads = os.listdir(processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)

            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)

            if (os.path.isdir(pp_path)):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')
예제 #8
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if not mags:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if not mags:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype})

        if not searchlist:
            logger.warn('There is nothing to search for.  Mark some magazines as active.')

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'direct'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders, dltypes = IterateOverRSSSites()
                if not nproviders or 'M' not in dltypes:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        if 'M' in item['types']:
                            resultlist.append({
                                'bookid': book['bookid'],
                                'nzbprov': item['tor_prov'],
                                'nzbtitle': item['tor_title'],
                                'nzburl': item['tor_url'],
                                'nzbdate': item['tor_date'],  # may be fake date as none returned from rss torrents
                                'nzbsize': item['tor_size'],
                                'nzbmode': item['tor_type']
                            })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
                           '#': '# '}
                    nzbtitle_formatted = replace_all(nzbtitle, dic)
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split()

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            bookid_exploded = replace_all(bookid, dic).split()

                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug("Rejecting %s, missing %s" % (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        "Magazine title matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass, nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']
                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)

                                    if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [3, 5, 6]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [1]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10,
                                                                                12, 13, 15, 16, 18]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" % control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age

                                    if str(issuedate).isdigit():
                                        logger.debug('Magazine comparing issue numbers (%s)' % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d', str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(
                                            lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                        logger.debug('Magazine date comparing to %s' % control_date)
                                    else:
                                        logger.debug('Magazine unable to find comparison type [%s]' % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year, int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d', str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (year, issuenum)
                                                comp_date = datecompare(issuedate, control_date)
                                        if not comp_date:
                                            logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                    else:
                                        logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug('This issue of %s not in a wanted date format.' % nzbtitle_formatted)
                            elif comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + issuedate
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' % len(issues))
                                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' %
                                                   insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('%s is already in %s marked %s' %
                                                 (nzbtitle, insert_table, mag_entry['Status']))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                if insert_table == 'pastissues':
                                    # try to mark ones we've already got
                                    match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?",
                                                       (bookid, issuedate))
                                    if match:
                                        insert_status = "Have"
                                    else:
                                        insert_status = "Skipped"
                                else:
                                    insert_status = "Wanted"
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": issuedate,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch, res = TORDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'magazine')
                    elif magazine['nzbmode'] == 'direct':
                        snatch, res = DirectDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'magazine')
                    elif magazine['nzbmode'] == 'nzb':
                        snatch, res = NZBDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'magazine')
                    else:
                        res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"])
                        logger.error(res)
                        snatch = 0

                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl']))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='PostProcessor')
                    else:
                        myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                                    (res, magazine["nzburl"]))

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
예제 #9
0
def search_nzb_book(books=None, reset=False):
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHNZB"

    if not lazylibrarian.USE_NZB():
        logger.warn('No NEWZNAB/TORZNAB providers set, check config')
        return
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books WHERE Status="Wanted" order by BookAdded desc')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select('SELECT BookID, AuthorName, BookName, BookSub from books WHERE BookID="%s" \
                                     AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("NZB search requested for no books or invalid BookID")
        return
    else:
        logger.info('NZB Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

    for searchbook in searchbooks:
        # searchterm is only used for display purposes
        searchterm = searchbook['AuthorName'] + ' "' + searchbook['BookName']
        if searchbook['BookSub']:
            searchterm = searchterm + ': ' + searchbook['BookSub']
        searchterm = searchterm + '"'

        searchlist.append({"bookid": searchbook['BookID'], "bookName": searchbook['BookName'], "bookSub": searchbook['BookSub'], "authorName": searchbook['AuthorName'], "searchterm": searchterm})

    if not lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE and not lazylibrarian.NZBGET_HOST:
        logger.warn('No download method is set, use SABnzbd/NZBGet or blackhole, check config')

    nzb_count = 0
    for book in searchlist:
        # first attempt, try author/title in category "book"
        resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'book')

        if not nproviders:
            logger.warn('No NewzNab or TorzNab providers are set, check config')
            return  # no point in continuing

        found = processResultList(resultlist, book, "book")

        # if you can't find the book, try author/title without any "(extended details, series etc)"
        if not found and '(' in book['bookName']:
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'shortbook')
            found = processResultList(resultlist, book, "shortbook")

        # if you can't find the book under "books", you might find under general search
        if not found:
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'general')
            found = processResultList(resultlist, book, "general")

        if not found:
            logger.debug("NZB Searches for %s returned no results." % book['searchterm'])
        if found > True:
            nzb_count = nzb_count + 1  # we found it

    logger.info("NZBSearch for Wanted items complete, found %s book%s" % (nzb_count, plural(nzb_count)))

    if reset:
        scheduleJob(action='Restart', target='search_nzb_book')
예제 #10
0
def search_rss_book(books=None, reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHRSS"

        if not(lazylibrarian.USE_RSS()):
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        myDB = database.DBConnection()
        searchlist = []

        if books is None:
            # We are performing a backlog search
            searchbooks = myDB.select(
                'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books WHERE Status="Wanted" \
                order by BookAdded desc')
        else:
            # The user has added a new book
            searchbooks = []
            for book in books:
                searchbook = myDB.select('SELECT BookID, AuthorName, BookName, BookSub from books WHERE BookID="%s" \
                                         AND Status="Wanted"' % book['bookid'])
                for terms in searchbook:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            return

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders:
            logger.warn('No rss providers are set, check config')
            return  # No point in continuing

        dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
               ',': '', '*': '', ':': '', ';': ''}

        rss_count = 0
        for book in searchbooks:
            authorname, bookname = get_searchterm(book, "book")
            found = processResultList(resultlist, authorname, bookname, book, 'book')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in bookname:  # anything to shorten?
                authorname, bookname = get_searchterm(book, "shortbook")
                found = processResultList(resultlist, authorname, bookname, book, 'shortbook')

            if not found:
                logger.debug("Searches returned no results. Adding book %s - %s to queue." % (authorname, bookname))
            if found > True:
                rss_count = rss_count + 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

        if reset:
            scheduleJob(action='Restart', target='search_rss_book')

    except Exception as e:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
예제 #11
0
def processDir(reset=False):

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(
            lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' %
                     (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
                (len(downloads), plural(
                    len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # handle single file downloads here...
                        if is_valid_booktype(fname, booktype="book") \
                            or is_valid_booktype(fname, booktype="mag"):
                            dirname = os.path.join(processpath,
                                                   os.path.splitext(fname)[0])
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug(
                                        'Failed to create directory %s, %s' %
                                        (dirname, why.strerror))
                            if os.path.exists(dirname):
                                try:
                                    shutil.move(
                                        os.path.join(processpath, fname),
                                        os.path.join(dirname, fname))
                                    fname = os.path.splitext(fname)[0]
                                except Exception as why:
                                    logger.debug(
                                        "Failed to move file %s to %s, %s" %
                                        (fname, dirname, str(why)))
                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' %
                                     (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' %
                                 (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.info(u'Best match (%s%%): %s for %s' %
                        (match, pp_path, book['NZBtitle']))

            data = myDB.select('SELECT * from books WHERE BookID="%s"' %
                               book['BookID'])
            if data:
                authorname = data[0]['AuthorName']
                bookname = data[0]['BookName']
                if 'windows' in platform.system().lower(
                ) and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                        '/', '\\')

                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                dic = {
                    '<': '',
                    '>': '',
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': '',
                    ':': '',
                    ';': '',
                    '\'': ''
                }
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                         dest_path).encode(
                                             lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.select('SELECT * from magazines WHERE Title="%s"' %
                                   book['BookID'])
                if data:
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data[0][
                        'IssueDate']  # keep for processing issues arriving out of order
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {
                        '<': '',
                        '>': '',
                        '...': '',
                        ' & ': ' ',
                        ' = ': ' ',
                        '?': '',
                        '$': 's',
                        ' + ': ' ',
                        '"': '',
                        ',': '',
                        '*': '',
                        ':': '',
                        ';': '',
                        '\'': ''
                    }
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(
                            lazylibrarian.DESTINATION_DIR,
                            dest_path).encode(lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(
                            lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug(
                        "Snatched magazine %s is not in download directory" %
                        (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" %
                         (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname,
                                         bookname, global_name)

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {
                "Status": "Processed",
                "NZBDate": now()
            }  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug(
                        'Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(
                            book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(
                            book['AuxInfo'])  # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                else:
                    newValueDict = {
                        "IssueDate": book['AuxInfo'],
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {
                    "Title": book['BookID'],
                    "IssueDate": book['AuxInfo']
                }
                newValueDict = {
                    "IssueAcquired":
                    today(),
                    "IssueFile":
                    dest_file,
                    "IssueID":
                    create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                }
                myDB.upsert("issues", newValueDict, controlValueDict)

                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s at %s" % (global_name, now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' %
                         pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action(
                    'UPDATE books SET status = "Wanted" WHERE BookID="%s"' %
                    book['BookID'])

            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except:
                logger.debug("Unable to rename %s" % pp_path)

    downloads = os.listdir(
        processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)

            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)

            if (os.path.isdir(pp_path)):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' %
                    (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')
예제 #12
0
def search_wishlist():
    if not (lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_wishlist')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "SEARCHWISHLIST"

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
            scheduleJob(action='Stop', target='search_wishlist')
            return  # No point in continuing

        # for each item in resultlist, add to database if necessary, and mark as wanted
        logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
        for book in resultlist:
            # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
            # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
            # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                bookmatch = myDB.match('select Status,BookName from books where bookid=?', (book['rss_bookid'],))
                if bookmatch:
                    bookstatus = bookmatch['Status']
                    bookname = bookmatch['BookName']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info('Found book %s, already marked as "%s"' % (bookname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found book %s, marking as "Wanted"' % bookname)
                        controlValueDict = {"BookID": book['rss_bookid']}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                else:
                    import_book(book['rss_bookid'])
                    new_books += 1
            else:
                item = {}
                results = None
                item['Title'] = book['rss_title']
                if book['rss_bookid']:
                    item['BookID'] = book['rss_bookid']
                if book['rss_isbn']:
                    item['ISBN'] = book['rss_isbn']
                bookmatch = finditem(item, book['rss_author'])
                if bookmatch:  # it's already in the database
                    authorname = bookmatch['AuthorName']
                    bookname = bookmatch['BookName']
                    bookid = bookmatch['BookID']
                    bookstatus = bookmatch['Status']
                    if bookstatus in ['Open', 'Wanted', 'Have']:
                        logger.info(
                            'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                    else:  # skipped/ignored
                        logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {"Status": "Wanted"}
                        myDB.upsert("books", newValueDict, controlValueDict)
                        new_books += 1
                else:  # not in database yet
                    if book['rss_isbn']:
                        results = search_for(book['rss_isbn'])
                    if results:
                        result = results[0]  # type: dict
                        if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%%) %s: %s" %
                                        (result['isbn_fuzz'], result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            new_books += 1
                            bookmatch = True
                    if not results:
                        searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                        results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]  # type: dict
                        if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                            logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                      result['authorname'], result['bookname']))
                            import_book(result['bookid'])
                            new_books += 1
                            bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            result = results[0]  # type: dict
                            msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                        result['authorname'], result['bookname'])
                        logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

    except Exception:
        logger.error('Unhandled exception in search_wishlist: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
예제 #13
0
def search_nzb_book(books=None, reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLNZB"
            else:
                threading.currentThread().name = "SEARCHNZB"

        if not lazylibrarian.USE_NZB():
            logger.warn('No NEWZNAB/TORZNAB providers set, check config')
            return

        if not internet():
            logger.warn('Search NZB Book: No internet connection')
            return

        myDB = database.DBConnection()
        searchlist = []

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books,authors '
            cmd += 'WHERE books.Status="Wanted" and books.AuthorID = authors.AuthorID order by BookAdded desc'
            searchbooks = myDB.select(cmd)
        else:
            # The user has added a new book
            searchbooks = []
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub from books,authors'
                cmd += ' WHERE BookID="%s"' % book['bookid']
                cmd += ' AND books.AuthorID = authors.AuthorID AND books.Status="Wanted"'
                searchbook = myDB.select(cmd)
                for terms in searchbook:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            return

        logger.info('NZB Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            searchlist.append({
                "bookid": searchbook['BookID'],
                "bookName": searchbook['BookName'],
                "bookSub": searchbook['BookSub'],
                "authorName": searchbook['AuthorName'],
                "searchterm": searchterm
            })

        nzb_count = 0
        for book in searchlist:
            # first attempt, try author/title in category "book"
            resultlist, nproviders = providers.IterateOverNewzNabSites(
                book, 'book')

            if not nproviders:
                logger.warn(
                    'No NewzNab or TorzNab providers are set, check config')
                return  # no point in continuing

            found = processResultList(resultlist, book, "book")

            # if you can't find the book, try author/title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:
                resultlist, nproviders = providers.IterateOverNewzNabSites(
                    book, 'shortbook')
                found = processResultList(resultlist, book, "shortbook")

            # if you can't find the book under "books", you might find under general search
            if not found:
                resultlist, nproviders = providers.IterateOverNewzNabSites(
                    book, 'general')
                found = processResultList(resultlist, book, "general")

            # if still not found, try general search again without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:
                resultlist, nproviders = providers.IterateOverNewzNabSites(
                    book, 'shortgeneral')
                found = processResultList(resultlist, book, "shortgeneral")

            if not found:
                logger.info("NZB Searches for %s returned no results." %
                            book['searchterm'])
            if found > True:
                nzb_count += 1  # we found it

        logger.info("NZBSearch for Wanted items complete, found %s book%s" %
                    (nzb_count, plural(nzb_count)))

        if reset:
            scheduleJob(action='Restart', target='search_nzb_book')

    except Exception:
        logger.error('Unhandled exception in search_nzb_book: %s' %
                     traceback.format_exc())
예제 #14
0
def processResultList(resultlist, authorname, bookname, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': ' ',
        '*': '',
        '(': '',
        ')': '',
        '[': '',
        ']': '',
        '#': '',
        '0': '',
        '1': '',
        '2': '',
        '3': '',
        '4': '',
        '5': '',
        '6': '',
        '7': '',
        '8': '',
        '9': '',
        '\'': '',
        ':': '',
        '!': '',
        '-': ' ',
        '\s\s': ' '
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)

    matches = []

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = unaccented_str(replace_all(tor['tor_title'],
                                              dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(authorname, torTitle)
        tor_Title_match = fuzz.token_set_ratio(bookname, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" %
                     (tor_Author_match, tor_Title_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.match(
            'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
            tor_url)
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" %
                         (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in authorname.lower(
                ) and word not in bookname.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" %
                                 (torTitle, word))
                    break

        tor_size_temp = tor[
            'tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = round(float(tor_size_temp) / 1048576, 2)
        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)

        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()
            tor_prov = tor['tor_prov']
            tor_feed = tor['tor_feed']

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (tor_Title_match + tor_Author_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(authorname))
            words -= len(getList(bookname))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.debug(
                u'Nearest RSS match (%s%%): %s using %s search for %s %s' %
                (score, nzb_Title, searchtype, authorname, bookname))
            return False

        logger.info(u'Best RSS match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match(
            'SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
            newValueDict["BookID"])

        if snatchedbooks:  # check if one of the other downloaders got there first
            logger.info('%s already marked snatched' % nzb_Title)
            return True
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"])
            else:
                """
                #  http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                if not tor_url.startswith('magnet'):  # magnets don't use auth
                    pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                    auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                    # don't know what form of password hash is required, try sha1
                    tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
                """
                snatch = TORDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)

            if snatch:
                logger.info(
                    'Downloading %s from %s' %
                    (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch(
                    "%s from %s at %s" %
                    (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No RSS found for " +
                     (book["authorName"] + ' ' + book['bookName']).strip())
    return False
예제 #15
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                ':': '', '!': '', '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ',
                # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': '', '\'': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for nzb in resultlist:
        nzb_Title = unaccented_str(replace_all(nzb['nzbtitle'], dictrepl)).strip()
        nzb_Title = re.sub(r"\s\s+", " ", nzb_Title)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title)
        nzbBook_match = fuzz.token_set_ratio(title, nzb_Title)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzb_Title))
        nzburl = nzb['nzburl']

        rejected = False

        already_failed = myDB.action('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
                                    nzburl).fetchone()
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (nzb_Title, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in nzb_Title.lower() and not word in author.lower() and not word in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (nzb_Title, word))
                    break

        nzbsize_temp = nzb['nzbsize']  # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = round(float(nzbsize_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if not rejected:
            if maxsize and nzbsize > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % nzb_Title)

        if not rejected:
            if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio:
                bookid = book['bookid']
                nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']
                controlValueDict = {"NZBurl": nzburl}
                newValueDict = {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": nzbsize,
                    "NZBtitle": nzbTitle,
                    "NZBmode": nzbmode,
                    "Status": "Skipped"
                }

                score = (nzbBook_match + nzbAuthor_match)/2  # as a percentage
                # lose a point for each extra word in the title so we get the closest match
                words = len(getList(nzb_Title))
                words -= len(getList(author))
                words -= len(getList(title))
                score -= abs(words)
                matches.append([score, nzb_Title, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match NZB (%s%%): %s using %s search' %
            (score, nzb_Title, searchtype))

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                    newValueDict["BookID"]).fetchone()
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it
        else:
            logger.debug('%s adding to wanted' % nzb_Title)
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if nzbmode == "torznab":
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            else:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + now())
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No nzb's found for [%s] using searchtype %s" % (book["searchterm"], searchtype))
    return False
예제 #16
0
def search_nzb_book(books=None, reset=False):
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHNZB"

    if not lazylibrarian.USE_NZB():
        logger.warn('No NEWZNAB/TORZNAB providers set, check config')
        return
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books WHERE Status="Wanted" order by BookAdded desc'
        )
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select(
                'SELECT BookID, AuthorName, BookName, BookSub from books WHERE BookID="%s" \
                                     AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("NZB search requested for no books or invalid BookID")
        return
    else:
        logger.info('NZB Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

    for searchbook in searchbooks:
        # searchterm is only used for display purposes
        searchterm = searchbook['AuthorName'] + ' "' + searchbook['BookName']
        if searchbook['BookSub']:
            searchterm = searchterm + ': ' + searchbook['BookSub']
        searchterm = searchterm + '"'

        searchlist.append({
            "bookid": searchbook['BookID'],
            "bookName": searchbook['BookName'],
            "bookSub": searchbook['BookSub'],
            "authorName": searchbook['AuthorName'],
            "searchterm": searchterm
        })

    if not lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE and not lazylibrarian.NZBGET_HOST:
        logger.warn(
            'No download method is set, use SABnzbd/NZBGet or blackhole, check config'
        )

    nzb_count = 0
    for book in searchlist:
        # first attempt, try author/title in category "book"
        resultlist, nproviders = providers.IterateOverNewzNabSites(
            book, 'book')

        if not nproviders:
            logger.warn(
                'No NewzNab or TorzNab providers are set, check config')
            return  # no point in continuing

        found = processResultList(resultlist, book, "book")

        # if you can't find the book, try author/title without any "(extended details, series etc)"
        if not found and '(' in book['bookName']:
            resultlist, nproviders = providers.IterateOverNewzNabSites(
                book, 'shortbook')
            found = processResultList(resultlist, book, "shortbook")

        # if you can't find the book under "books", you might find under general search
        if not found:
            resultlist, nproviders = providers.IterateOverNewzNabSites(
                book, 'general')
            found = processResultList(resultlist, book, "general")

        if not found:
            logger.debug("NZB Searches for %s returned no results." %
                         book['searchterm'])
        if found > True:
            nzb_count = nzb_count + 1  # we found it

    logger.info("NZBSearch for Wanted items complete, found %s book%s" %
                (nzb_count, plural(nzb_count)))

    if reset:
        scheduleJob(action='Restart', target='search_nzb_book')
예제 #17
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
  try:
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHMAG"

    myDB = database.DBConnection()
    searchlist = []

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select('SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title="%s" AND Status="Active"' % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 0:
        return

    # should clear old search results as might not be available any more
    # ie torrent not available, changed providers, out of news server retention etc.
    # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
    logger.debug(u"Removing old magazine search results")
    myDB.action('DELETE from pastissues WHERE Status="Skipped"')

    logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

    for searchmag in searchmags:
        bookid = searchmag['Title']
        searchterm = searchmag['Regex']

        if not searchterm:
            searchterm = searchmag['Title']
            dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
            searchterm = unaccented_str(replace_all(searchterm, dic))
            searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode(lazylibrarian.SYS_ENCODING)

        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn('There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:

        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

            if tor_resultlist:
                for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                    resultlist.append({
                        'bookid': item['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                        'nzbsize': item['tor_size'],
                        'nzbmode': 'torrent'
                    })

        if lazylibrarian.USE_RSS():
            rss_resultlist, nproviders = IterateOverRSSSites(book, 'mag')
            if not nproviders:
                logger.warn('No rss providers are set. Check config for RSS providers')

            if rss_resultlist:
                for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                    resultlist.append({
                        'bookid': book['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate':
                            item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                        'nzbsize': item['tor_size'],
                        'nzbmode': item['tor_type']
                    })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])

        else:
            bad_name = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            maglist = []
            issues = []

            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = unaccented_str(nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']

                results = myDB.match('SELECT * from magazines WHERE Title="%s"' % bookid)
                if not results:
                    logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                    bad_name = bad_name + 1
                else:
                    rejected = False
                    maxsize = check_int(lazylibrarian.REJECT_MAGSIZE, 0)
                    if maxsize and nzbsize > maxsize:
                        logger.debug("Rejecting %s, too large" % nzbtitle)
                        rejected = True

                    if not rejected:
                        control_date = results['IssueDate']
                        reject_list = getList(results['Reject'])

                        dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': ''}
                        nzbtitle_formatted = replace_all(nzbtitle, dic).strip()

                        # Need to make sure that substrings of magazine titles don't get found
                        # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                        # remove extra spaces if they're in a row
                        nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                        nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                        if ' ' in bookid:
                            bookid_exploded = bookid.split(' ')
                        else:
                            bookid_exploded = [bookid]

                        # check nzb starts with magazine title followed by a date
                        # eg The MagPI Issue 22 - July 2015

                        if len(nzbtitle_exploded) > len(bookid_exploded):
                            # needs to be longer as it has to include a date
                            # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                            mag_title_match = fuzz.token_set_ratio(
                                unaccented(bookid),
                                unaccented(nzbtitle_formatted))

                            if mag_title_match < lazylibrarian.MATCH_RATIO:
                                logger.debug(
                                    u"Magazine token set Match failed: " + str(
                                        mag_title_match) + "% for " + nzbtitle_formatted)
                                rejected = True
                            else:
                                logger.debug(
                                    u"Magazine matched: " + str(
                                        mag_title_match) + "% " + bookid + " for " + nzbtitle_formatted)
                        else:
                            rejected = True

                    if not rejected:
                        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
                                                    nzburl)
                        if already_failed:
                            logger.debug("Rejecting %s, blacklisted at %s" %
                                         (nzbtitle_formatted, already_failed['NZBprov']))
                            rejected = True

                    if not rejected:
                        lower_title = unaccented(nzbtitle_formatted).lower()
                        lower_bookid = unaccented(bookid).lower()
                        for word in reject_list:
                            if word in lower_title and word not in lower_bookid:
                                rejected = True
                                logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                break

                    if not rejected:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # or other words we don't want. Should make the word list configurable.
                        # so strip all the trailing junk...
                        strip_list = ['pdf', 'true', 'truepdf', 'german', 'ebooks']
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() in strip_list:
                                nzbtitle_exploded.pop()  # gotta love the function names

                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                            regexA_month = month2num(unaccented(regexA_month_temp))
                            if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure

                            # if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].rstrip(',').zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            # else:
                            # regexA_day = '01'  # monthly, or less frequent
                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months, or month <1 or >12
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY or MonthName DD, YYYY
                                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                regexB_month = month2num(unaccented(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].rstrip(',').zfill(2)
                                if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100:
                                    regexB_year = 'fail'
                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(int(regexB_year), int(regexB_month), int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try  YYYY MM
                                    regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                        regexC_month = 0
                                        regexC_day = 0
                                        if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                            regexC_day = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        else:
                                            regexC_year = 'fail'
                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(int(regexC_year), int(regexC_month), int(regexC_day))
                                    except Exception:
                                        # regexD Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                                        try:
                                            IssueLabel = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                            if IssueLabel.lower() in ["issue", "no", "nr", "vol"]:
                                                # issue nn
                                                regexD_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                if regexD_issue.isdigit():
                                                    newdatish = str(int(regexD_issue))  # 4 == 04 == 004
                                            else:
                                                IssueLabel = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                                if IssueLabel.lower() in ["issue", "no", "nr", "vol"]:
                                                    # issue nn, YYYY
                                                    regexD_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                                    regexD_issue = regexD_issue.strip(',')
                                                    if regexD_issue.isdigit():
                                                        newdatish = str(int(regexD_issue))  # 4 == 04 == 004
                                                    else:
                                                        raise ValueError
                                                    regexD_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                    if regexD_year.isdigit():
                                                        if int(regexD_year) < int(datetime.date.today().year):
                                                            newdatish = 0  # it's old
                                                else:
                                                    raise ValueError
                                        except Exception:
                                            # regexE nn YYYY issue number without "Nr" before it
                                            # nn is assumed not to be a month as they are normally names not digits
                                            try:
                                                regexE_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                regexE_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                                print "[%s][%s]" % (regexE_year, regexE_issue)
                                                if regexE_issue.isdigit():
                                                    newdatish = int(regexE_issue)
                                                    if int(regexE_year) < int(datetime.date.today().year):
                                                        newdatish = 0  # it's old
                                                else:
                                                    raise ValueError
                                            except Exception:
                                                # regexF issue and year as a single 6 digit string eg 222015
                                                try:
                                                    regexF = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                    if regexF.isdigit() and len(regexF) == 6:
                                                        regexF_issue = regexF[:2]
                                                        regexF_year = regexF[2:]
                                                        newdatish = str(int(regexF_issue))  # 4 == 04 == 004
                                                        if int(regexF_year) < int(datetime.date.today().year):
                                                            newdatish = 0  # it's old
                                                    else:
                                                        raise ValueError

                                                except Exception:
                                                    logger.debug('Magazine %s not in a recognised date format.' %
                                                                 nzbtitle_formatted)
                                                    bad_date = bad_date + 1
                                                    # allow issues with good name but bad date to be included
                                                    # so user can manually select them, incl those with issue numbers
                                                    newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                                    # continue
                        else:
                            logger.debug('Magazine [%s] does not match the search term [%s].' % (
                                nzbtitle_formatted, bookid))
                            bad_name = bad_name + 1
                            continue

                        #  wanted issues go into wanted table marked "Wanted"
                        #  the rest into pastissues table marked "Skipped"
                        insert_table = "pastissues"
                        insert_status = "Skipped"

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            # or for magazines with only an issue number, use zero

                            if '-' in str(newdatish):
                                start_time = time.time()
                                start_time -= int(lazylibrarian.MAG_AGE) * 24 * 60 * 60  # number of seconds in days
                                if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                    start_time = 0
                                control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                logger.debug('Magazine date comparing to %s' % control_date)
                            else:
                                control_date = 0

                        if '-' in str(control_date) and '-' in str(newdatish):
                            # only grab a copy if it's newer than the most recent we have,
                            # or newer than a month ago if we have none
                            comp_date = datecompare(newdatish, control_date)
                        elif '-' not in str(control_date) and '-' not in str(newdatish):
                            # for issue numbers, check if later than last one we have
                            comp_date = int(newdatish) - int(control_date)
                            newdatish = "%s" % newdatish
                            newdatish = newdatish.zfill(4)  # pad so we sort correctly
                        else:
                            # invalid comparison of date and issue number
                            logger.debug('Magazine %s incorrect date or issue format.' % nzbtitle_formatted)
                            bad_date = bad_date + 1
                            newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                            comp_date = 0

                        if comp_date > 0:
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                issues.append(issue)
                                insert_table = "wanted"
                                insert_status = "Wanted"
                                nzbdate = now()  # when we asked for it
                            else:
                                logger.debug('This issue of %s is already flagged for download' % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date = old_date + 1

                        #  store only the _new_ matching results
                        #  Don't add a new entry if this issue has been found on an earlier search
                        #  and status has been user-set ( we only delete the "Skipped" ones )
                        #  In "wanted" table it might be already snatched/downloading/processing

                        mag_entry = myDB.select('SELECT * from %s WHERE NZBtitle="%s" and NZBprov="%s"' % (
                                                insert_table, nzbtitle, nzbprov))
                        if not mag_entry:
                            controlValueDict = {
                                "NZBtitle": nzbtitle,
                                "NZBprov": nzbprov
                            }
                            newValueDict = {
                                "NZBurl": nzburl,
                                "BookID": bookid,
                                "NZBdate": nzbdate,
                                "AuxInfo": newdatish,
                                "Status": insert_status,
                                "NZBsize": nzbsize,
                                "NZBmode": nzbmode
                            }
                            myDB.upsert(insert_table, newValueDict, controlValueDict)

                    else:
                        # logger.debug('Magazine [%s] was rejected.' % nzbtitle_formatted)
                        bad_name = bad_name + 1

            logger.info('Found %i result%s for %s. %i new, %i old, %i fail date, %i fail name: %i to download' % (
                        total_nzbs, plural(total_nzbs), bookid, new_date, old_date, bad_date, bad_name, len(maglist)))

            for magazine in maglist:
                if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                    snatch = TORDownloadMethod(
                        magazine['bookid'],
                        magazine['nzbprov'],
                        magazine['nzbtitle'],
                        magazine['nzburl'])
                else:
                    snatch = NZBDownloadMethod(
                        magazine['bookid'],
                        magazine['nzbprov'],
                        magazine['nzbtitle'],
                        magazine['nzburl'])
                if snatch:
                    logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                    notify_snatch("%s from %s at %s" %
                                  (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                    scheduleJob(action='Start', target='processDir')
            maglist = []

    if reset:
        scheduleJob(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")

  except Exception as e:
    logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
예제 #18
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHMAG"

    myDB = database.DBConnection()
    searchlist = []

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
                                 )
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select(
                'SELECT Title, LastAcquired, IssueDate from magazines \
                                          WHERE Title="%s" AND Status="Active"'
                % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 0:
        return

    # should clear old search results as might not be available any more
    # ie torrent not available, changed providers, out of news server retention etc.
    # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
    logger.debug(u"Removing old magazine search results")
    myDB.action('DELETE from pastissues WHERE Status="Skipped"')

    logger.info('Searching for %i magazine%s' %
                (len(searchmags), plural(len(searchmags))))

    for searchmag in searchmags:
        bookid = searchmag['Title']
        searchterm = searchmag['Title']
        # frequency = searchmag[1]
        # last_acquired = searchmag[2]
        # issue_date = searchmag[3]

        dic = {
            '...': '',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': '',
            '*': ''
        }

        searchterm = unaccented_str(replace_all(searchterm, dic))
        searchterm = re.sub('[\.\-\/]', ' ',
                            searchterm).encode(lazylibrarian.SYS_ENCODING)
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn(
            'There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:

        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn(
                    'No nzb providers are set. Check config for NEWZNAB or TORZNAB providers'
                )

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn(
                    'No torrent providers are set. Check config for TORRENT providers'
                )

            if tor_resultlist:
                for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                    resultlist.append({
                        'bookid': item['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate':
                        'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                        'nzbsize': item['tor_size'],
                        'nzbmode': 'torrent'
                    })

        if lazylibrarian.USE_RSS():
            rss_resultlist, nproviders = IterateOverRSSSites(book, 'mag')
            if not nproviders:
                logger.warn(
                    'No rss providers are set. Check config for RSS providers')

            if rss_resultlist:
                for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                    resultlist.append({
                        'bookid': book['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate': item[
                            'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                        'nzbsize': item['tor_size'],
                        'nzbmode': item['tor_type']
                    })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])

        else:
            bad_regex = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            maglist = []
            issues = []

            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = unaccented_str(nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace(
                    "'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                    nzbsize_temp = 1000
                nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']

                results = myDB.match(
                    'SELECT * from magazines WHERE Title="%s"' % bookid)
                if not results:
                    logger.debug(
                        'Magazine [%s] does not match search term [%s].' %
                        (nzbtitle, bookid))
                    bad_regex = bad_regex + 1
                else:
                    control_date = results['IssueDate']
                    reject_list = getList(results['Regex'])

                    dic = {
                        '.': ' ',
                        '-': ' ',
                        '/': ' ',
                        '+': ' ',
                        '_': ' ',
                        '(': '',
                        ')': ''
                    }
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                    # remove extra spaces if they're in a row
                    nzbtitle_exploded_temp = " ".join(
                        nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                    if ' ' in bookid:
                        bookid_exploded = bookid.split(' ')
                    else:
                        bookid_exploded = [bookid]

                    # check nzb starts with magazine title, and ends with a date
                    # eg The MagPI Issue 22 - July 2015
                    # do something like check left n words match title
                    # then check last n words are a date

                    rejected = False
                    if len(nzbtitle_exploded) > len(
                            bookid_exploded
                    ):  # needs to be longer as it has to include a date
                        # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                        mag_title_match = fuzz.token_set_ratio(
                            unaccented(bookid), unaccented(nzbtitle_formatted))

                        if mag_title_match < lazylibrarian.MATCH_RATIO:
                            logger.debug(u"Magazine token set Match failed: " +
                                         str(mag_title_match) + "% for " +
                                         nzbtitle_formatted)
                            rejected = True
                    else:
                        rejected = True

                    if not rejected:
                        already_failed = myDB.match(
                            'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"'
                            % nzburl)
                        if already_failed:
                            logger.debug("Rejecting %s, blacklisted at %s" %
                                         (nzbtitle_formatted,
                                          already_failed['NZBprov']))
                            rejected = True

                    if not rejected:
                        lower_title = unaccented(nzbtitle_formatted).lower()
                        lower_bookid = unaccented(bookid).lower()
                        for word in reject_list:
                            if word in lower_title and word not in lower_bookid:
                                rejected = True
                                logger.debug("Rejecting %s, contains %s" %
                                             (nzbtitle_formatted, word))
                                break

                    # maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
                    # if maxsize and nzbsize > maxsize:
                    #    rejected = True
                    #    logger.debug("Rejecting %s, too large" % nzbtitle_formatted)

                    if not rejected:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # so strip all the trailing junk...
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() in ['pdf', 'true', 'truepdf']:
                            nzbtitle_exploded.pop(
                            )  # gotta love the function names

                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            regexA_year = nzbtitle_exploded[
                                len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[
                                len(nzbtitle_exploded) - 2]
                            regexA_month = month2num(
                                unaccented(regexA_month_temp))
                            if not regexA_year.isdigit() or int(
                                    regexA_year) < 1900 or int(
                                        regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure

                            # if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[
                                len(nzbtitle_exploded) -
                                3].rstrip(',').zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day
                                       ) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            # else:
                            # regexA_day = '01'  # monthly, or less frequent
                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year),
                                                      int(regexA_month),
                                                      int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY or MonthName DD, YYYY
                                regexB_year = nzbtitle_exploded[
                                    len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[
                                    len(nzbtitle_exploded) - 3]
                                regexB_month = month2num(
                                    unaccented(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[
                                    len(nzbtitle_exploded) -
                                    2].rstrip(',').zfill(2)
                                if not regexB_year.isdigit() or int(
                                        regexB_year) < 1900 or int(
                                            regexB_year) > 2100:
                                    regexB_year = 'fail'
                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(
                                        int(regexB_year), int(regexB_month),
                                        int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try  YYYY MM
                                    regexC_year = nzbtitle_exploded[
                                        len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit(
                                    ) and int(regexC_year) > 1900 and int(
                                            regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[
                                            len(nzbtitle_exploded) -
                                            1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[
                                            len(nzbtitle_exploded) - 3]
                                        regexC_month = 0
                                        regexC_day = 0
                                        if regexC_year.isdigit(
                                        ) and int(regexC_year) > 1900 and int(
                                                regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[
                                                len(nzbtitle_exploded) -
                                                2].zfill(2)
                                            regexC_day = nzbtitle_exploded[
                                                len(nzbtitle_exploded) -
                                                1].zfill(2)
                                        else:
                                            regexC_year = 'fail'
                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(
                                            int(regexC_year),
                                            int(regexC_month), int(regexC_day))
                                    except Exception:
                                        # regexD Issue/No/Vol nn, YYYY or Issue/No/Vol nn
                                        try:
                                            IssueLabel = nzbtitle_exploded[
                                                len(nzbtitle_exploded) - 2]
                                            if IssueLabel.lower() in [
                                                    "issue", "no", "vol"
                                            ]:
                                                # issue nn
                                                regexD_issue = nzbtitle_exploded[
                                                    len(nzbtitle_exploded) - 1]
                                                if regexD_issue.isdigit():
                                                    newdatish = str(
                                                        int(regexD_issue)
                                                    )  # 4 == 04 == 004
                                            else:
                                                IssueLabel = nzbtitle_exploded[
                                                    len(nzbtitle_exploded) - 3]
                                                if IssueLabel.lower() in [
                                                        "issue", "no", "vol"
                                                ]:
                                                    # issue nn, YYYY
                                                    regexD_issue = nzbtitle_exploded[
                                                        len(nzbtitle_exploded)
                                                        - 2]
                                                    regexD_issue = regexD_issue.strip(
                                                        ',')
                                                    if regexD_issue.isdigit():
                                                        newdatish = str(
                                                            int(regexD_issue)
                                                        )  # 4 == 04 == 004
                                                    else:
                                                        raise ValueError
                                                    regexD_year = nzbtitle_exploded[
                                                        len(nzbtitle_exploded)
                                                        - 1]
                                                    if regexD_year.isdigit():
                                                        if int(
                                                                regexD_year
                                                        ) < int(datetime.date.
                                                                today().year):
                                                            newdatish = 0  # it's old
                                                else:
                                                    raise ValueError
                                        except Exception:
                                            logger.debug(
                                                'Magazine %s not in proper date format.'
                                                % nzbtitle_formatted)
                                            bad_date = bad_date + 1
                                            # allow issues with good name but bad date to be included
                                            # so user can manually select them, incl those with issue numbers
                                            newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                            # continue
                        else:
                            logger.debug(
                                'Magazine [%s] does not match the search term [%s].'
                                % (nzbtitle_formatted, bookid))
                            bad_regex = bad_regex + 1
                            continue

                        #  wanted issues go into wanted table marked "Wanted"
                        #  the rest into pastissues table marked "Skipped"
                        insert_table = "pastissues"
                        insert_status = "Skipped"

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            # or for magazines with only an issue number, use zero

                            if '-' in str(newdatish):
                                start_time = time.time()
                                start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                                control_date = time.strftime(
                                    "%Y-%m-%d", time.localtime(start_time))
                            else:
                                control_date = 0

                        if '-' in str(control_date) and '-' in str(newdatish):
                            # only grab a copy if it's newer than the most recent we have,
                            # or newer than a month ago if we have none
                            comp_date = datecompare(newdatish, control_date)
                        elif '-' not in str(control_date) and '-' not in str(
                                newdatish):
                            # for issue numbers, check if later than last one we have
                            comp_date = int(newdatish) - int(control_date)
                            newdatish = "%s" % newdatish
                            newdatish = newdatish.zfill(
                                4)  # pad so we sort correctly
                        else:
                            # invalid comparison of date and issue number
                            logger.debug(
                                'Magazine %s incorrect date or issue format.' %
                                nzbtitle_formatted)
                            bad_date = bad_date + 1
                            newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                            comp_date = 0

                        if comp_date > 0:
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug(
                                    'This issue of %s is new, downloading' %
                                    nzbtitle_formatted)
                                issues.append(issue)
                                insert_table = "wanted"
                                insert_status = "Wanted"
                                nzbdate = now()  # when we asked for it
                            else:
                                logger.debug(
                                    'This issue of %s is already flagged for download'
                                    % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug(
                                    'This issue of %s is old; skipping.' %
                                    nzbtitle_formatted)
                                old_date = old_date + 1

                        #  store only the _new_ matching results
                        #  Don't add a new entry if this issue has been found on an earlier search
                        #  and status has been user-set ( we only delete the "Skipped" ones )
                        #  In "wanted" table it might be already snatched/downloading/processing

                        mag_entry = myDB.select(
                            'SELECT * from %s WHERE NZBtitle="%s" and NZBprov="%s"'
                            % (insert_table, nzbtitle, nzbprov))
                        if not mag_entry:
                            controlValueDict = {
                                "NZBtitle": nzbtitle,
                                "NZBprov": nzbprov
                            }
                            newValueDict = {
                                "NZBurl": nzburl,
                                "BookID": bookid,
                                "NZBdate": nzbdate,
                                "AuxInfo": newdatish,
                                "Status": insert_status,
                                "NZBsize": nzbsize,
                                "NZBmode": nzbmode
                            }
                            myDB.upsert(insert_table, newValueDict,
                                        controlValueDict)

                    else:
                        # logger.debug('Magazine [%s] was rejected.' % nzbtitle_formatted)
                        bad_regex = bad_regex + 1

            logger.info(
                'Found %i result%s for %s. %i new, %i old, %i fail date, %i fail name: %i to download'
                % (total_nzbs, plural(total_nzbs), bookid, new_date, old_date,
                   bad_date, bad_regex, len(maglist)))

            for magazine in maglist:
                if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                    snatch = TORDownloadMethod(magazine['bookid'],
                                               magazine['nzbprov'],
                                               magazine['nzbtitle'],
                                               magazine['nzburl'])
                else:
                    snatch = NZBDownloadMethod(magazine['bookid'],
                                               magazine['nzbprov'],
                                               magazine['nzbtitle'],
                                               magazine['nzburl'])
                if snatch:
                    logger.info('Downloading %s from %s' %
                                (magazine['nzbtitle'], magazine["nzbprov"]))
                    notify_snatch("%s from %s at %s" % (unaccented(
                        magazine['nzbtitle']), magazine["nzbprov"], now()))
                    scheduleJob(action='Start', target='processDir')
            maglist = []

    if reset:
        scheduleJob(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")
예제 #19
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype})

        if not searchlist:
            logger.warn('There is nothing to search for.  Mark some magazines as active.')

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                                item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
                           '#': '# '}
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split(' ')

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug("Rejecting %s, missing %s" % (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        "Magazine title matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass, nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']
                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)

                                    if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [3, 5, 6]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [1]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10,
                                                                                12, 13, 15, 16, 18]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" % control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age

                                    if str(issuedate).isdigit():
                                        logger.debug('Magazine comparing issue numbers (%s)' % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d', str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(
                                            lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                        logger.debug('Magazine date comparing to %s' % control_date)
                                    else:
                                        logger.debug('Magazine unable to find comparison type [%s]' % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year, int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d', str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (year, issuenum)
                                                comp_date = datecompare(issuedate, control_date)
                                        if not comp_date:
                                            logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                    else:
                                        logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug('This issue of %s not in a wanted date format.' % nzbtitle_formatted)
                            elif comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + issuedate
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' % len(issues))
                                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' %
                                                   insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('%s is already in %s marked %s' %
                                                 (nzbtitle, insert_table, mag_entry['Status']))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                if insert_table == 'pastissues':
                                    # try to mark ones we've already got
                                    match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?",
                                                       (bookid, issuedate))
                                    if match:
                                        insert_status = "Have"
                                    else:
                                        insert_status = "Skipped"
                                else:
                                    insert_status = "Wanted"
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": issuedate,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch, res = TORDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    elif magazine['nzbmode'] == 'direct':
                        snatch, res = DirectDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    elif magazine['nzbmode'] == 'nzb':
                        snatch, res = NZBDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    else:
                        res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"])
                        logger.error(res)
                        snatch = 0

                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl']))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='PostProcessor')
                    else:
                        myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                                    (res, magazine["nzburl"]))

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
예제 #20
0
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    if not (lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if not books:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        myDB = database.DBConnection()

        searchbooks = []
        if not books:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'], ))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders, _ = IterateOverRSSSites()
        if not nproviders:
            logger.warn('No rss providers are available')
            scheduleJob(action='Stop', target='search_rss_book')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"], ))
                    if snatched:
                        logger.warn(
                            'eBook %s %s already marked snatched in wanted table'
                            %
                            (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({
                            "bookid":
                            searchbook['BookID'],
                            "bookName":
                            searchbook['BookName'],
                            "bookSub":
                            searchbook['BookSub'],
                            "authorName":
                            searchbook['AuthorName'],
                            "library":
                            "eBook",
                            "searchterm":
                            searchterm
                        })

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"], ))
                    if snatched:
                        logger.warn(
                            'AudioBook %s %s already marked snatched in wanted table'
                            %
                            (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append({
                            "bookid":
                            searchbook['BookID'],
                            "bookName":
                            searchbook['BookName'],
                            "bookSub":
                            searchbook['BookSub'],
                            "authorName":
                            searchbook['AuthorName'],
                            "library":
                            "AudioBook",
                            "searchterm":
                            searchterm
                        })

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("RSS Searches for %s %s returned no results." %
                            (book['library'], book['searchterm']))
            if found > 1:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" %
                    (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
예제 #21
0
def search_rss_book(books=None, reset=False):
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHRSS"

    if not(lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return

    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books WHERE Status="Wanted" order by BookAdded desc')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select('SELECT BookID, AuthorName, BookName, BookSub from books WHERE BookID="%s" \
                                     AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("RSS search requested for no books or invalid BookID")
        return
    else:
        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

    resultlist, nproviders = IterateOverRSSSites()
    if not nproviders:
        logger.warn('No rss providers are set, check config')
        return  # No point in continuing

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    rss_count = 0
    for book in searchbooks:
        author = book['AuthorName']
        title = book['BookName']

        author = unaccented_str(replace_all(author, dic))
        title = unaccented_str(replace_all(title, dic))

        found = processResultList(resultlist, author, title, book)

        # if you can't find the book, try author without initials,
        # and title without any "(extended details, series etc)"
        if not found:
            if author[1] in '. ' or '(' in title:  # anything to shorten?
                while author[1] in '. ':  # strip any initials
                    author = author[2:].strip()  # and leading whitespace
                if '(' in title:
                    title = title.split('(')[0]
                found = processResultList(resultlist, author, title, book)

        if not found:
            logger.debug("Searches returned no results. Adding book %s - %s to queue." % (author, title))
        if found > True:
            rss_count = rss_count + 1

    logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    if reset:
        scheduleJob(action='Restart', target='search_rss_book')
예제 #22
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for tor in resultlist:
        torTitle = unaccented_str(tor['tor_title'])
        torTitle = replace_all(torTitle, dictrepl).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % tor_url)
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in author.lower() and word not in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        tor_size_temp = check_int(tor_size_temp, 1000)
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor['tor_prov'],
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (torBook_match + torAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(author))
            words -= len(getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.info(u'Nearest TOR match (%s%%): %s using %s search for %s %s' %
                        (score, nzb_Title, searchtype, author, title))
            return False

        logger.info(u'Best TOR match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                   newValueDict["BookID"])
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it, not us
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if newValueDict["NZBprov"] == 'libgen':
                # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                              newValueDict["NZBtitle"], controlValueDict["NZBurl"], nzb_Title)
            else:
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                logger.info('Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch("%s from %s at %s" %
                              (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No torrent's found for [%s] using searchtype %s" % (book["searchterm"], searchtype))
    return False
예제 #23
0
def processResultList(resultlist, author, title, book):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)

    matches = []

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = unaccented_str(replace_all(tor['tor_title'], dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, torTitle)
        tor_Title_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.action('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
                                    tor_url).fetchone()
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and not word in author.lower() and not word in book.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = round(float(tor_size_temp) / 1048576, 2)
        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)

        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            if tor_Title_match >= match_ratio and tor_Author_match >= match_ratio:
                bookid = book['bookid']
                tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                             ' LL.(' + book['bookid'] + ')').strip()
                tor_prov = tor['tor_prov']
                tor_feed = tor['tor_feed']

                controlValueDict = {"NZBurl": tor_url}
                newValueDict = {
                    "NZBprov": tor_prov,
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": tor_size,
                    "NZBtitle": tor_Title,
                    "NZBmode": "torrent",
                    "Status": "Skipped"
                }

                score = (tor_Title_match + tor_Author_match)/2  # as a percentage
                # lose a point for each extra word in the title so we get the closest match
                words = len(getList(torTitle))
                words -= len(getList(author))
                words -= len(getList(title))
                score -= abs(words)
                matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match RSS (%s%%): %s using %s search' %
            (score, nzb_Title, searchtype))

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                    newValueDict["BookID"]).fetchone()

        if snatchedbooks:  # check if one of the other downloaders got there first
            logger.debug('%s already marked snatched' % nzb_Title)
            return True
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            else:
                """
                #  http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                if not tor_url.startswith('magnet'):  # magnets don't use auth
                    pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                    auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                    # don't know what form of password hash is required, try sha1
                    tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
                """
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)

            if snatch:
                notify_snatch(newValueDict["NZBtitle"] + ' at ' + now())
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No RSS found for " + (book["authorName"] + ' ' +
                book['bookName']).strip())
    return False
예제 #24
0
def search_rss_book(books=None, reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not (lazylibrarian.USE_RSS()):
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        if not internet():
            logger.warn('Search RSS Book: No internet connection')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverGoodReads()
        if not wishproviders:
            logger.debug('No rss wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            for book in resultlist:
                # we get rss_author, rss_title, rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                logger.debug('Processing %s item%s in wishlists' %
                             (len(resultlist), plural(len(resultlist))))
                if book['rss_bookid'] and lazylibrarian.CONFIG[
                        'BOOK_API'] == "GoodReads":
                    bookmatch = myDB.match(
                        'select Status,BookName from books where bookid="%s"' %
                        book['rss_bookid'])
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(
                                u'Found book %s, already marked as "%s"' %
                                (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s, marking as "Wanted"' %
                                        bookname)
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    else:
                        import_book(book['rss_bookid'])
                else:
                    item = {}
                    headers = []
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                        headers.append('BookID')
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                        headers.append('ISBN')
                    bookmatch = finditem(item, book['rss_author'], headers)
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(
                                u'Found book %s by %s, already marked as "%s"'
                                % (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(
                                u'Found book %s by %s, marking as "Wanted"' %
                                (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                    else:  # not in database yet
                        results = ''
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]
                            if result['isbn_fuzz'] > lazylibrarian.CONFIG[
                                    'MATCH_RATIO']:
                                logger.info(
                                    "Found (%s%%) %s: %s" %
                                    (result['isbn_fuzz'], result['authorname'],
                                     result['bookname']))
                                import_book(result['bookid'])
                                bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (
                                item['Title'],
                                formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]
                            if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                                logger.info(
                                    "Found (%s%% %s%%) %s: %s" %
                                    (result['author_fuzz'],
                                     result['book_fuzz'], result['authorname'],
                                     result['bookname']))
                                import_book(result['bookid'])
                                bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'],
                                                          book['rss_author'])
                        # noinspection PyUnboundLocalVariable
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            msg = "Closest match (%s%% %s%%) %s: %s" % (
                                result['author_fuzz'], result['book_fuzz'],
                                result['authorname'], result['bookname'])
                            logger.warn(msg)

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books,authors '
            cmd += 'WHERE books.AuthorID = authors.AuthorID and books.Status="Wanted" order by BookAdded desc'
            searchbooks = myDB.select(cmd)

        else:
            # The user has added a new book
            searchbooks = []
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub from books,authors '
                cmd += 'WHERE books.AuthorID = authors.AuthorID and BookID="%s" ' % book[
                    'bookid']
                cmd += 'AND books.Status="Wanted"'
                searchbook = myDB.select(cmd)
                for terms in searchbook:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders:
            if not wishproviders:
                logger.warn('No rss providers are set, check config')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

        rss_count = 0
        for book in searchbooks:
            authorname, bookname = get_searchterm(book, "book")
            found = processResultList(resultlist, authorname, bookname, book,
                                      'book')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in bookname:  # anything to shorten?
                authorname, bookname = get_searchterm(book, "shortbook")
                found = processResultList(resultlist, authorname, bookname,
                                          book, 'shortbook')

            if not found:
                logger.debug(
                    "Searches returned no results. Adding book %s - %s to queue."
                    % (authorname, bookname))
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" %
                    (rss_count, plural(rss_count)))

        if reset:
            scheduleJob(action='Restart', target='search_rss_book')

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' %
                     traceback.format_exc())
예제 #25
0
def search_tor_book(books=None, reset=False):
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHTOR"

    if not lazylibrarian.USE_TOR():
        logger.warn('No Torrent providers set, check config')
        return

    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books WHERE Status="Wanted" order by BookAdded desc'
        )
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select(
                'SELECT BookID, AuthorName, BookName, BookSub from books WHERE BookID="%s" \
                                     AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("TOR search requested for no books or invalid BookID")
        return
    else:
        logger.info('TOR Searching for %i book%s' %
                    (len(searchbooks), plural(len(searchbooks))))

    for searchbook in searchbooks:
        # searchterm is only used for display purposes
        searchterm = searchbook['AuthorName'] + ' "' + searchbook['BookName']
        if searchbook['BookSub']:
            searchterm = searchterm + ': ' + searchbook['BookSub']
        searchterm = searchterm + '"'

        searchlist.append({
            "bookid": searchbook['BookID'],
            "bookName": searchbook['BookName'],
            "bookSub": searchbook['BookSub'],
            "authorName": searchbook['AuthorName'],
            "searchterm": searchterm
        })

    tor_count = 0
    for book in searchlist:

        resultlist, nproviders = IterateOverTorrentSites(book, 'book')
        if not nproviders:
            logger.warn('No torrent providers are set, check config')
            return  # No point in continuing

        found = processResultList(resultlist, book, "book")

        # if you can't find the book, try author/title without any "(extended details, series etc)"
        if not found and '(' in book['bookName']:
            resultlist, nproviders = IterateOverTorrentSites(book, 'shortbook')
            found = processResultList(resultlist, book, "shortbook")

        # if you can't find the book under "books", you might find under general search
        if not found:
            resultlist, nproviders = IterateOverTorrentSites(book, 'general')
            found = processResultList(resultlist, book, "general")

        if not found:
            logger.debug("Searches for %s returned no results." %
                         book['searchterm'])
        if found > True:
            tor_count = tor_count + 1

    logger.info("TORSearch for Wanted items complete, found %s book%s" %
                (tor_count, plural(tor_count)))

    if reset:
        scheduleJob(action='Restart', target='search_tor_book')
예제 #26
0
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not (lazylibrarian.USE_RSS()):
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
            for book in resultlist:
                # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                    bookmatch = myDB.match('select Status,BookName from books where bookid=?', (book['rss_bookid'],))
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s, already marked as "%s"' % (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s, marking as "Wanted"' % bookname)
                            controlValueDict = {"BookID": book['rss_bookid']}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:
                        import_book(book['rss_bookid'])
                        new_books += 1
                else:
                    item = {}
                    headers = []
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                        headers.append('BookID')
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                        headers.append('ISBN')
                    bookmatch = finditem(item, book['rss_author'], headers)
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(
                                u'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:  # not in database yet
                        results = ''
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]  # type: dict
                            if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%%) %s: %s" %
                                            (result['isbn_fuzz'], result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                        if results:
                            result = results[0]  # type: dict
                            if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                    and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                          result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True

                    if not bookmatch:
                        msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                        if not results:
                            msg += ', No results returned'
                            logger.warn(msg)
                        else:
                            msg += ', No match found'
                            logger.warn(msg)
                            msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                        result['authorname'], result['bookname'])
                            logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

        searchbooks = []
        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'],))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders and not wishproviders:
            logger.warn('No rss providers are available')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    searchlist.append(
                        {"bookid": searchbook['BookID'],
                         "bookName": searchbook['BookName'],
                         "bookSub": searchbook['BookSub'],
                         "authorName": searchbook['AuthorName'],
                         "library": "eBook",
                         "searchterm": searchterm})

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    searchlist.append(
                        {"bookid": searchbook['BookID'],
                         "bookName": searchbook['BookName'],
                         "bookSub": searchbook['BookSub'],
                         "authorName": searchbook['AuthorName'],
                         "library": "AudioBook",
                         "searchterm": searchterm})

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("NZB Searches for %s %s returned no results." % (book['library'], book['searchterm']))
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
예제 #27
0
def downloadResult(match, book):
    """ match:  best result from search providers
        book:   book we are downloading (needed for reporting author name)
        return: 0 if failed to snatch
                1 if already snatched
                2 if we snatched it
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()

        newValueDict = match[1]
        controlValueDict = match[2]

        # It's possible to get book and wanted tables "Snatched" status out of sync
        # for example if a user marks a book as "Wanted" after a search task snatches it and before postprocessor runs
        # so check status in both tables here
        snatched = myDB.match('SELECT BookID from wanted WHERE BookID=? and AuxInfo=? and Status="Snatched"',
                              (newValueDict["BookID"], newValueDict["AuxInfo"]))
        if snatched:
            logger.debug('%s %s %s already marked snatched in wanted table' %
                         (newValueDict["AuxInfo"], book['authorName'], book['bookName']))
            return 1  # someone else already found it

        if newValueDict["AuxInfo"] == 'eBook':
            snatched = myDB.match('SELECT BookID from books WHERE BookID=? and Status="Snatched"',
                                  (newValueDict["BookID"],))
        else:
            snatched = myDB.match('SELECT BookID from books WHERE BookID=? and AudioStatus="Snatched"',
                                  (newValueDict["BookID"],))
        if snatched:
            logger.debug('%s %s %s already marked snatched in book table' %
                         (newValueDict["AuxInfo"], book['authorName'], book['bookName']))
            return 1  # someone else already found it

        myDB.upsert("wanted", newValueDict, controlValueDict)
        if newValueDict['NZBmode'] == 'direct':
            snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                          controlValueDict["NZBurl"], newValueDict["AuxInfo"])
        elif newValueDict['NZBmode'] in ["torznab", "torrent", "magnet"]:
            snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                       controlValueDict["NZBurl"], newValueDict["AuxInfo"])
        elif newValueDict['NZBmode'] == 'nzb':
            snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"],
                                       controlValueDict["NZBurl"], newValueDict["AuxInfo"])
        else:
            logger.error('Unhandled NZBmode [%s] for %s' % (newValueDict['NZBmode'], controlValueDict["NZBurl"]))
            snatch = 0

        if snatch:
            logger.info('Downloading %s %s from %s' %
                        (newValueDict["AuxInfo"], newValueDict["NZBtitle"], newValueDict["NZBprov"]))
            custom_notify_snatch("%s %s" % (newValueDict["BookID"], newValueDict['AuxInfo']))
            notify_snatch("%s %s from %s at %s" %
                          (newValueDict["AuxInfo"], newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
            # at this point we could add NZBprov to the blocklist with a short timeout, a second or two?
            # This would implement a round-robin search system. Blocklist with an incremental counter.
            # If number of active providers == number blocklisted, so no unblocked providers are left,
            # either sleep for a while, or unblock the one with the lowest counter.
            scheduleJob(action='Start', target='processDir')
            return 2  # we found it
        return 0
    except Exception:
        logger.error('Unhandled exception in downloadResult: %s' % traceback.format_exc())
        return 0
예제 #28
0
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    if not (lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if not books:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        myDB = database.DBConnection()

        searchbooks = []
        if not books:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'],))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders, _ = IterateOverRSSSites()
        if not nproviders:
            logger.warn('No rss providers are available')
            scheduleJob(action='Stop', target='search_rss_book')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('eBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append(
                            {"bookid": searchbook['BookID'],
                             "bookName": searchbook['BookName'],
                             "bookSub": searchbook['BookSub'],
                             "authorName": searchbook['AuthorName'],
                             "library": "eBook",
                             "searchterm": searchterm})

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"'
                    snatched = myDB.match(cmd, (searchbook["BookID"],))
                    if snatched:
                        logger.warn('AudioBook %s %s already marked snatched in wanted table' %
                                    (searchbook['AuthorName'], searchbook['BookName']))
                    else:
                        searchlist.append(
                            {"bookid": searchbook['BookID'],
                             "bookName": searchbook['BookName'],
                             "bookSub": searchbook['BookSub'],
                             "authorName": searchbook['AuthorName'],
                             "library": "AudioBook",
                             "searchterm": searchterm})

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("RSS Searches for %s %s returned no results." % (book['library'], book['searchterm']))
            if found > 1:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
예제 #29
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '.', ';': ''}

    match_ratio = int(lazylibrarian.CONFIG['MATCH_RATIO'])
    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for tor in resultlist:
        torTitle = unaccented_str(tor['tor_title'])
        torTitle = replace_all(torTitle, dictrepl).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % tor_url)
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in author.lower() and word not in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        tor_size_temp = check_int(tor_size_temp, 1000)
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
        if not rejected:
            if minsize and tor_size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor['tor_prov'],
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (torBook_match + torAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(author))
            words -= len(getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.info(u'Nearest TOR match (%s%%): %s using %s search for %s %s' %
                        (score, nzb_Title, searchtype, author, title))
            return False

        logger.info(u'Best TOR match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match('SELECT BookID from books WHERE BookID="%s" and Status="Snatched"' %
                                   newValueDict["BookID"])
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it, not us
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if newValueDict["NZBprov"] == 'libgen':
                # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], nzb_Title)
            else:
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                logger.info('Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch("%s from %s at %s" %
                              (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                custom_notify_snatch(newValueDict["BookID"])
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No torrent's found for [%s] using searchtype %s" % (book["searchterm"], searchtype))
    return False
예제 #30
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
                                     )
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"',
                    (magazine['bookid'], ))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn(
                'There is nothing to search for.  Mark some magazines as active.'
            )

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No nzb providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG,
                                 0) + 1200 < timenow:
                        logger.warn(
                            'No direct providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No tor providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No rss providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item[
                                'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace(
                        "'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(
                        nzbsize_temp, 1000
                    )  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {
                                '.': ' ',
                                '-': ' ',
                                '/': ' ',
                                '+': ' ',
                                '_': ' ',
                                '(': '',
                                ')': ''
                            }
                            nzbtitle_formatted = replace_all(nzbtitle,
                                                             dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA)
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[
                                    0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(
                                nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(
                                ' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    wlist.append(unaccented(word).lower())
                                for word in bookid_exploded:
                                    if unaccented(word).lower() not in wlist:
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        u"Magazine title match failed " +
                                        bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(u"Magazine matched " +
                                                 bookid + " for " +
                                                 nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
                            newdatish = "none"
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos -
                                                                        1])
                                    if month:
                                        if pos - 1:
                                            day = check_int(
                                                nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (
                                            year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and (pos - 1):
                                        month = month2num(
                                            nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(
                                                nzbtitle_exploded[
                                                    pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(
                                            nzbtitle_exploded):
                                        month = check_int(
                                            nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(
                                                    nzbtitle_exploded):
                                                day = check_int(
                                                    nzbtitle_exploded[pos + 2],
                                                    1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in [
                                            "issue", "no", "nr", "vol"
                                    ]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(
                                                nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(
                                                    issue)  # 4 == 04 == 004
                                                if pos + 2 < len(
                                                        nzbtitle_exploded):
                                                    year = check_year(
                                                        nzbtitle_exploded[pos +
                                                                          2])
                                                    if year and year < int(
                                                            datetime.date.
                                                            today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(
                                            nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(
                                                issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today(
                                            ).year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(
                                            issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(
                                                datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" %
                                             (regex_pass, nzbtitle_formatted,
                                              newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero

                                if str(newdatish).isdigit():
                                    logger.debug(
                                        'Magazine comparing issue numbers (%s)'
                                        % newdatish)
                                    control_date = 0
                                elif re.match('\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(
                                        lazylibrarian.CONFIG['MAG_AGE']
                                    ) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime(
                                        "%Y-%m-%d", time.localtime(start_time))
                                    logger.debug(
                                        'Magazine date comparing to %s' %
                                        control_date)
                                else:
                                    logger.debug(
                                        'Magazine unable to find comparison type [%s]'
                                        % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(
                                    newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(
                                    4)  # pad so we sort correctly
                            elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match('\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(
                                    newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match('\d+-\d\d-\d\d',
                                            str(control_date)):
                                    logger.debug(
                                        'Magazine %s failed: Expecting a date'
                                        % nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        'Magazine %s failed: Expecting issue number'
                                        % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT * from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
예제 #31
0
def search_tor_book(books=None, reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLTOR"
            else:
                threading.currentThread().name = "SEARCHTOR"

        if not lazylibrarian.USE_TOR():
            logger.warn('No Torrent providers set, check config')
            return

        if not internet():
            logger.warn('Search Tor Book: No internet connection')
            return

        myDB = database.DBConnection()
        searchlist = []

        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books,authors '
            cmd += 'WHERE books.AuthorID = authors.AuthorID and books.Status="Wanted" order by BookAdded desc'
            searchbooks = myDB.select(cmd)
        else:
            # The user has added a new book
            searchbooks = []
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub from books,authors WHERE books.Status="Wanted"'
                cmd += ' and books.AuthorID = authors.AuthorID and BookID="%s"' % book['bookid']
                searchbook = myDB.select(cmd)
                for terms in searchbook:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            return

        logger.info('TOR Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            searchlist.append(
                {"bookid": searchbook['BookID'],
                 "bookName": searchbook['BookName'],
                 "bookSub": searchbook['BookSub'],
                 "authorName": searchbook['AuthorName'],
                 "searchterm": searchterm})

        tor_count = 0
        for book in searchlist:

            resultlist, nproviders = IterateOverTorrentSites(book, 'book')
            if not nproviders:
                logger.warn('No torrent providers are set, check config')
                return  # No point in continuing

            found = processResultList(resultlist, book, "book")

            # if you can't find the book, try author/title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:
                resultlist, nproviders = IterateOverTorrentSites(book, 'shortbook')
                found = processResultList(resultlist, book, "shortbook")

            # general search is the same as booksearch for torrents
            # if not found:
            #    resultlist, nproviders = IterateOverTorrentSites(book, 'general')
            #    found = processResultList(resultlist, book, "general")

            if not found:
                logger.debug("Searches for %s returned no results." % book['searchterm'])
            if found > True:
                tor_count += 1

        logger.info("TORSearch for Wanted items complete, found %s book%s" % (tor_count, plural(tor_count)))

        if reset:
            scheduleJob(action='Restart', target='search_tor_book')

    except Exception:
        logger.error('Unhandled exception in search_tor_book: %s' % traceback.format_exc())
예제 #32
0
def search_rss_book(books=None, reset=False):
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHRSS"

    if not (lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return

    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select(
            'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books WHERE Status="Wanted" order by BookAdded desc'
        )
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select(
                'SELECT BookID, AuthorName, BookName, BookSub from books WHERE BookID="%s" \
                                     AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        return

    logger.info('RSS Searching for %i book%s' %
                (len(searchbooks), plural(len(searchbooks))))

    resultlist, nproviders = IterateOverRSSSites()
    if not nproviders:
        logger.warn('No rss providers are set, check config')
        return  # No point in continuing

    dic = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': '',
        '*': '',
        ':': '',
        ';': ''
    }

    rss_count = 0
    for book in searchbooks:
        authorname, bookname = get_searchterm(book, "book")
        found = processResultList(resultlist, authorname, bookname, book,
                                  'book')

        # if you can't find the book, try title without any "(extended details, series etc)"
        if not found:
            if '(' in bookname:  # anything to shorten?
                authorname, bookname = get_searchterm(book, "shortbook")
                found = processResultList(resultlist, authorname, bookname,
                                          book, 'shortbook')

        if not found:
            logger.debug(
                "Searches returned no results. Adding book %s - %s to queue." %
                (authorname, bookname))
        if found > True:
            rss_count = rss_count + 1

    logger.info("RSS Search for Wanted items complete, found %s book%s" %
                (rss_count, plural(rss_count)))

    if reset:
        scheduleJob(action='Restart', target='search_rss_book')
예제 #33
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': ' ',
        '*': '',
        '(': '',
        ')': '',
        '[': '',
        ']': '',
        '#': '',
        '0': '',
        '1': '',
        '2': '',
        '3': '',
        '4': '',
        '5': '',
        '6': '',
        '7': '',
        '8': '',
        '9': '',
        '\'': '',
        ':': '',
        '!': '',
        '-': ' ',
        '\s\s': ' '
    }
    # ' the ': ' ', ' a ': ' ', ' and ': ' ',
    # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': '',
        '*': '',
        ':': '',
        ';': '',
        '\'': ''
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for nzb in resultlist:
        nzb_Title = unaccented_str(replace_all(nzb['nzbtitle'],
                                               dictrepl)).strip()
        nzb_Title = re.sub(r"\s\s+", " ", nzb_Title)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title)
        nzbBook_match = fuzz.token_set_ratio(title, nzb_Title)
        logger.debug(u"NZB author/book Match: %s/%s for %s" %
                     (nzbAuthor_match, nzbBook_match, nzb_Title))
        nzburl = nzb['nzburl']

        rejected = False

        already_failed = myDB.action(
            'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
            nzburl).fetchone()
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" %
                         (nzb_Title, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in nzb_Title.lower(
                ) and not word in author.lower() and not word in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" %
                                 (nzb_Title, word))
                    break

        nzbsize_temp = nzb[
            'nzbsize']  # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = round(float(nzbsize_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if not rejected:
            if maxsize and nzbsize > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % nzb_Title)

        if not rejected:
            if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio:
                bookid = book['bookid']
                nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] +
                            ')').strip()
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']
                controlValueDict = {"NZBurl": nzburl}
                newValueDict = {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": nzbsize,
                    "NZBtitle": nzbTitle,
                    "NZBmode": nzbmode,
                    "Status": "Skipped"
                }

                score = (nzbBook_match +
                         nzbAuthor_match) / 2  # as a percentage
                # lose a point for each extra word in the title so we get the closest match
                words = len(getList(nzb_Title))
                words -= len(getList(author))
                words -= len(getList(title))
                score -= abs(words)
                matches.append(
                    [score, nzb_Title, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match NZB (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.action(
            'SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
            newValueDict["BookID"]).fetchone()
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it
        else:
            logger.debug('%s adding to wanted' % nzb_Title)
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if nzbmode == "torznab":
                snatch = TORDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"])
            else:
                snatch = NZBDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"])
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' +
                                        now())
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No nzb's found for [%s] using searchtype %s" %
                     (book["searchterm"], searchtype))
    return False