Ejemplo n.º 1
0
def processOPF(dest_path=None, authorname=None, bookname=None, bookisbn=None, bookid=None,
               bookpub=None, bookdate=None, bookdesc=None, booklang=None, global_name=None):
    opfinfo = '<?xml version="1.0"  encoding="UTF-8"?>\n\
<package version="2.0" xmlns="http://www.idpf.org/2007/opf" >\n\
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n\
        <dc:title>%s</dc:title>\n\
        <creator>%s</creator>\n\
        <dc:language>%s</dc:language>\n\
        <dc:identifier scheme="GoogleBooks">%s</dc:identifier>\n' % (bookname, authorname, booklang, bookid)

    if bookisbn:
        opfinfo += '        <dc:identifier scheme="ISBN">%s</dc:identifier>\n' % bookisbn

    if bookpub:
        opfinfo += '        <dc:publisher>%s</dc:publisher>\n' % bookpub

    if bookdate:
        opfinfo += '        <dc:date>%s</dc:date>\n' % bookdate

    if bookdesc:
        opfinfo += '        <dc:description>%s</dc:description>\n' % bookdesc

    opfinfo += '        <guide>\n\
            <reference href="cover.jpg" type="cover" title="Cover"/>\n\
        </guide>\n\
    </metadata>\n\
</package>'

    dic = {'...': '', ' & ': ' ', ' = ': ' ', '$': 's', ' + ': ' ', ',': '', '*': ''}

    opfinfo = unaccented_str(replace_all(opfinfo, dic))

    # handle metadata
    opfpath = os.path.join(dest_path, global_name + '.opf')
    if not os.path.exists(opfpath):
        with open(opfpath, 'wb') as opf:
            opf.write(opfinfo)
        logger.debug('Saved metadata to: ' + opfpath)
    else:
        logger.debug('%s already exists. Did not create one.' % opfpath)
Ejemplo n.º 2
0
    def get_author_books(self, authorid=None, authorname=None, refresh=False):
      try:
        logger.debug('[%s] Now processing books with Google Books API' % authorname)
        # google doesnt like accents in author names
        set_url = self.url + urllib.quote('inauthor:"%s"' % unaccented_str(authorname))
        URL = set_url + '&' + urllib.urlencode(self.params)

        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0

        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = ([valid_lang.strip()
                           for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])

            while startindex < number_results:

                self.params['startIndex'] = startindex
                URL = set_url + '&' + urllib.urlencode(self.params)

                try:
                    jsonresults, in_cache = get_json_request(URL, useCache=not refresh)
                    if jsonresults is None:
                        number_results = 0
                    else:
                        if not in_cache:
                            api_hits = api_hits + 1
                        number_results = jsonresults['totalItems']
                except HTTPError as err:
                    logger.warn(
                        'Google Books API Error [%s]: Check your API key or wait a while' %
                        err.reason)
                    break

                if number_results == 0:
                    logger.warn('Found no results for %s' % authorname)
                    break
                else:
                    logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                startindex = startindex + 40

                for item in jsonresults['items']:

                    total_count = total_count + 1

                    # skip if no author, no author is no book.
                    try:
                        Author = item['volumeInfo']['authors'][0]
                    except KeyError:
                        logger.debug('Skipped a result without authorfield.')
                        continue

                    try:
                        if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                            bookisbn = item['volumeInfo'][
                                'industryIdentifiers'][0]['identifier']
                        else:
                            bookisbn = ""
                    except KeyError:
                        bookisbn = ""

                    isbnhead = ""
                    if len(bookisbn) == 10:
                        isbnhead = bookisbn[0:3]

                    try:
                        booklang = item['volumeInfo']['language']
                    except KeyError:
                        booklang = "Unknown"

                    # do we care about language?
                    if "All" not in valid_langs:
                        if bookisbn != "":
                            # seems google lies to us, sometimes tells us books
                            # are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' %
                                                   (isbnhead))
                                if match:
                                    booklang = match['lang']
                                    cache_hits = cache_hits + 1
                                    logger.debug(
                                        "Found cached language [%s] for [%s]" %
                                        (booklang, isbnhead))

                                else:
                                    # no match in cache, try searching librarything for a language code using the isbn
                                    # if no language found, librarything return value is "invalid" or "unknown"
                                    # librarything returns plain text, not xml
                                    BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + \
                                        bookisbn
                                    try:
                                        librarything_wait()
                                        resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                        lt_lang_hits = lt_lang_hits + 1
                                        logger.debug(
                                            "LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                        if (resp != 'invalid' and resp != 'unknown'):
                                            booklang = resp  # found a language code
                                            myDB.action('insert into languages values ("%s", "%s")' %
                                                        (isbnhead, booklang))
                                            logger.debug(u"LT language: " + booklang)
                                    except Exception as e:
                                        booklang = ""
                                        logger.error("Error finding language: %s" % str(e))

                                if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                    # these are all english, may need to expand
                                    # this list
                                    booknamealt = item['volumeInfo']['title']
                                    logger.debug("%s Google thinks [%s], we think [%s]" %
                                                 (booknamealt, googlelang, booklang))
                                    gb_lang_change = gb_lang_change + 1
                            else:
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' %
                                                   (isbnhead))
                                if not match:
                                    myDB.action(
                                        'insert into languages values ("%s", "%s")' %
                                        (isbnhead, booklang))
                                    logger.debug(u"GB language: " + booklang)

                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            booknamealt = item['volumeInfo']['title']
                            logger.debug(
                                'Skipped [%s] with language %s' %
                                (booknamealt, booklang))
                            ignored = ignored + 1
                            continue

                    try:
                        bookpub = item['volumeInfo']['publisher']
                    except KeyError:
                        bookpub = None

                    try:
                        booksub = item['volumeInfo']['subtitle']
                    except KeyError:
                        booksub = None

                    if booksub is None:
                        series = None
                        seriesNum = None
                    else:
                        try:
                            series = booksub.split('(')[1].split(' Series ')[0]
                        except IndexError:
                            series = None
                        try:
                            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
                            if seriesNum[0] == '#':
                                seriesNum = seriesNum[1:]
                        except IndexError:
                            seriesNum = None

                    try:
                        bookdate = item['volumeInfo']['publishedDate']
                    except KeyError:
                        bookdate = '0000-00-00'

                    try:
                        bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                    except KeyError:
                        bookimg = 'images/nocover.png'

                    try:
                        bookrate = item['volumeInfo']['averageRating']
                    except KeyError:
                        bookrate = 0

                    try:
                        bookpages = item['volumeInfo']['pageCount']
                    except KeyError:
                        bookpages = 0

                    try:
                        bookgenre = item['volumeInfo']['categories'][0]
                    except KeyError:
                        bookgenre = None

                    try:
                        bookdesc = item['volumeInfo']['description']
                    except KeyError:
                        bookdesc = None

                    bookname = item['volumeInfo']['title']
                    bookname = unaccented(bookname)
                    dic = {':': '', '"': '', '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace

                    booklink = item['volumeInfo']['canonicalVolumeLink']
                    bookrate = float(bookrate)
                    bookid = item['id']

                    # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                    # and sometimes uses the same bookid if the book is the same but the title is slightly different
                    #
                    # Not sure if googlebooks does too, but we only want one...
                    find_book_status = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid)
                    if find_book_status:
                        for resulted in find_book_status:
                            book_status = resulted['Status']
                            locked = resulted['Manual']
                    else:
                        book_status = lazylibrarian.NEWBOOK_STATUS
                        locked = False

                    rejected = False
                    if re.match('[^\w-]', bookname):  # remove books with bad characters in title
                        logger.debug("[%s] removed book for bad characters" % bookname)
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected and not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' %
                                     (bookid, authorname))
                        removedResults = removedResults + 1
                        rejected = True

                    if not rejected:
                        find_books = myDB.select('SELECT * FROM books WHERE BookName = "%s" and AuthorName = "%s"' %
                                                 (bookname.replace('"', '""'), authorname.replace('"', '""')))
                        if find_books:
                            for find_book in find_books:
                                if find_book['BookID'] != bookid:
                                    # we have a book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (find_book['BookID'], authorname, bookname, bookid))
                                    rejected = True
                                    duplicates = duplicates + 1

                    if not rejected:
                        find_books = myDB.match('SELECT AuthorName,BookName FROM books WHERE BookID = "%s"' % bookid)
                        if find_books:
                            # we have a book with this bookid already
                            if bookname != find_books['BookName'] or authorNameResult != find_books['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                            (bookid, authorname, bookname,
                                             find_books['AuthorName'], find_books['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorname, bookname))
                            duplicates = duplicates + 1
                            rejected = True

                    if not rejected:
                        if book_status != "Ignored" and not locked:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorName": authorname,
                                "AuthorID": authorid,
                                "AuthorLink": "",
                                "BookName": bookname,
                                "BookSub": booksub,
                                "BookDesc": bookdesc,
                                "BookIsbn": bookisbn,
                                "BookPub": bookpub,
                                "BookGenre": bookgenre,
                                "BookImg": bookimg,
                                "BookLink": booklink,
                                "BookRate": bookrate,
                                "BookPages": bookpages,
                                "BookDate": bookdate,
                                "BookLang": booklang,
                                "Status": book_status,
                                "BookAdded": today(),
                                "Series": series,
                                "SeriesNum": seriesNum
                            }
                            resultcount = resultcount + 1

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug(u"Book found: " + bookname + " " + bookdate)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            elif bookimg and bookimg.startswith('http'):
                                link = cache_cover(bookid, bookimg)
                                if link:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            if seriesNum is None:
                                # try to get series info from librarything
                                series, seriesNum = getWorkSeries(bookid)
                                if seriesNum:
                                    logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {
                                        "Series": series,
                                        "SeriesNum": seriesNum
                                    }
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not find_book_status:
                                logger.debug("[%s] Added book: %s [%s]" % (authorname, bookname, booklang))
                                added_count = added_count + 1
                            else:
                                updated_count = updated_count + 1
                                logger.debug("[%s] Updated book: %s" % (authorname, bookname))
                        else:
                            book_ignore_count = book_ignore_count + 1
        except KeyError:
            pass

        logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                     (authorname, api_hits, plural(api_hits)))

        lastbook = myDB.match('SELECT BookName, BookLink, BookDate from books WHERE AuthorID="%s" \
                               AND Status != "Ignored" order by BookDate DESC' % authorid)

        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
        else:
            lastbookname = None
            lastbooklink = None
            lastbookdate = None

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }

        myDB.upsert("authors", newValueDict, controlValueDict)

        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
        logger.debug(
            "Removed %s bad character or no-name result%s for author" %
            (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

        myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' %
                    (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change, cache_hits,
                     ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

      except Exception as e:
        logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
Ejemplo n.º 3
0
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None):

    # check we got a book/magazine in the downloaded files, if not, return
    if bookname:
        booktype = 'book'
    else:
        booktype = 'mag'

    got_book = False
    for bookfile in os.listdir(pp_path):
        if is_valid_booktype(bookfile, booktype=booktype):
            got_book = bookfile
            break

    if got_book is False:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        logger.warn('Failed to locate a book/magazine in %s, leaving for manual processing' % pp_path)
        return False

    # Do we want calibre to import the book for us
    if bookname and len(lazylibrarian.IMP_CALIBREDB):
        try:
            logger.debug('Creating metadata for calibre')
            dest_path=pp_path
            global_name=os.path.splitext(got_book)[0]
            bookid=''
            booklang=''
            bookisbn=''
            bookpub=''
            bookdate=''
            bookdesc=''
            processOPF(dest_path, authorname, bookname, bookisbn, bookid, bookpub, bookdate, bookdesc, booklang, global_name)
            logger.debug('Importing %s, %s into calibre library' % (authorname, bookname))
            params = [lazylibrarian.IMP_CALIBREDB, 'add', '-1', '--with-library', lazylibrarian.DESTINATION_DIR, pp_path]
            logger.debug(str(params))
            res = subprocess.check_output(params, stderr=subprocess.STDOUT)
            if res:
                logger.debug('%s reports: %s' % (lazylibrarian.IMP_CALIBREDB, unaccented_str(res)))
            calibre_dir = os.path.join(lazylibrarian.DESTINATION_DIR, unaccented_str(authorname), '')
            if os.path.isdir(calibre_dir):
                imported = LibraryScan(calibre_dir)  # rescan authors directory so we get the new book in our database
            else:
                imported = LibraryScan(lazylibrarian.DESTINATION_DIR)  # may have to rescan whole library instead
            if not imported and not 'already exist' in res:
                return False
        except subprocess.CalledProcessError as e:
            logger.debug(params)
            logger.debug('calibredb import failed: %s' % e.output)
            return False
        except OSError as e:
            logger.debug('calibredb failed, %s' % e.strerror)
            return False

    else:
        # we are copying the files ourselves
        if not os.path.exists(dest_path):
            logger.debug('%s does not exist, so it\'s safe to create it' % dest_path)
        elif not os.path.isdir(dest_path):
            logger.debug('%s exists but is not a directory, deleting it' % dest_path)
            try:
                os.remove(dest_path)
            except OSError as why:
                logger.debug('Failed to delete %s, %s' % (dest_path, why.strerror))
                return False

        if not os.path.exists(dest_path):
            try:
                os.makedirs(dest_path)
            except OSError as why:
                logger.debug('Failed to create directory %s, %s' % (dest_path, why.strerror))
                return False

        # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
        # After the copy completes, delete source files if DESTINATION_COPY not set,
        # but don't delete source files if copy failed or if in root of download dir
        for fname in os.listdir(pp_path):
            if fname.lower().endswith(".jpg") or fname.lower().endswith(".opf") or \
                    is_valid_booktype(fname, booktype=booktype):
                logger.debug('Copying %s to directory %s' % (fname, dest_path))
                try:
                    shutil.copyfile(os.path.join(pp_path, fname), os.path.join(
                        dest_path, global_name + os.path.splitext(fname)[1]))
                except Exception as why:
                    logger.debug("Failed to copy file %s to %s, %s" % (
                        fname, dest_path, str(why)))
                    return False
            else:
                logger.debug('Ignoring unwanted file: %s' % fname)

    # calibre or ll copied the files we want, now delete source files if not in download root dir
    if not lazylibrarian.DESTINATION_COPY:
        if pp_path != lazylibrarian.DOWNLOAD_DIR:
            try:
                shutil.rmtree(pp_path)
            except Exception as why:
                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))
                return False
    return True
Ejemplo n.º 4
0
def processDir(reset=False):

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
        (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # handle single file downloads here...
                        if is_valid_booktype(fname, booktype="book") \
                            or is_valid_booktype(fname, booktype="mag"):
                            dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                            if os.path.exists(dirname):
                                try:
                                    shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname))
                                    fname = os.path.splitext(fname)[0]
                                except Exception as why:
                                    logger.debug("Failed to move file %s to %s, %s" %
                                        (fname, dirname, str(why)))
                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.info(u'Best match (%s%%): %s for %s' %
                    (match, pp_path, book['NZBtitle']))

            data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
            if data:
                authorname = data[0]['AuthorName']
                bookname = data[0]['BookName']
                if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                       ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                    lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                if data:
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                            lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                        '$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug('Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(book['AuxInfo']) # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                else:
                    newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                    "IssueStatus": "Open"}
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                newValueDict = {"IssueAcquired": today(),
                                "IssueFile": dest_file,
                                "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                }
                myDB.upsert("issues", newValueDict, controlValueDict)

                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s at %s" % (global_name, now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID'])

            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except:
                logger.debug("Unable to rename %s" % pp_path)

    downloads = os.listdir(processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)

            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)

            if (os.path.isdir(pp_path)):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')
Ejemplo n.º 5
0
def findBestResult(resultlist, book, searchtype, source):
    """ resultlist: collated results from search providers
        book:       the book we want to find
        searchtype: book, magazine, shortbook, audiobook etc.
        source:     nzb, tor, rss, direct
        return:     highest scoring match, or None if no match
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        dictrepl = {
            '...': '',
            '.': ' ',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': ' ',
            '*': '',
            '(': '',
            ')': '',
            '[': '',
            ']': '',
            '#': '',
            '0': '',
            '1': '',
            '2': '',
            '3': '',
            '4': '',
            '5': '',
            '6': '',
            '7': '',
            '8': '',
            '9': '',
            '\'': '',
            ':': '',
            '!': '',
            '-': ' ',
            '\s\s': ' '
        }

        dic = {
            '...': '',
            '.': ' ',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': '',
            '*': '',
            ':': '.',
            ';': '',
            '\'': ''
        }

        if source == 'rss':
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'

        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'

        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        logger.debug('Searching %s %s results for best %s match' %
                     (len(resultlist), source, auxinfo))

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(
                replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ",
                                 resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz:
                logger.debug("%s author/book Match: %s/%s %s at %s" %
                             (source.upper(), Author_match, Book_match,
                              resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                blacklisted = myDB.match(
                    'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                    (url, ))
                if blacklisted:
                    logger.debug(
                        "Rejecting %s, url blacklisted (Failed) at %s" %
                        (resultTitle, blacklisted['NZBprov']))
                    rejected = True
                if not rejected:
                    blacklisted = myDB.match(
                        'SELECT * from wanted WHERE NZBprov=? and NZBtitle=? and Status="Failed"',
                        (res[prefix + 'prov'], resultTitle))
                    if blacklisted:
                        logger.debug(
                            "Rejecting %s, title blacklisted (Failed) at %s" %
                            (resultTitle, blacklisted['NZBprov']))
                        rejected = True

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                blacklisted = myDB.match('SELECT * from wanted WHERE NZBurl=?',
                                         (url, ))
                if blacklisted:
                    logger.debug("Rejecting %s, url blacklisted (%s) at %s" %
                                 (resultTitle, blacklisted['Status'],
                                  blacklisted['NZBprov']))
                    rejected = True
                if not rejected:
                    blacklisted = myDB.match(
                        'SELECT * from wanted WHERE NZBprov=? and NZBtitle=?',
                        (res[prefix + 'prov'], resultTitle))
                    if blacklisted:
                        logger.debug(
                            "Rejecting %s, title blacklisted (%s) at %s" %
                            (resultTitle, blacklisted['Status'],
                             blacklisted['NZBprov']))
                        rejected = True

            if not rejected and not url.startswith(
                    'http') and not url.startswith('magnet'):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" %
                             (resultTitle, url))

            if not rejected:
                for word in reject_list:
                    if word in getList(resultTitle.lower()) and word not in getList(author.lower()) \
                            and word not in getList(title.lower()):
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" %
                                     (resultTitle, word))
                        break

            size_temp = check_int(
                res[prefix + 'size'],
                1000)  # Need to cater for when this is NONE (Issue 35)
            size = round(float(size_temp) / 1048576, 2)

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large (%sMb)" %
                             (resultTitle, size))

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small (%sMb)" %
                             (resultTitle, size))

            if not rejected:
                bookid = book['bookid']
                # newTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
                # newTitle = resultTitle + ' LL.(' + book['bookid'] + ')'

                if source == 'nzb':
                    mode = res['nzbmode']  # nzb, torznab
                else:
                    mode = res[
                        'tor_type']  # torrent, magnet, nzb(from rss), direct

                controlValueDict = {"NZBurl": url}
                newValueDict = {
                    "NZBprov": res[prefix + 'prov'],
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": size,
                    "NZBtitle": resultTitle,
                    "NZBmode": mode,
                    "AuxInfo": auxinfo,
                    "Status": "Matched"
                }

                score = (Book_match + Author_match) / 2  # as a percentage
                # lose a point for each unwanted word in the title so we get the closest match
                # but for RSS ignore anything at the end in square braces [keywords, genres etc]
                if source == 'rss':
                    wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
                else:
                    wordlist = getList(resultTitle.lower())
                words = [
                    x for x in wordlist if x not in getList(author.lower())
                ]
                words = [x for x in words if x not in getList(title.lower())]
                typelist = ''

                if newValueDict['AuxInfo'] == 'eBook':
                    words = [
                        x for x in words
                        if x not in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                    ]
                    typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                elif newValueDict['AuxInfo'] == 'AudioBook':
                    words = [
                        x for x in words if x not in getList(
                            lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                    ]
                    typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                score -= len(words)
                # prioritise titles that include the ebook types we want
                # add more points for booktypes nearer the left in the list
                # eg if epub, mobi, pdf  add 3 points if epub found, 2 for mobi, 1 for pdf
                booktypes = [x for x in wordlist if x in typelist]
                if booktypes:
                    typelist = list(reversed(typelist))
                    for item in booktypes:
                        for i in [
                                i for i, x in enumerate(typelist) if x == item
                        ]:
                            score += i + 1

                matches.append(
                    [score, newValueDict, controlValueDict, res['priority']])

        if matches:
            highest = max(matches, key=lambda s: (s[0], s[3]))
            score = highest[0]
            newValueDict = highest[1]
            # controlValueDict = highest[2]
            dlpriority = highest[3]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                logger.info(
                    'Nearest match (%s%%): %s using %s search for %s %s' %
                    (score, newValueDict['NZBtitle'], searchtype,
                     book['authorName'], book['bookName']))
            else:
                logger.info(
                    'Best match (%s%%): %s using %s search, %s priority %s' %
                    (score, newValueDict['NZBtitle'], searchtype,
                     newValueDict['NZBprov'], dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" %
                         (source, book["searchterm"], searchtype))
        return None
    except Exception:
        logger.error('Unhandled exception in findBestResult: %s' %
                     traceback.format_exc())
Ejemplo n.º 6
0
def TORDownloadMethod(bookid=None,
                      tor_title=None,
                      tor_url=None,
                      library='eBook'):
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &amp;
        #
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC',
                                          value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
        proxies = proxyList()
        try:
            r = requests.get(tor_url,
                             headers=headers,
                             timeout=90,
                             proxies=proxies)
        except requests.exceptions.Timeout:
            logger.warn('Timeout fetching file from url: %s' % tor_url)
            return False
        except Exception as e:
            if hasattr(e, 'reason'):
                logger.warn('%s fetching file from url: %s, %s' %
                            (type(e).__name__, tor_url, e.reason))
            else:
                logger.warn('%s fetching file from url: %s, %s' %
                            (type(e).__name__, tor_url, str(e)))
            return False

        torrent = r.content

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                        tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                        tor_name)
                msg = ''
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, text_type):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm'
                    setperm(tor_path)
                    msg += 'Saved'
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.debug("Failed to write magnet to file: %s %s" %
                                 (type(e).__name__, str(e)))
                    logger.debug("Progress: %s" % msg)
                    logger.debug("Filename [%s]" % (repr(tor_path)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                    tor_name)
            msg = ''
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, text_type):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved'
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.debug("Failed to write torrent to file: %s %s" %
                             (type(e).__name__, str(e)))
                logger.debug("Progress: %s" % msg)
                logger.debug("Filename [%s]" % (repr(tor_path)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG[
            'UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url,
                                         hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG[
            'RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url,
                                         hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG[
            'TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG[
                'QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url,
                                        hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG[
            'TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG[
                'TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG[
            'DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet',
                                             tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url,
                                             args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID,
                                        lazylibrarian.CONFIG['DELUGE_LABEL'])
                    result = client.call('core.get_torrent_status', downloadID,
                                         {})
                    # for item in result:
                    #    logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']

            except Exception as e:
                logger.debug('DelugeRPC failed %s %s' %
                             (type(e).__name__, str(e)))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn(
                    '%s: name contains hash, probably unresolved magnet' %
                    Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" %
                                 library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = True
                        logger.debug("Rejecting torrent name %s, contains %s" %
                                     (tor_title, word))
                        break
                if rejected:
                    myDB.action(
                        'UPDATE wanted SET status="Failed" WHERE NZBurl=?',
                        (full_url, ))
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' %
                                 (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?',
                                (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?',
                        (bookid, ))
        elif library == 'AudioBook':
            myDB.action(
                'UPDATE books SET audiostatus="Snatched" WHERE BookID=?',
                (bookid, ))
        myDB.action(
            'UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
            (Source, downloadID, full_url))
        return True

    logger.error('Failed to download torrent from %s, %s' % (Source, tor_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?',
                (full_url, ))
    return False
Ejemplo n.º 7
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '.', ';': ''}

    match_ratio = int(lazylibrarian.CONFIG['MATCH_RATIO'])
    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for tor in resultlist:
        torTitle = unaccented_str(tor['tor_title'])
        torTitle = replace_all(torTitle, dictrepl).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % tor_url)
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in author.lower() and word not in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        tor_size_temp = check_int(tor_size_temp, 1000)
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
        if not rejected:
            if minsize and tor_size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor['tor_prov'],
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (torBook_match + torAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(author))
            words -= len(getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.info(u'Nearest TOR match (%s%%): %s using %s search for %s %s' %
                        (score, nzb_Title, searchtype, author, title))
            return False

        logger.info(u'Best TOR match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match('SELECT BookID from books WHERE BookID="%s" and Status="Snatched"' %
                                   newValueDict["BookID"])
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it, not us
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if newValueDict["NZBprov"] == 'libgen':
                # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"], nzb_Title)
            else:
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                logger.info('Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch("%s from %s at %s" %
                              (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                custom_notify_snatch(newValueDict["BookID"])
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No torrent's found for [%s] using searchtype %s" % (book["searchterm"], searchtype))
    return False
Ejemplo n.º 8
0
def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None):

    # check we got a book/magazine in the downloaded files, if not, return
    if bookname:
        booktype = 'book'
    else:
        booktype = 'mag'

    got_book = False
    for bookfile in os.listdir(pp_path):
        if is_valid_booktype(bookfile, booktype=booktype):
            got_book = bookfile
            break

    if got_book is False:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        logger.warn('Failed to locate a book/magazine in %s, leaving for manual processing' % pp_path)
        return False

    # Do we want calibre to import the book for us
    if bookname and len(lazylibrarian.IMP_CALIBREDB):
        processpath = lazylibrarian.DIRECTORY('Destination')
        try:
            logger.debug('Importing %s into calibre library' % (global_name))
            # calibre is broken, ignores metadata.opf and book_name.opf
            # also ignores --title and --author as parameters
            # so we have to configure calibre to parse the filename for author/title
            # and rename the book to the format we want calibre to use
            for bookfile in os.listdir(pp_path):
                filename, extn = os.path.splitext(bookfile)
                # calibre does not like quotes in author names
                os.rename(os.path.join(pp_path, filename + extn), os.path.join(
                    pp_path, global_name.replace('"', '_') + extn))

            params = [lazylibrarian.IMP_CALIBREDB,
                      'add',
                      # '--title="%s"' % bookname,
                      # '--author="%s"' % unaccented(authorname),
                      '-1',
                      '--with-library',
                      processpath, pp_path
                      ]
            logger.debug(str(params))
            res = subprocess.check_output(params, stderr=subprocess.STDOUT)
            if res:
                logger.debug('%s reports: %s' % (lazylibrarian.IMP_CALIBREDB, unaccented_str(res)))
                if 'already exist' in res:
                    logger.warn('Calibre failed to import %s %s, reports book already exists' % (authorname, bookname))

            # calibre does not like quotes in author names
            calibre_dir = os.path.join(processpath, unaccented_str(authorname.replace('"', '_')), '')
            if os.path.isdir(calibre_dir):
                imported = LibraryScan(calibre_dir)  # rescan authors directory so we get the new book in our database
            else:
                logger.error("Failed to locate calibre dir [%s]" % calibre_dir)
                imported = False
                # imported = LibraryScan(processpath)  # may have to rescan whole library instead
            if not imported:
                return False
        except subprocess.CalledProcessError as e:
            logger.debug(params)
            logger.debug('calibredb import failed: %s' % e.output)
            return False
        except OSError as e:
            logger.debug('calibredb failed, %s' % e.strerror)
            return False

    else:
        # we are copying the files ourselves, either it's a magazine or we don't want to use calibre
        if not os.path.exists(dest_path):
            logger.debug('%s does not exist, so it\'s safe to create it' % dest_path)
        elif not os.path.isdir(dest_path):
            logger.debug('%s exists but is not a directory, deleting it' % dest_path)
            try:
                os.remove(dest_path)
            except OSError as why:
                logger.debug('Failed to delete %s, %s' % (dest_path, why.strerror))
                return False

        if not os.path.exists(dest_path):
            try:
                os.makedirs(dest_path)
            except OSError as why:
                logger.debug('Failed to create directory %s, %s' % (dest_path, why.strerror))
                return False
            setperm(dest_path)

        # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
        for fname in os.listdir(pp_path):
            if fname.lower().endswith(".jpg") or fname.lower().endswith(".opf") or \
                    is_valid_booktype(fname, booktype=booktype):
                logger.debug('Copying %s to directory %s' % (fname, dest_path))
                try:
                    shutil.copyfile(os.path.join(pp_path, fname), os.path.join(
                        dest_path, global_name + os.path.splitext(fname)[1]))
                    setperm(os.path.join(dest_path, global_name + os.path.splitext(fname)[1]))
                except Exception as why:
                    logger.debug("Failed to copy file %s to %s, %s" % (
                        fname, dest_path, str(why)))
                    return False
            else:
                logger.debug('Ignoring unwanted file: %s' % fname)
    return True
Ejemplo n.º 9
0
def searchItem(item=None, bookid=None, cat=None):
    """
    Call all active search providers to search for item
    return a list of results, each entry in list containing percentage_match, title, provider, size, url
    item = searchterm to use for general search
    bookid = link to data for book/audio searches
    cat = category to search [general, book, audio]
    """
    results = []

    if not item:
        return results

    book = {}
    searchterm = unaccented_str(item)

    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
        cmd += ' and bookID=?'
        match = myDB.match(cmd, (bookid, ))
        if match:
            book['authorName'] = match['authorName']
            book['bookName'] = match['bookName']
            book['bookSub'] = match['bookSub']
        else:
            logger.debug('Forcing general search')
            cat = 'general'

    nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR(
    ) + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
    logger.debug('Searching %s provider%s (%s) for %s' %
                 (nprov, plural(nprov), cat, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nprov = IterateOverNewzNabSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nprov = IterateOverTorrentSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_DIRECT():
        resultlist, nprov = IterateOverDirectSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nprov, dltypes = IterateOverRSSSites()
        if nprov and dltypes != 'M':
            results += resultlist

    # reprocess to get consistent results
    searchresults = []
    for item in results:
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        if 'dispname' in item:
            provider = item['dispname']
        elif 'nzbprov' in item:
            provider = item['nzbprov']
        elif 'tor_prov' in item:
            provider = item['tor_prov']
        elif 'rss_prov' in item:
            provider = item['rss_prov']
        if 'nzbtitle' in item:
            title = item['nzbtitle']
        if 'nzburl' in item:
            url = item['nzburl']
        if 'nzbsize' in item:
            size = item['nzbsize']
        if 'nzbdate' in item:
            date = item['nzbdate']
        if 'nzbmode' in item:
            mode = item['nzbmode']
        if 'tor_title' in item:
            title = item['tor_title']
        if 'tor_url' in item:
            url = item['tor_url']
        if 'tor_size' in item:
            size = item['tor_size']
        if 'tor_date' in item:
            date = item['tor_date']
        if 'tor_type' in item:
            mode = item['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'

            # calculate match percentage - torrents might have words_with_underscore_separator
            score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
            if score >= 40:  # ignore wildly wrong results?
                result = {
                    'score': score,
                    'title': title,
                    'provider': provider,
                    'size': size,
                    'date': date,
                    'url': quote_plus(url.encode('utf-8')),
                    'mode': mode
                }

                searchresults.append(result)

    logger.debug('Found %s %s results for %s' %
                 (len(searchresults), cat, searchterm))
    return searchresults
Ejemplo n.º 10
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                ':': '', '!': '', '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ',
                # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': '', '\'': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for nzb in resultlist:
        nzb_Title = unaccented_str(replace_all(nzb['nzbtitle'], dictrepl)).strip()
        nzb_Title = re.sub(r"\s\s+", " ", nzb_Title)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title)
        nzbBook_match = fuzz.token_set_ratio(title, nzb_Title)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzb_Title))
        nzburl = nzb['nzburl']

        rejected = False

        already_failed = myDB.action('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
                                    nzburl).fetchone()
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (nzb_Title, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in nzb_Title.lower() and not word in author.lower() and not word in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (nzb_Title, word))
                    break

        nzbsize_temp = nzb['nzbsize']  # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = round(float(nzbsize_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if not rejected:
            if maxsize and nzbsize > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % nzb_Title)

        if not rejected:
            if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio:
                bookid = book['bookid']
                nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']
                controlValueDict = {"NZBurl": nzburl}
                newValueDict = {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": nzbsize,
                    "NZBtitle": nzbTitle,
                    "NZBmode": nzbmode,
                    "Status": "Skipped"
                }

                score = (nzbBook_match + nzbAuthor_match)/2  # as a percentage
                # lose a point for each extra word in the title so we get the closest match
                words = len(getList(nzb_Title))
                words -= len(getList(author))
                words -= len(getList(title))
                score -= abs(words)
                matches.append([score, nzb_Title, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match NZB (%s%%): %s using %s search' %
            (score, nzb_Title, searchtype))

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                    newValueDict["BookID"]).fetchone()
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it
        else:
            logger.debug('%s adding to wanted' % nzb_Title)
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if nzbmode == "torznab":
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            else:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + now())
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No nzb's found for [%s] using searchtype %s" % (book["searchterm"], searchtype))
    return False
Ejemplo n.º 11
0
def searchItem(item=None, bookid=None, cat=None):
    """
    Call all active search providers to search for item
    return a list of results, each entry in list containing percentage_match, title, provider, size, url
    item = searchterm to use for general search
    bookid = link to data for book/audio searches
    cat = category to search [general, book, audio]
    """
    results = []

    if not item:
        return results

    book = {}
    searchterm = unaccented_str(item)

    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
        cmd += ' and bookID=?'
        match = myDB.match(cmd, (bookid,))
        if match:
            book['authorName'] = match['authorName']
            book['bookName'] = match['bookName']
            book['bookSub'] = match['bookSub']
        else:
            logger.debug('Forcing general search')
            cat = 'general'

    nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
    logger.debug('Searching %s provider%s (%s) for %s' % (nprov, plural(nprov), cat, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nprov = IterateOverNewzNabSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nprov = IterateOverTorrentSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_DIRECT():
        resultlist, nprov = IterateOverDirectSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nprov, dltypes = IterateOverRSSSites()
        if nprov and dltypes != 'M':
            results += resultlist

    # reprocess to get consistent results
    searchresults = []
    for item in results:
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        if 'dispname' in item:
            provider = item['dispname']
        elif 'nzbprov' in item:
            provider = item['nzbprov']
        elif 'tor_prov' in item:
            provider = item['tor_prov']
        elif 'rss_prov' in item:
            provider = item['rss_prov']
        if 'nzbtitle' in item:
            title = item['nzbtitle']
        if 'nzburl' in item:
            url = item['nzburl']
        if 'nzbsize' in item:
            size = item['nzbsize']
        if 'nzbdate' in item:
            date = item['nzbdate']
        if 'nzbmode' in item:
            mode = item['nzbmode']
        if 'tor_title' in item:
            title = item['tor_title']
        if 'tor_url' in item:
            url = item['tor_url']
        if 'tor_size' in item:
            size = item['tor_size']
        if 'tor_date' in item:
            date = item['tor_date']
        if 'tor_type' in item:
            mode = item['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'

            url = url.encode('utf-8')
            if mode == 'torznab':
                # noinspection PyTypeChecker
                if url.startswith('magnet'):
                    mode = 'magnet'

            # calculate match percentage - torrents might have words_with_underscore_separator
            score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
            if score >= 40:  # ignore wildly wrong results?
                result = {'score': score, 'title': title, 'provider': provider, 'size': size, 'date': date,
                          'url': quote_plus(url), 'mode': mode}

                searchresults.append(result)

    logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm))
    return searchresults
Ejemplo n.º 12
0
def processDestination(pp_path=None,
                       dest_path=None,
                       authorname=None,
                       bookname=None,
                       global_name=None):

    # check we got a book/magazine in the downloaded files, if not, return
    if bookname:
        booktype = 'book'
    else:
        booktype = 'mag'

    got_book = False
    for bookfile in os.listdir(pp_path):
        if is_valid_booktype(bookfile, booktype=booktype):
            got_book = bookfile
            break

    if got_book is False:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        logger.warn(
            'Failed to locate a book/magazine in %s, leaving for manual processing'
            % pp_path)
        return False

    # Do we want calibre to import the book for us
    if bookname and len(lazylibrarian.IMP_CALIBREDB):
        try:
            logger.debug('Creating metadata for calibre')
            dest_path = pp_path
            global_name = os.path.splitext(got_book)[0]
            bookid = ''
            booklang = ''
            bookisbn = ''
            bookpub = ''
            bookdate = ''
            bookdesc = ''
            processOPF(dest_path, authorname, bookname, bookisbn, bookid,
                       bookpub, bookdate, bookdesc, booklang, global_name)
            logger.debug('Importing %s, %s into calibre library' %
                         (authorname, bookname))
            params = [
                lazylibrarian.IMP_CALIBREDB, 'add', '-1', '--with-library',
                lazylibrarian.DESTINATION_DIR, pp_path
            ]
            logger.debug(str(params))
            res = subprocess.check_output(params, stderr=subprocess.STDOUT)
            if res:
                logger.debug(
                    '%s reports: %s' %
                    (lazylibrarian.IMP_CALIBREDB, unaccented_str(res)))
            calibre_dir = os.path.join(lazylibrarian.DESTINATION_DIR,
                                       unaccented_str(authorname), '')
            if os.path.isdir(calibre_dir):
                imported = LibraryScan(
                    calibre_dir
                )  # rescan authors directory so we get the new book in our database
            else:
                imported = LibraryScan(
                    lazylibrarian.DESTINATION_DIR
                )  # may have to rescan whole library instead
            if not imported and not 'already exist' in res:
                return False
        except subprocess.CalledProcessError as e:
            logger.debug(params)
            logger.debug('calibredb import failed: %s' % e.output)
            return False
        except OSError as e:
            logger.debug('calibredb failed, %s' % e.strerror)
            return False

    else:
        # we are copying the files ourselves
        if not os.path.exists(dest_path):
            logger.debug('%s does not exist, so it\'s safe to create it' %
                         dest_path)
        elif not os.path.isdir(dest_path):
            logger.debug('%s exists but is not a directory, deleting it' %
                         dest_path)
            try:
                os.remove(dest_path)
            except OSError as why:
                logger.debug('Failed to delete %s, %s' %
                             (dest_path, why.strerror))
                return False

        if not os.path.exists(dest_path):
            try:
                os.makedirs(dest_path)
            except OSError as why:
                logger.debug('Failed to create directory %s, %s' %
                             (dest_path, why.strerror))
                return False

        # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
        # After the copy completes, delete source files if DESTINATION_COPY not set,
        # but don't delete source files if copy failed or if in root of download dir
        for fname in os.listdir(pp_path):
            if fname.lower().endswith(".jpg") or fname.lower().endswith(".opf") or \
                    is_valid_booktype(fname, booktype=booktype):
                logger.debug('Copying %s to directory %s' % (fname, dest_path))
                try:
                    shutil.copyfile(
                        os.path.join(pp_path, fname),
                        os.path.join(dest_path,
                                     global_name + os.path.splitext(fname)[1]))
                except Exception as why:
                    logger.debug("Failed to copy file %s to %s, %s" %
                                 (fname, dest_path, str(why)))
                    return False
            else:
                logger.debug('Ignoring unwanted file: %s' % fname)

    # calibre or ll copied the files we want, now delete source files if not in download root dir
    if not lazylibrarian.DESTINATION_COPY:
        if pp_path != lazylibrarian.DOWNLOAD_DIR:
            try:
                shutil.rmtree(pp_path)
            except Exception as why:
                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))
                return False
    return True
Ejemplo n.º 13
0
def import_book(pp_path=None, bookID=None):

    # Separated this into a function so we can more easily import books from an alternate directory
    # and move them into LL folder structure given just the bookID, returns True or False
    # eg if import_book(source_directory, bookID):
    #         ppcount = ppcount + 1
    #
    myDB = database.DBConnection()
    data = myDB.select('SELECT * from books WHERE BookID="%s"' % bookID)
    if data:
        authorname = data[0]['AuthorName']
        bookname = data[0]['BookName']

        # try:
        #    auth_dir = os.path.join(lazylibrarian.DESTINATION_DIR, authorname).encode(lazylibrarian.SYS_ENCODING)
        #    os.chmod(auth_dir, 0777)
        # except Exception, e:
        #    logger.debug("Could not chmod author directory: " + str(auth_dir))

        if 'windows' in platform.system().lower(
        ) and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
            logger.warn('Please check your EBOOK_DEST_FOLDER setting')
            lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                '/', '\\')

        dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
            '$Author', authorname).replace('$Title', bookname)
        global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
            '$Author', authorname).replace('$Title', bookname)
        global_name = unaccented(global_name)
        # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
        dic = {
            '<': '',
            '>': '',
            '...': '',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': '',
            '*': '',
            ':': '',
            ';': '',
            '\'': ''
        }
        dest_path = unaccented_str(replace_all(dest_path, dic))
        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                 dest_path).encode(lazylibrarian.SYS_ENCODING)

        processBook = processDestination(pp_path, dest_path, authorname,
                                         bookname, global_name)

        if processBook:
            # update nzbs
            was_snatched = len(
                myDB.select('SELECT BookID FROM wanted WHERE BookID="%s"' %
                            bookID))
            if was_snatched:
                controlValueDict = {"BookID": bookID}
                newValueDict = {
                    "Status": "Processed",
                    "NZBDate": now()
                }  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)
            if bookname:
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug(
                        'Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            logger.info('Successfully processed: %s' % global_name)
            notify_download("%s at %s" % (global_name, now()))
            return True
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' %
                         pp_path)
            was_snatched = len(
                myDB.select('SELECT BookID FROM wanted WHERE BookID="%s"' %
                            bookID))
            if was_snatched:
                controlValueDict = {"BookID": bookID}
                newValueDict = {"Status": "Failed", "NZBDate": now()}
                myDB.upsert("wanted", newValueDict, controlValueDict)
            # reset status so we try for a different version
            myDB.action(
                'UPDATE books SET status = "Wanted" WHERE BookID="%s"' %
                bookID)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except:
                logger.debug("Unable to rename %s" % pp_path)
    return False
Ejemplo n.º 14
0
def processDir(reset=False):

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(
            lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' %
                     (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
                (len(downloads), plural(
                    len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # handle single file downloads here...
                        if is_valid_booktype(fname, booktype="book") \
                            or is_valid_booktype(fname, booktype="mag"):
                            dirname = os.path.join(processpath,
                                                   os.path.splitext(fname)[0])
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug(
                                        'Failed to create directory %s, %s' %
                                        (dirname, why.strerror))
                            if os.path.exists(dirname):
                                try:
                                    shutil.move(
                                        os.path.join(processpath, fname),
                                        os.path.join(dirname, fname))
                                    fname = os.path.splitext(fname)[0]
                                except Exception as why:
                                    logger.debug(
                                        "Failed to move file %s to %s, %s" %
                                        (fname, dirname, str(why)))
                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' %
                                     (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' %
                                 (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.info(u'Best match (%s%%): %s for %s' %
                        (match, pp_path, book['NZBtitle']))

            data = myDB.select('SELECT * from books WHERE BookID="%s"' %
                               book['BookID'])
            if data:
                authorname = data[0]['AuthorName']
                bookname = data[0]['BookName']
                if 'windows' in platform.system().lower(
                ) and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                        '/', '\\')

                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace(
                    '$Author', authorname).replace('$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                dic = {
                    '<': '',
                    '>': '',
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': '',
                    ':': '',
                    ';': '',
                    '\'': ''
                }
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                         dest_path).encode(
                                             lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.select('SELECT * from magazines WHERE Title="%s"' %
                                   book['BookID'])
                if data:
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data[0][
                        'IssueDate']  # keep for processing issues arriving out of order
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {
                        '<': '',
                        '>': '',
                        '...': '',
                        ' & ': ' ',
                        ' = ': ' ',
                        '?': '',
                        '$': 's',
                        ' + ': ' ',
                        '"': '',
                        ',': '',
                        '*': '',
                        ':': '',
                        ';': '',
                        '\'': ''
                    }
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(
                            lazylibrarian.DESTINATION_DIR,
                            dest_path).encode(lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(
                            lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace(
                        '$IssueDate',
                        book['AuxInfo']).replace('$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug(
                        "Snatched magazine %s is not in download directory" %
                        (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" %
                         (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname,
                                         bookname, global_name)

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {
                "Status": "Processed",
                "NZBDate": now()
            }  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug(
                        'Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(
                            book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(
                            book['AuxInfo'])  # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                else:
                    newValueDict = {
                        "IssueDate": book['AuxInfo'],
                        "LastAcquired": today(),
                        "IssueStatus": "Open"
                    }
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {
                    "Title": book['BookID'],
                    "IssueDate": book['AuxInfo']
                }
                newValueDict = {
                    "IssueAcquired":
                    today(),
                    "IssueFile":
                    dest_file,
                    "IssueID":
                    create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                }
                myDB.upsert("issues", newValueDict, controlValueDict)

                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s at %s" % (global_name, now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' %
                         pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action(
                    'UPDATE books SET status = "Wanted" WHERE BookID="%s"' %
                    book['BookID'])

            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except:
                logger.debug("Unable to rename %s" % pp_path)

    downloads = os.listdir(
        processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)

            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)

            if (os.path.isdir(pp_path)):
                logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' %
                    (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')
Ejemplo n.º 15
0
def TORDownloadMethod(bookid=None, tor_prov=None, tor_title=None, tor_url=None):
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url = tor_url.split('&file=')[0]
            value = tor_url.split('&file=')[1]
            if isinstance(value, str):
                value = value.decode('utf-8')  # make unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        request = urllib2.Request(ur'%s' % tor_url)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('Accept-encoding', 'gzip')
        request.add_header('User-Agent', USER_AGENT)

        try:
            response = urllib2.urlopen(request, timeout=90)
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read())
                f = gzip.GzipFile(fileobj=buf)
                torrent = f.read()
            else:
                torrent = response.read()

        except (socket.timeout) as e:
            logger.warn('Timeout fetching torrent from url: %s' % tor_url)
            return False
        except (urllib2.URLError) as e:
            logger.warn('Error fetching torrent from url: %s, %s' % (tor_url, e.reason))
            return False

    if lazylibrarian.TOR_DOWNLOADER_BLACKHOLE:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        tor_title = None
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.TOR_CONVERT_MAGNET:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name = tor_name + '.magnet'
                tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name)
                with open(tor_path, 'wb') as torrent_file:
                    torrent_file.write(torrent)
                logger.debug('Magnet file saved: %s' % tor_path)
                setperm(tor_path)
                downloadID = Source
        else:
            tor_name = tor_name + '.torrent'
            tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name)
            with open(tor_path, 'wb') as torrent_file:
                torrent_file.write(torrent)
            setperm(tor_path)
            logger.debug('Torrent file saved: %s' % tor_name)
            downloadID = Source

    if (lazylibrarian.TOR_DOWNLOADER_UTORRENT and lazylibrarian.UTORRENT_HOST):
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if (lazylibrarian.TOR_DOWNLOADER_RTORRENT and lazylibrarian.RTORRENT_HOST):
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if (lazylibrarian.TOR_DOWNLOADER_QBITTORRENT and lazylibrarian.QBITTORRENT_HOST):
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url)  # returns hash or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)
        else:
            logger.debug("qbittorrent returned: %s" % str(response))

    if (lazylibrarian.TOR_DOWNLOADER_TRANSMISSION and lazylibrarian.TRANSMISSION_HOST):
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if (lazylibrarian.TOR_DOWNLOADER_SYNOLOGY and lazylibrarian.USE_SYNOLOGY and lazylibrarian.SYNOLOGY_HOST):
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if (lazylibrarian.TOR_DOWNLOADER_DELUGE and lazylibrarian.DELUGE_HOST):
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.DELUGE_USER:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.DELUGE_HOST,
                                     int(lazylibrarian.DELUGE_PORT),
                                     lazylibrarian.DELUGE_USER,
                                     lazylibrarian.DELUGE_PASS)
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.DELUGE_LABEL:
                        labelled = client.call('label.set_torrent', downloadID, lazylibrarian.DELUGE_LABEL)
                    result = client.call('core.get_torrent_status', downloadID, {})
                    #for item in result:
                    #    logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']

            except Exception as e:
                logger.debug('DelugeRPC failed %s' % str(e))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID="%s"' % bookid)
        myDB.action('UPDATE wanted SET status = "Snatched", Source = "%s", DownloadID = "%s" WHERE NZBurl="%s"' %
                    (Source, downloadID, full_url))
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl="%s"' % (tor_title, full_url))
        return True
    else:
        logger.error(u'Failed to download torrent from %s, %s' % (Source, tor_url))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl="%s"' % full_url)
        return False
Ejemplo n.º 16
0
def processDir(reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "POSTPROCESS"
        processpath = lazylibrarian.DIRECTORY('Download')

        logger.debug('Checking [%s] for files to post process' % processpath)

        try:
            downloads = os.listdir(processpath)
        except OSError as why:
            logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
            return

        myDB = database.DBConnection()
        snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

        if len(snatched) == 0:
            logger.info('Nothing marked as snatched.')
            scheduleJob(action='Stop', target='processDir')
            return

        if len(downloads) == 0:
            logger.info('No downloads are found. Nothing to process yet.')
            return

        logger.info("Checking %s download%s for %s snatched file%s" %
                    (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
        ppcount = 0
        for book in snatched:
            # if torrent, see if we can get current status from the downloader as the name
            # may have been changed once magnet resolved, or download started or completed
            # depending on torrent downloader. Usenet doesn't change the name. We like usenet.
            torrentname = ''
            try:
                logger.debug("%s was sent to %s" % (book['NZBtitle'], book['Source']))
                if book['Source'] == 'TRANSMISSION':
                    torrentname = transmission.getTorrentFolder(book['DownloadID'])
                elif book['Source'] == 'UTORRENT':
                    torrentname = utorrent.nameTorrent(book['DownloadID'])
                elif book['Source'] == 'RTORRENT':
                    torrentname = rtorrent.getName(book['DownloadID'])
                elif book['Source'] == 'QBITTORRENT':
                    torrentname = qbittorrent.getName(book['DownloadID'])
                elif book['Source'] == 'SYNOLOGY_TOR':
                    torrentname = synology.getName(book['DownloadID'])
                elif book['Source'] == 'DELUGEWEBUI':
                    torrentname = deluge.getTorrentFolder(book['DownloadID'])
                elif book['Source'] == 'DELUGERPC':
                    client = DelugeRPCClient(lazylibrarian.DELUGE_HOST,
                                             int(lazylibrarian.DELUGE_PORT),
                                             lazylibrarian.DELUGE_USER,
                                             lazylibrarian.DELUGE_PASS)
                    try:
                        client.connect()
                        result = client.call('core.get_torrent_status', book['DownloadID'], {})
                        #    for item in result:
                        #        logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                        if 'name' in result:
                            torrentname = unaccented_str(result['name'])
                    except Exception as e:
                        logger.debug('DelugeRPC failed %s' % str(e))
            except Exception as e:
                logger.debug("Failed to get updated torrent name from %s for %s: %s" %
                            (book['Source'], book['DownloadID'], str(e)))

            matchtitle = unaccented_str(book['NZBtitle'])
            if torrentname and torrentname != matchtitle:
                logger.debug("%s Changing [%s] to [%s]" % (book['Source'], matchtitle, torrentname))
                myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl = "%s"' % (torrentname, book['NZBurl']))
                matchtitle = torrentname

            # here we could also check percentage downloaded or eta or status?
            # If downloader says it hasn't completed, no need to look for it.

            matches = []
            logger.info('Looking for %s in %s' % (matchtitle, processpath))
            for fname in downloads:
                # skip if failed before or incomplete torrents, or incomplete btsync
                extn = os.path.splitext(fname)[1]
                if extn not in ['.fail', '.part', '.bts', '.!ut']:
                    # This is to get round differences in torrent filenames.
                    # Usenet is ok, but Torrents aren't always returned with the name we searched for
                    # We ask the torrent downloader for the torrent name, but don't always get an answer
                    # so we try to do a "best match" on the name, there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]

                    match = 0
                    if matchtitle:
                        if ' LL.(' in matchtitle:
                            matchtitle = matchtitle.split(' LL.(')[0]
                        match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match and match >= lazylibrarian.DLOAD_RATIO:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here. Book/mag file in download root.
                            # move the file into it's own subdirectory so we don't move/delete things that aren't ours
                            logger.debug('filename [%s] is a file' % os.path.join(processpath, fname))
                            if is_valid_booktype(fname, booktype="book") \
                                    or is_valid_booktype(fname, booktype="mag"):
                                logger.debug('filename [%s] is a valid book/mag' % os.path.join(processpath, fname))
                                if bts_file(processpath):
                                    logger.debug("Skipping %s, found a .bts file" % processpath)
                                else:
                                    fname = os.path.splitext(fname)[0]
                                    dirname = os.path.join(processpath, fname)
                                    if not os.path.exists(dirname):
                                        try:
                                            os.makedirs(dirname)
                                            setperm(dirname)
                                        except OSError as why:
                                            logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                                    if os.path.exists(dirname):
                                        # move the book and any related files too
                                        # ie other book formats, or opf, jpg with same title
                                        # can't move metadata.opf or cover.jpg or similar
                                        # as can't be sure they are ours
                                        # not sure if we need a new listdir here, or whether we can use the old one
                                        list_dir = os.listdir(processpath)
                                        for ourfile in list_dir:
                                            if ourfile.startswith(fname):
                                                if is_valid_booktype(ourfile, booktype="book") \
                                                    or is_valid_booktype(ourfile, booktype="mag") \
                                                        or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']:
                                                    try:
                                                        if lazylibrarian.DESTINATION_COPY:
                                                            shutil.copyfile(os.path.join(processpath, ourfile),
                                                                            os.path.join(dirname, ourfile))
                                                            setperm(os.path.join(dirname, ourfile))
                                                        else:
                                                            shutil.move(os.path.join(processpath, ourfile),
                                                                        os.path.join(dirname, ourfile))
                                                            setperm(os.path.join(dirname, ourfile))
                                                    except Exception as why:
                                                        logger.debug("Failed to copy/move file %s to %s, %s" %
                                                                    (ourfile, dirname, str(why)))

                        pp_path = os.path.join(processpath, fname)
                        if os.path.isdir(pp_path):
                            logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, matchtitle))
                            if not os.listdir(pp_path):
                                logger.debug("Skipping %s, folder is empty" % pp_path)
                            elif bts_file(pp_path):
                                logger.debug("Skipping %s, found a .bts file" % pp_path)
                            else:
                                matches.append([match, pp_path, book])
                    else:
                        pp_path = os.path.join(processpath, fname)
                        matches.append([match, pp_path, book])  # so we can report closest match
                else:
                    logger.debug('Skipping %s' % fname)

            match = 0
            if matches:
                highest = max(matches, key=lambda x: x[0])
                match = highest[0]
                pp_path = highest[1]
                book = highest[2]
            if match and match >= lazylibrarian.DLOAD_RATIO:
                logger.debug(u'Found match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle']))
                data = myDB.match('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:  # it's a book
                    logger.debug(u'Processing book %s' % book['BookID'])
                    authorname = data['AuthorName']
                    bookname = data['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')
                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = unaccented(global_name)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = unaccented_str(replace_all(dest_path, dic))
                    dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING)
                else:
                    data = myDB.match('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:  # it's a magazine
                        logger.debug(u'Processing magazine %s' % book['BookID'])
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = unaccented_str(replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)

                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(processpath, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = unaccented(global_name)
                    else:  # not recognised
                        logger.debug('Nothing in database matching "%s"' % book['BookID'])
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                if match:
                    logger.debug(u'Closest match (%s%%): %s' % (match, pp_path))
                    #for match in matches:
                    #    logger.info('Match: %s%%  %s' % (match[0], match[1]))
                continue

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"BookID": book['BookID'], "NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname:
                    # it's a book, if None it's a magazine
                    if len(lazylibrarian.IMP_CALIBREDB):
                        logger.debug('Calibre should have created the extras for us')
                    else:
                        processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue:
                        if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                            older = int(mostrecentissue) > int(book['AuxInfo'])  # issuenumber
                        else:
                            older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                    else:
                        older = False
                    if older:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": today(),
                                    "IssueFile": dest_file,
                                    "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    create_cover(dest_file)

                # calibre or ll copied/moved the files we want, now delete source files

                to_delete = True
                if book['NZBmode'] in ['torrent', 'magnet']:
                    # Only delete torrents if we don't want to keep seeding
                    if lazylibrarian.KEEP_SEEDING:
                        logger.warn('%s is seeding %s %s' % (book['Source'], book['NZBmode'], book['NZBtitle']))
                        to_delete = False
                    else:
                        # ask downloader to delete the torrent, but not the files
                        # we may delete them later, depending on other settings
                        if book['DownloadID'] != "unknown":
                            logger.debug('Removing %s from %s' % (book['NZBtitle'], book['Source'].lower()))
                            delete_task(book['Source'], book['DownloadID'], False)
                        else:
                            logger.warn("Unable to remove %s from %s, no DownloadID" %
                                (book['NZBtitle'], book['Source'].lower()))

                if to_delete:
                    # only delete the files if not in download root dir and if DESTINATION_COPY not set
                    if not lazylibrarian.DESTINATION_COPY and (pp_path != processpath):
                        if os.path.isdir(pp_path):
                            # calibre might have already deleted it?
                            try:
                                shutil.rmtree(pp_path)
                            except Exception as why:
                                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now()))
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Failed", "NZBDate": now()}
                myDB.upsert("wanted", newValueDict, controlValueDict)
                # if it's a book, reset status so we try for a different version
                # if it's a magazine, user can select a different one from pastissues table
                if bookname:
                    myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID'])

                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception as e:
                    logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            dname, extn = os.path.splitext(directory)
            if "LL.(" in dname and extn not in ['.fail', '.part', '.bts', '.!ut']:
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " found in download directory")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    if import_book(pp_path, bookID):
                        ppcount = ppcount + 1

        if ppcount == 0:
            logger.info('No snatched books/mags have been found')
        else:
            logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

        # Now check for any that are still marked snatched...
        if lazylibrarian.TASK_AGE:
            snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')
            if len(snatched) > 0:
                for snatch in snatched:
                    # FUTURE: we could check percentage downloaded or eta?
                    # if percentage is increasing, it's just slow
                    try:
                        when_snatched = time.strptime(snatch['NZBdate'], '%Y-%m-%d %H:%M:%S')
                        when_snatched = time.mktime(when_snatched)
                        diff = time.time() - when_snatched  # time difference in seconds
                    except:
                        diff = 0
                    hours = int(diff / 3600)
                    if hours >= lazylibrarian.TASK_AGE:
                        logger.warn('%s was sent to %s %s hours ago, deleting failed task' %
                                    (snatch['NZBtitle'], snatch['Source'].lower(), hours))
                        # change status to "Failed", and ask downloader to delete task and files
                        if snatch['BookID'] != 'unknown':
                            myDB.action('UPDATE wanted SET Status="Failed" WHERE BookID="%s"' % snatch['BookID'])
                            myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % snatch['BookID'])
                            delete_task(snatch['Source'], snatch['DownloadID'], True)
        if reset:
            scheduleJob(action='Restart', target='processDir')

    except Exception as e:
        logger.error('Unhandled exception in processDir: %s' % traceback.format_exc())
Ejemplo n.º 17
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
  try:
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHMAG"

    myDB = database.DBConnection()
    searchlist = []

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select('SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title="%s" AND Status="Active"' % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 0:
        return

    # should clear old search results as might not be available any more
    # ie torrent not available, changed providers, out of news server retention etc.
    # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
    logger.debug(u"Removing old magazine search results")
    myDB.action('DELETE from pastissues WHERE Status="Skipped"')

    logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

    for searchmag in searchmags:
        bookid = searchmag['Title']
        searchterm = searchmag['Regex']

        if not searchterm:
            searchterm = searchmag['Title']
            dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
            searchterm = unaccented_str(replace_all(searchterm, dic))
            searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode(lazylibrarian.SYS_ENCODING)

        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn('There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:

        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

            if tor_resultlist:
                for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                    resultlist.append({
                        'bookid': item['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                        'nzbsize': item['tor_size'],
                        'nzbmode': 'torrent'
                    })

        if lazylibrarian.USE_RSS():
            rss_resultlist, nproviders = IterateOverRSSSites(book, 'mag')
            if not nproviders:
                logger.warn('No rss providers are set. Check config for RSS providers')

            if rss_resultlist:
                for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                    resultlist.append({
                        'bookid': book['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate':
                            item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                        'nzbsize': item['tor_size'],
                        'nzbmode': item['tor_type']
                    })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])

        else:
            bad_name = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            maglist = []
            issues = []

            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = unaccented_str(nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']

                results = myDB.match('SELECT * from magazines WHERE Title="%s"' % bookid)
                if not results:
                    logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                    bad_name = bad_name + 1
                else:
                    rejected = False
                    maxsize = check_int(lazylibrarian.REJECT_MAGSIZE, 0)
                    if maxsize and nzbsize > maxsize:
                        logger.debug("Rejecting %s, too large" % nzbtitle)
                        rejected = True

                    if not rejected:
                        control_date = results['IssueDate']
                        reject_list = getList(results['Reject'])

                        dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': ''}
                        nzbtitle_formatted = replace_all(nzbtitle, dic).strip()

                        # Need to make sure that substrings of magazine titles don't get found
                        # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                        # remove extra spaces if they're in a row
                        nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                        nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                        if ' ' in bookid:
                            bookid_exploded = bookid.split(' ')
                        else:
                            bookid_exploded = [bookid]

                        # check nzb starts with magazine title followed by a date
                        # eg The MagPI Issue 22 - July 2015

                        if len(nzbtitle_exploded) > len(bookid_exploded):
                            # needs to be longer as it has to include a date
                            # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                            mag_title_match = fuzz.token_set_ratio(
                                unaccented(bookid),
                                unaccented(nzbtitle_formatted))

                            if mag_title_match < lazylibrarian.MATCH_RATIO:
                                logger.debug(
                                    u"Magazine token set Match failed: " + str(
                                        mag_title_match) + "% for " + nzbtitle_formatted)
                                rejected = True
                            else:
                                logger.debug(
                                    u"Magazine matched: " + str(
                                        mag_title_match) + "% " + bookid + " for " + nzbtitle_formatted)
                        else:
                            rejected = True

                    if not rejected:
                        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
                                                    nzburl)
                        if already_failed:
                            logger.debug("Rejecting %s, blacklisted at %s" %
                                         (nzbtitle_formatted, already_failed['NZBprov']))
                            rejected = True

                    if not rejected:
                        lower_title = unaccented(nzbtitle_formatted).lower()
                        lower_bookid = unaccented(bookid).lower()
                        for word in reject_list:
                            if word in lower_title and word not in lower_bookid:
                                rejected = True
                                logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                break

                    if not rejected:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # or other words we don't want. Should make the word list configurable.
                        # so strip all the trailing junk...
                        strip_list = ['pdf', 'true', 'truepdf', 'german', 'ebooks']
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() in strip_list:
                                nzbtitle_exploded.pop()  # gotta love the function names

                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                            regexA_month = month2num(unaccented(regexA_month_temp))
                            if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure

                            # if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].rstrip(',').zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            # else:
                            # regexA_day = '01'  # monthly, or less frequent
                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months, or month <1 or >12
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY or MonthName DD, YYYY
                                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                regexB_month = month2num(unaccented(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].rstrip(',').zfill(2)
                                if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100:
                                    regexB_year = 'fail'
                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(int(regexB_year), int(regexB_month), int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try  YYYY MM
                                    regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                        regexC_month = 0
                                        regexC_day = 0
                                        if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                            regexC_day = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        else:
                                            regexC_year = 'fail'
                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(int(regexC_year), int(regexC_month), int(regexC_day))
                                    except Exception:
                                        # regexD Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                                        try:
                                            IssueLabel = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                            if IssueLabel.lower() in ["issue", "no", "nr", "vol"]:
                                                # issue nn
                                                regexD_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                if regexD_issue.isdigit():
                                                    newdatish = str(int(regexD_issue))  # 4 == 04 == 004
                                            else:
                                                IssueLabel = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                                if IssueLabel.lower() in ["issue", "no", "nr", "vol"]:
                                                    # issue nn, YYYY
                                                    regexD_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                                    regexD_issue = regexD_issue.strip(',')
                                                    if regexD_issue.isdigit():
                                                        newdatish = str(int(regexD_issue))  # 4 == 04 == 004
                                                    else:
                                                        raise ValueError
                                                    regexD_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                    if regexD_year.isdigit():
                                                        if int(regexD_year) < int(datetime.date.today().year):
                                                            newdatish = 0  # it's old
                                                else:
                                                    raise ValueError
                                        except Exception:
                                            # regexE nn YYYY issue number without "Nr" before it
                                            # nn is assumed not to be a month as they are normally names not digits
                                            try:
                                                regexE_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                regexE_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                                print "[%s][%s]" % (regexE_year, regexE_issue)
                                                if regexE_issue.isdigit():
                                                    newdatish = int(regexE_issue)
                                                    if int(regexE_year) < int(datetime.date.today().year):
                                                        newdatish = 0  # it's old
                                                else:
                                                    raise ValueError
                                            except Exception:
                                                # regexF issue and year as a single 6 digit string eg 222015
                                                try:
                                                    regexF = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                    if regexF.isdigit() and len(regexF) == 6:
                                                        regexF_issue = regexF[:2]
                                                        regexF_year = regexF[2:]
                                                        newdatish = str(int(regexF_issue))  # 4 == 04 == 004
                                                        if int(regexF_year) < int(datetime.date.today().year):
                                                            newdatish = 0  # it's old
                                                    else:
                                                        raise ValueError

                                                except Exception:
                                                    logger.debug('Magazine %s not in a recognised date format.' %
                                                                 nzbtitle_formatted)
                                                    bad_date = bad_date + 1
                                                    # allow issues with good name but bad date to be included
                                                    # so user can manually select them, incl those with issue numbers
                                                    newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                                    # continue
                        else:
                            logger.debug('Magazine [%s] does not match the search term [%s].' % (
                                nzbtitle_formatted, bookid))
                            bad_name = bad_name + 1
                            continue

                        #  wanted issues go into wanted table marked "Wanted"
                        #  the rest into pastissues table marked "Skipped"
                        insert_table = "pastissues"
                        insert_status = "Skipped"

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            # or for magazines with only an issue number, use zero

                            if '-' in str(newdatish):
                                start_time = time.time()
                                start_time -= int(lazylibrarian.MAG_AGE) * 24 * 60 * 60  # number of seconds in days
                                if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                    start_time = 0
                                control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                logger.debug('Magazine date comparing to %s' % control_date)
                            else:
                                control_date = 0

                        if '-' in str(control_date) and '-' in str(newdatish):
                            # only grab a copy if it's newer than the most recent we have,
                            # or newer than a month ago if we have none
                            comp_date = datecompare(newdatish, control_date)
                        elif '-' not in str(control_date) and '-' not in str(newdatish):
                            # for issue numbers, check if later than last one we have
                            comp_date = int(newdatish) - int(control_date)
                            newdatish = "%s" % newdatish
                            newdatish = newdatish.zfill(4)  # pad so we sort correctly
                        else:
                            # invalid comparison of date and issue number
                            logger.debug('Magazine %s incorrect date or issue format.' % nzbtitle_formatted)
                            bad_date = bad_date + 1
                            newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                            comp_date = 0

                        if comp_date > 0:
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                issues.append(issue)
                                insert_table = "wanted"
                                insert_status = "Wanted"
                                nzbdate = now()  # when we asked for it
                            else:
                                logger.debug('This issue of %s is already flagged for download' % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date = old_date + 1

                        #  store only the _new_ matching results
                        #  Don't add a new entry if this issue has been found on an earlier search
                        #  and status has been user-set ( we only delete the "Skipped" ones )
                        #  In "wanted" table it might be already snatched/downloading/processing

                        mag_entry = myDB.select('SELECT * from %s WHERE NZBtitle="%s" and NZBprov="%s"' % (
                                                insert_table, nzbtitle, nzbprov))
                        if not mag_entry:
                            controlValueDict = {
                                "NZBtitle": nzbtitle,
                                "NZBprov": nzbprov
                            }
                            newValueDict = {
                                "NZBurl": nzburl,
                                "BookID": bookid,
                                "NZBdate": nzbdate,
                                "AuxInfo": newdatish,
                                "Status": insert_status,
                                "NZBsize": nzbsize,
                                "NZBmode": nzbmode
                            }
                            myDB.upsert(insert_table, newValueDict, controlValueDict)

                    else:
                        # logger.debug('Magazine [%s] was rejected.' % nzbtitle_formatted)
                        bad_name = bad_name + 1

            logger.info('Found %i result%s for %s. %i new, %i old, %i fail date, %i fail name: %i to download' % (
                        total_nzbs, plural(total_nzbs), bookid, new_date, old_date, bad_date, bad_name, len(maglist)))

            for magazine in maglist:
                if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                    snatch = TORDownloadMethod(
                        magazine['bookid'],
                        magazine['nzbprov'],
                        magazine['nzbtitle'],
                        magazine['nzburl'])
                else:
                    snatch = NZBDownloadMethod(
                        magazine['bookid'],
                        magazine['nzbprov'],
                        magazine['nzbtitle'],
                        magazine['nzburl'])
                if snatch:
                    logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                    notify_snatch("%s from %s at %s" %
                                  (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                    scheduleJob(action='Start', target='processDir')
            maglist = []

    if reset:
        scheduleJob(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")

  except Exception as e:
    logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
Ejemplo n.º 18
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': ' ',
        '*': '',
        '(': '',
        ')': '',
        '[': '',
        ']': '',
        '#': '',
        '0': '',
        '1': '',
        '2': '',
        '3': '',
        '4': '',
        '5': '',
        '6': '',
        '7': '',
        '8': '',
        '9': '',
        '\'': '',
        ':': '',
        '!': '',
        '-': ' ',
        '\s\s': ' '
    }
    # ' the ': ' ', ' a ': ' ', ' and ': ' ',
    # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': '',
        '*': '',
        ':': '',
        ';': '',
        '\'': ''
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for nzb in resultlist:
        nzb_Title = unaccented_str(replace_all(nzb['nzbtitle'],
                                               dictrepl)).strip()
        nzb_Title = re.sub(r"\s\s+", " ", nzb_Title)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title)
        nzbBook_match = fuzz.token_set_ratio(title, nzb_Title)
        logger.debug(u"NZB author/book Match: %s/%s for %s" %
                     (nzbAuthor_match, nzbBook_match, nzb_Title))
        nzburl = nzb['nzburl']

        rejected = False

        already_failed = myDB.action(
            'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
            nzburl).fetchone()
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" %
                         (nzb_Title, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in nzb_Title.lower(
                ) and not word in author.lower() and not word in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" %
                                 (nzb_Title, word))
                    break

        nzbsize_temp = nzb[
            'nzbsize']  # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = round(float(nzbsize_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if not rejected:
            if maxsize and nzbsize > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % nzb_Title)

        if not rejected:
            if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio:
                bookid = book['bookid']
                nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] +
                            ')').strip()
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']
                controlValueDict = {"NZBurl": nzburl}
                newValueDict = {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": nzbsize,
                    "NZBtitle": nzbTitle,
                    "NZBmode": nzbmode,
                    "Status": "Skipped"
                }

                score = (nzbBook_match +
                         nzbAuthor_match) / 2  # as a percentage
                # lose a point for each extra word in the title so we get the closest match
                words = len(getList(nzb_Title))
                words -= len(getList(author))
                words -= len(getList(title))
                score -= abs(words)
                matches.append(
                    [score, nzb_Title, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match NZB (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.action(
            'SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
            newValueDict["BookID"]).fetchone()
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it
        else:
            logger.debug('%s adding to wanted' % nzb_Title)
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if nzbmode == "torznab":
                snatch = TORDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"])
            else:
                snatch = NZBDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"])
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' +
                                        now())
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No nzb's found for [%s] using searchtype %s" %
                     (book["searchterm"], searchtype))
    return False
Ejemplo n.º 19
0
    def get_author_books(self,
                         authorid=None,
                         authorname=None,
                         bookstatus="Skipped",
                         audiostatus="Skipped",
                         entrystatus='Active',
                         refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' %
                         authorname)
            # google doesnt like accents in author names
            set_url = self.url + quote(
                'inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            locked_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Artist is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(
                            URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while'
                            % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' %
                                     (number_results, plural(number_results),
                                      authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip if no author, no author is no book.
                        if not book['author']:
                            logger.debug(
                                'Skipped a result without authorfield.')
                            continue

                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match(
                                        'SELECT lang FROM languages where isbn=?',
                                        (isbnhead, ))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug(
                                            "Found cached language [%s] for [%s]"
                                            % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(
                                                    book['isbn']
                                            ) == 13 and book[
                                                    'isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(
                                                            lang):
                                                        booklang = lazylibrarian.isbn_979_dict[
                                                            lang]
                                                        logger.debug(
                                                            "ISBN979 returned %s for %s"
                                                            % (booklang,
                                                               isbnhead))
                                                        match = True
                                                        break
                                            elif (len(book['isbn']) == 10) or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(
                                                            lang):
                                                        booklang = lazylibrarian.isbn_978_dict[
                                                            lang]
                                                        logger.debug(
                                                            "ISBN979 returned %s for %s"
                                                            % (booklang,
                                                               isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action(
                                                    'insert into languages values (?, ?)',
                                                    (isbnhead, booklang))

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action(
                                                'insert into languages values (?, ?)',
                                                (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in [
                                                "en-US", "en-GB", "eng"
                                        ]:
                                            # these are all english, may need to expand this list
                                            logger.debug(
                                                "%s Google thinks [%s], we think [%s]"
                                                % (book['name'], googlelang,
                                                   booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' %
                                             (book['name'], booklang))
                                ignored += 1
                                continue

                        ignorable = ['future', 'date', 'isbn']
                        if lazylibrarian.CONFIG['NO_LANG']:
                            ignorable.append('lang')
                        rejected = None
                        check_status = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug(
                                'Rejecting bookid %s for %s, no bookname' %
                                (bookid, authorname))
                            rejected = 'name', 'No bookname'
                        else:
                            bookname = replace_all(unaccented(bookname), {
                                ':': '.',
                                '"': '',
                                '\'': ''
                            }).strip()
                            if re.match(
                                    '[^\w-]', bookname
                            ):  # remove books with bad characters in title
                                logger.debug(
                                    "[%s] removed book for bad characters" %
                                    bookname)
                                rejected = 'chars', 'Bad characters in bookname'

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug(
                                    'Rejecting %s, future publication date %s'
                                    % (bookname, book['date']))
                                rejected = 'future', 'Future publication date [%s]' % book[
                                    'date']

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug(
                                    'Rejecting %s, no publication date' %
                                    bookname)
                                rejected = 'date', 'No publication date'

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' %
                                             bookname)
                                rejected = 'isbn', 'No ISBN'

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname.replace(
                                '"', '""'), authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got %s'
                                        % (match['BookID'], authorname,
                                           bookname, bookid))
                                    rejected = 'bookid', 'Got under different bookid %s' % bookid
                                    duplicates += 1

                        cmd = 'SELECT AuthorName,BookName,AudioStatus,books.Status FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid, ))
                        if match:  # we have a book with this bookid already
                            if bookname != match[
                                    'BookName'] or authorname != match[
                                        'AuthorName']:
                                logger.debug(
                                    'Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]'
                                    % (bookid, authorname, bookname,
                                       match['AuthorName'], match['BookName']))
                            else:
                                logger.debug(
                                    'Rejecting bookid %s for [%s][%s] already got this book in database'
                                    % (bookid, authorname, bookname))
                                check_status = True
                            duplicates += 1
                            rejected = 'got', 'Already got this book in database'

                            # Make sure we don't reject books we have got
                            if match['Status'] in [
                                    'Open', 'Have'
                            ] or match['AudioStatus'] in ['Open', 'Have']:
                                rejected = None

                        if rejected and rejected[0] not in ignorable:
                            removedResults += 1
                        if check_status or rejected is None or (
                                lazylibrarian.CONFIG['IMP_IGNORE']
                                and rejected[0] in ignorable):  # dates, isbn

                            cmd = 'SELECT Status,AudioStatus,BookFile,AudioFile,Manual,BookAdded,BookName '
                            cmd += 'FROM books WHERE BookID=?'
                            existing = myDB.match(cmd, (bookid, ))
                            if existing:
                                book_status = existing['Status']
                                audio_status = existing['AudioStatus']
                                if lazylibrarian.CONFIG[
                                        'FOUND_STATUS'] == 'Open':
                                    if book_status == 'Have' and existing[
                                            'BookFile']:
                                        book_status = 'Open'
                                    if audio_status == 'Have' and existing[
                                            'AudioFile']:
                                        audio_status = 'Open'
                                locked = existing['Manual']
                                added = existing['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                audio_status = audiostatus
                                added = today()
                                locked = False

                            if rejected:
                                reason = rejected[1]
                                if rejected[0] in ignorable:
                                    book_status = 'Ignored'
                                    audio_status = 'Ignored'
                                    book_ignore_count += 1
                            else:
                                reason = ''

                            if locked:
                                locked_count += 1
                            else:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added,
                                    "WorkID": '',
                                    "ScanResult": reason
                                }

                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                                logger.debug("Book found: " + bookname + " " +
                                             book['date'])
                                if 'nocover' in book[
                                        'img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug(
                                            'Updated cover for %s using %s' %
                                            (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)

                                elif book['img'] and book['img'].startswith(
                                        'http'):
                                    link, success, _ = cache_img(
                                        "book",
                                        bookid,
                                        book['img'],
                                        refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)
                                    else:
                                        logger.debug(
                                            'Failed to cache image for %s' %
                                            book['img'])

                                serieslist = []
                                if book['series']:
                                    serieslist = [
                                        ('', book['seriesNum'],
                                         cleanName(unaccented(book['series']),
                                                   '&/'))
                                    ]
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug(
                                            'Updated series: %s [%s]' %
                                            (bookid, serieslist))
                                    setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist,
                                                       bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)

                                if not existing_book:
                                    logger.debug(
                                        "[%s] Added book: %s [%s] status %s" %
                                        (authorname, bookname, booklang,
                                         book_status))
                                    added_count += 1
                                else:
                                    logger.debug(
                                        "[%s] Updated book: %s [%s] status %s"
                                        % (authorname, bookname, booklang,
                                           book_status))
                                    updated_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug(
                '[%s] The Google Books API was hit %s time%s to populate book list'
                % (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg, BookID from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid, ))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookid = lastbook['BookID']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookid = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookID": lastbookid,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)
            resultcount = added_count + updated_count
            logger.debug("Found %s total book%s for author" %
                         (total_count, plural(total_count)))
            logger.debug("Found %s locked book%s" %
                         (locked_count, plural(locked_count)))
            logger.debug("Removed %s unwanted language result%s" %
                         (ignored, plural(ignored)))
            logger.debug("Removed %s incorrect/incomplete result%s" %
                         (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s" %
                         (duplicates, plural(duplicates)))
            logger.debug("Ignored %s book%s" %
                         (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" %
                         (resultcount, plural(resultcount)))

            myDB.action(
                'insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                (authorname.replace('"', '""'), api_hits, gr_lang_hits,
                 lt_lang_hits, gb_lang_change, cache_hits, ignored,
                 removedResults, not_cached, duplicates))

            if refresh:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s / Updated %s book%s"
                    % (authorname, added_count, plural(added_count),
                       updated_count, plural(updated_count)))
            else:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s to the database"
                    % (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' %
                         traceback.format_exc())
Ejemplo n.º 20
0
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None):
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &amp;
        #
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            if isinstance(value, str):
                value = value.decode('utf-8')  # make unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        request = urllib2.Request(ur'%s' % tor_url)
        if lazylibrarian.CONFIG['PROXY_HOST']:
            request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], lazylibrarian.CONFIG['PROXY_TYPE'])
        request.add_header('Accept-encoding', 'gzip')
        request.add_header('User-Agent', USER_AGENT)

        try:
            response = urllib2.urlopen(request, timeout=90)
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read())
                f = gzip.GzipFile(fileobj=buf)
                torrent = f.read()
            else:
                torrent = response.read()

        except socket.timeout:
            logger.warn('Timeout fetching torrent from url: %s' % tor_url)
            return False
        except urllib2.URLError as e:
            logger.warn('Error fetching torrent from url: %s, %s' % (tor_url, e.reason))
            return False
        except ValueError as e:
            logger.warn('Error, invalid url: [%s] %s' % (full_url, str(e)))
            return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                try:
                    with open(tor_path, 'wb') as torrent_file:
                        torrent_file.write(torrent)
                    logger.debug('Magnet file saved: %s' % tor_path)
                    setperm(tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.debug("Failed to write magnet to file %s, %s" % (tor_path, str(e)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            try:
                with open(tor_path, 'wb') as torrent_file:
                    torrent_file.write(torrent)
                setperm(tor_path)
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.debug("Failed to write torrent to file %s, %s" % (tor_path, str(e)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url)  # returns hash or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID, lazylibrarian.CONFIG['DELUGE_LABEL'])
                    result = client.call('core.get_torrent_status', downloadID, {})
                    # for item in result:
                    #    logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']

            except Exception as e:
                logger.debug('DelugeRPC failed %s' % str(e))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID="%s"' % bookid)
        myDB.action('UPDATE wanted SET status = "Snatched", Source = "%s", DownloadID = "%s" WHERE NZBurl="%s"' %
                    (Source, downloadID, full_url))
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl="%s"' % (tor_title, full_url))
        return True
    else:
        logger.error(u'Failed to download torrent from %s, %s' % (Source, tor_url))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl="%s"' % full_url)
        return False
Ejemplo n.º 21
0
def SABnzbd(title=None, nzburl=None, remove_data=False):

    if nzburl in ['delete', 'delhistory'] and title == 'unknown':
        res = '%s function unavailable in this version of sabnzbd, no nzo_ids' % nzburl
        logger.debug(res)
        return False, res

    hostname = lazylibrarian.CONFIG['SAB_HOST']
    port = check_int(lazylibrarian.CONFIG['SAB_PORT'], 0)
    if not hostname or not port:
        res = 'Invalid sabnzbd host or port, check your config'
        logger.error(res)
        return False, res

    if hostname.endswith('/'):
        hostname = hostname[:-1]
    if not hostname.startswith("http://") and not hostname.startswith(
            "https://"):
        hostname = 'http://' + hostname

    HOST = "%s:%s" % (hostname, port)

    if lazylibrarian.CONFIG['SAB_SUBDIR']:
        HOST = HOST + "/" + lazylibrarian.CONFIG['SAB_SUBDIR']

    params = {}
    if nzburl == 'auth' or nzburl == 'get_cats':
        # connection test, check auth mode or get_cats
        params['mode'] = nzburl
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(%s)' % nzburl
    elif nzburl == 'queue':
        params['mode'] = 'queue'
        params['limit'] = '100'
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(Queue)'
    elif nzburl == 'history':
        params['mode'] = 'history'
        params['limit'] = '100'
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(History)'
    elif nzburl == 'delete':
        # only deletes tasks if still in the queue, ie NOT completed tasks
        params['mode'] = 'queue'
        params['output'] = 'json'
        params['name'] = nzburl
        params['value'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(Delete) ' + title
    elif nzburl == 'delhistory':
        params['mode'] = 'history'
        params['output'] = 'json'
        params['name'] = 'delete'
        params['value'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(DelHistory) ' + title
    else:
        params['mode'] = 'addurl'
        params['output'] = 'json'
        if nzburl:
            params['name'] = nzburl
        if title:
            params['nzbname'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if lazylibrarian.CONFIG['SAB_CAT']:
            params['cat'] = lazylibrarian.CONFIG['SAB_CAT']
        if lazylibrarian.CONFIG['USENET_RETENTION']:
            params["maxage"] = lazylibrarian.CONFIG['USENET_RETENTION']

# FUTURE-CODE
#    if lazylibrarian.SAB_PRIO:
#        params["priority"] = lazylibrarian.SAB_PRIO
#    if lazylibrarian.SAB_PP:
#        params["script"] = lazylibrarian.SAB_SCRIPT

    URL = HOST + "/api?" + urlencode(params)

    # to debug because of api
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Request url for <a href="%s">SABnzbd</a>' % URL)
    proxies = proxyList()
    try:
        timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
        r = requests.get(URL, timeout=timeout, proxies=proxies)
        result = r.json()
    except requests.exceptions.Timeout:
        res = "Timeout connecting to SAB with URL: %s" % URL
        logger.error(res)
        return False, res
    except Exception as e:
        if hasattr(e, 'reason'):
            errmsg = e.reason
        elif hasattr(e, 'strerror'):
            errmsg = e.strerror
        else:
            errmsg = str(e)

        res = "Unable to connect to SAB with URL: %s, %s" % (URL, errmsg)
        logger.error(res)
        return False, res
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug("Result text from SAB: " + str(result))

    if title:
        title = unaccented_str(title)
        if title.startswith('LL.('):
            return result, ''
    if result['status'] is True:
        logger.info("%s sent to SAB successfully." % title)
        # sab versions earlier than 0.8.0 don't return nzo_ids
        if 'nzo_ids' in result:
            if result['nzo_ids']:  # check its not empty
                return result['nzo_ids'][0], ''
        return 'unknown'
    elif result['status'] is False:
        res = "SAB returned Error: %s" % result['error']
        logger.error(res)
        return False, res
    else:
        res = "Unknown error: %s" % str(result)
        logger.error(res)
        return False, res
Ejemplo n.º 22
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if not mags:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if not mags:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype})

        if not searchlist:
            logger.warn('There is nothing to search for.  Mark some magazines as active.')

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'direct'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders, dltypes = IterateOverRSSSites()
                if not nproviders or 'M' not in dltypes:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        if 'M' in item['types']:
                            resultlist.append({
                                'bookid': book['bookid'],
                                'nzbprov': item['tor_prov'],
                                'nzbtitle': item['tor_title'],
                                'nzburl': item['tor_url'],
                                'nzbdate': item['tor_date'],  # may be fake date as none returned from rss torrents
                                'nzbsize': item['tor_size'],
                                'nzbmode': item['tor_type']
                            })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
                           '#': '# '}
                    nzbtitle_formatted = replace_all(nzbtitle, dic)
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split()

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            bookid_exploded = replace_all(bookid, dic).split()

                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug("Rejecting %s, missing %s" % (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        "Magazine title matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass, nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']
                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)

                                    if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [3, 5, 6]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [1]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10,
                                                                                12, 13, 15, 16, 18]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" % control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age

                                    if str(issuedate).isdigit():
                                        logger.debug('Magazine comparing issue numbers (%s)' % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d', str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(
                                            lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                        logger.debug('Magazine date comparing to %s' % control_date)
                                    else:
                                        logger.debug('Magazine unable to find comparison type [%s]' % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year, int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d', str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (year, issuenum)
                                                comp_date = datecompare(issuedate, control_date)
                                        if not comp_date:
                                            logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                    else:
                                        logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug('This issue of %s not in a wanted date format.' % nzbtitle_formatted)
                            elif comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + issuedate
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' % len(issues))
                                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' %
                                                   insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('%s is already in %s marked %s' %
                                                 (nzbtitle, insert_table, mag_entry['Status']))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                if insert_table == 'pastissues':
                                    # try to mark ones we've already got
                                    match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?",
                                                       (bookid, issuedate))
                                    if match:
                                        insert_status = "Have"
                                    else:
                                        insert_status = "Skipped"
                                else:
                                    insert_status = "Wanted"
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": issuedate,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch, res = TORDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'magazine')
                    elif magazine['nzbmode'] == 'direct':
                        snatch, res = DirectDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'magazine')
                    elif magazine['nzbmode'] == 'nzb':
                        snatch, res = NZBDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'magazine')
                    else:
                        res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"])
                        logger.error(res)
                        snatch = 0

                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl']))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='PostProcessor')
                    else:
                        myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                                    (res, magazine["nzburl"]))

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Ejemplo n.º 23
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype})

        if not searchlist:
            logger.warn('There is nothing to search for.  Mark some magazines as active.')

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                                item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
                           '#': '# '}
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split(' ')

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug("Rejecting %s, missing %s" % (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        "Magazine title matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass, nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']
                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)

                                    if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [3, 5, 6]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [1]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10,
                                                                                12, 13, 15, 16, 18]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" % control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age

                                    if str(issuedate).isdigit():
                                        logger.debug('Magazine comparing issue numbers (%s)' % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d', str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(
                                            lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                        logger.debug('Magazine date comparing to %s' % control_date)
                                    else:
                                        logger.debug('Magazine unable to find comparison type [%s]' % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year, int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d', str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (year, issuenum)
                                                comp_date = datecompare(issuedate, control_date)
                                        if not comp_date:
                                            logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                    else:
                                        logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug('This issue of %s not in a wanted date format.' % nzbtitle_formatted)
                            elif comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + issuedate
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' % len(issues))
                                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' %
                                                   insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('%s is already in %s marked %s' %
                                                 (nzbtitle, insert_table, mag_entry['Status']))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                if insert_table == 'pastissues':
                                    # try to mark ones we've already got
                                    match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?",
                                                       (bookid, issuedate))
                                    if match:
                                        insert_status = "Have"
                                    else:
                                        insert_status = "Skipped"
                                else:
                                    insert_status = "Wanted"
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": issuedate,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch, res = TORDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    elif magazine['nzbmode'] == 'direct':
                        snatch, res = DirectDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    elif magazine['nzbmode'] == 'nzb':
                        snatch, res = NZBDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    else:
                        res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"])
                        logger.error(res)
                        snatch = 0

                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl']))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='PostProcessor')
                    else:
                        myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                                    (res, magazine["nzburl"]))

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Ejemplo n.º 24
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss

    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHMAG"

    myDB = database.DBConnection()
    searchlist = []

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
                                 )
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select(
                'SELECT Title, LastAcquired, IssueDate from magazines \
                                          WHERE Title="%s" AND Status="Active"'
                % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 0:
        return

    # should clear old search results as might not be available any more
    # ie torrent not available, changed providers, out of news server retention etc.
    # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
    logger.debug(u"Removing old magazine search results")
    myDB.action('DELETE from pastissues WHERE Status="Skipped"')

    logger.info('Searching for %i magazine%s' %
                (len(searchmags), plural(len(searchmags))))

    for searchmag in searchmags:
        bookid = searchmag['Title']
        searchterm = searchmag['Title']
        # frequency = searchmag[1]
        # last_acquired = searchmag[2]
        # issue_date = searchmag[3]

        dic = {
            '...': '',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': '',
            '*': ''
        }

        searchterm = unaccented_str(replace_all(searchterm, dic))
        searchterm = re.sub('[\.\-\/]', ' ',
                            searchterm).encode(lazylibrarian.SYS_ENCODING)
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn(
            'There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:

        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn(
                    'No nzb providers are set. Check config for NEWZNAB or TORZNAB providers'
                )

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn(
                    'No torrent providers are set. Check config for TORRENT providers'
                )

            if tor_resultlist:
                for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                    resultlist.append({
                        'bookid': item['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate':
                        'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                        'nzbsize': item['tor_size'],
                        'nzbmode': 'torrent'
                    })

        if lazylibrarian.USE_RSS():
            rss_resultlist, nproviders = IterateOverRSSSites(book, 'mag')
            if not nproviders:
                logger.warn(
                    'No rss providers are set. Check config for RSS providers')

            if rss_resultlist:
                for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                    resultlist.append({
                        'bookid': book['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate': item[
                            'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                        'nzbsize': item['tor_size'],
                        'nzbmode': item['tor_type']
                    })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])

        else:
            bad_regex = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            maglist = []
            issues = []

            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = unaccented_str(nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace(
                    "'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                    nzbsize_temp = 1000
                nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']

                results = myDB.match(
                    'SELECT * from magazines WHERE Title="%s"' % bookid)
                if not results:
                    logger.debug(
                        'Magazine [%s] does not match search term [%s].' %
                        (nzbtitle, bookid))
                    bad_regex = bad_regex + 1
                else:
                    control_date = results['IssueDate']
                    reject_list = getList(results['Regex'])

                    dic = {
                        '.': ' ',
                        '-': ' ',
                        '/': ' ',
                        '+': ' ',
                        '_': ' ',
                        '(': '',
                        ')': ''
                    }
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                    # remove extra spaces if they're in a row
                    nzbtitle_exploded_temp = " ".join(
                        nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                    if ' ' in bookid:
                        bookid_exploded = bookid.split(' ')
                    else:
                        bookid_exploded = [bookid]

                    # check nzb starts with magazine title, and ends with a date
                    # eg The MagPI Issue 22 - July 2015
                    # do something like check left n words match title
                    # then check last n words are a date

                    rejected = False
                    if len(nzbtitle_exploded) > len(
                            bookid_exploded
                    ):  # needs to be longer as it has to include a date
                        # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                        mag_title_match = fuzz.token_set_ratio(
                            unaccented(bookid), unaccented(nzbtitle_formatted))

                        if mag_title_match < lazylibrarian.MATCH_RATIO:
                            logger.debug(u"Magazine token set Match failed: " +
                                         str(mag_title_match) + "% for " +
                                         nzbtitle_formatted)
                            rejected = True
                    else:
                        rejected = True

                    if not rejected:
                        already_failed = myDB.match(
                            'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"'
                            % nzburl)
                        if already_failed:
                            logger.debug("Rejecting %s, blacklisted at %s" %
                                         (nzbtitle_formatted,
                                          already_failed['NZBprov']))
                            rejected = True

                    if not rejected:
                        lower_title = unaccented(nzbtitle_formatted).lower()
                        lower_bookid = unaccented(bookid).lower()
                        for word in reject_list:
                            if word in lower_title and word not in lower_bookid:
                                rejected = True
                                logger.debug("Rejecting %s, contains %s" %
                                             (nzbtitle_formatted, word))
                                break

                    # maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
                    # if maxsize and nzbsize > maxsize:
                    #    rejected = True
                    #    logger.debug("Rejecting %s, too large" % nzbtitle_formatted)

                    if not rejected:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # so strip all the trailing junk...
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() in ['pdf', 'true', 'truepdf']:
                            nzbtitle_exploded.pop(
                            )  # gotta love the function names

                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            regexA_year = nzbtitle_exploded[
                                len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[
                                len(nzbtitle_exploded) - 2]
                            regexA_month = month2num(
                                unaccented(regexA_month_temp))
                            if not regexA_year.isdigit() or int(
                                    regexA_year) < 1900 or int(
                                        regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure

                            # if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[
                                len(nzbtitle_exploded) -
                                3].rstrip(',').zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day
                                       ) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            # else:
                            # regexA_day = '01'  # monthly, or less frequent
                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year),
                                                      int(regexA_month),
                                                      int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY or MonthName DD, YYYY
                                regexB_year = nzbtitle_exploded[
                                    len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[
                                    len(nzbtitle_exploded) - 3]
                                regexB_month = month2num(
                                    unaccented(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[
                                    len(nzbtitle_exploded) -
                                    2].rstrip(',').zfill(2)
                                if not regexB_year.isdigit() or int(
                                        regexB_year) < 1900 or int(
                                            regexB_year) > 2100:
                                    regexB_year = 'fail'
                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(
                                        int(regexB_year), int(regexB_month),
                                        int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try  YYYY MM
                                    regexC_year = nzbtitle_exploded[
                                        len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit(
                                    ) and int(regexC_year) > 1900 and int(
                                            regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[
                                            len(nzbtitle_exploded) -
                                            1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[
                                            len(nzbtitle_exploded) - 3]
                                        regexC_month = 0
                                        regexC_day = 0
                                        if regexC_year.isdigit(
                                        ) and int(regexC_year) > 1900 and int(
                                                regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[
                                                len(nzbtitle_exploded) -
                                                2].zfill(2)
                                            regexC_day = nzbtitle_exploded[
                                                len(nzbtitle_exploded) -
                                                1].zfill(2)
                                        else:
                                            regexC_year = 'fail'
                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(
                                            int(regexC_year),
                                            int(regexC_month), int(regexC_day))
                                    except Exception:
                                        # regexD Issue/No/Vol nn, YYYY or Issue/No/Vol nn
                                        try:
                                            IssueLabel = nzbtitle_exploded[
                                                len(nzbtitle_exploded) - 2]
                                            if IssueLabel.lower() in [
                                                    "issue", "no", "vol"
                                            ]:
                                                # issue nn
                                                regexD_issue = nzbtitle_exploded[
                                                    len(nzbtitle_exploded) - 1]
                                                if regexD_issue.isdigit():
                                                    newdatish = str(
                                                        int(regexD_issue)
                                                    )  # 4 == 04 == 004
                                            else:
                                                IssueLabel = nzbtitle_exploded[
                                                    len(nzbtitle_exploded) - 3]
                                                if IssueLabel.lower() in [
                                                        "issue", "no", "vol"
                                                ]:
                                                    # issue nn, YYYY
                                                    regexD_issue = nzbtitle_exploded[
                                                        len(nzbtitle_exploded)
                                                        - 2]
                                                    regexD_issue = regexD_issue.strip(
                                                        ',')
                                                    if regexD_issue.isdigit():
                                                        newdatish = str(
                                                            int(regexD_issue)
                                                        )  # 4 == 04 == 004
                                                    else:
                                                        raise ValueError
                                                    regexD_year = nzbtitle_exploded[
                                                        len(nzbtitle_exploded)
                                                        - 1]
                                                    if regexD_year.isdigit():
                                                        if int(
                                                                regexD_year
                                                        ) < int(datetime.date.
                                                                today().year):
                                                            newdatish = 0  # it's old
                                                else:
                                                    raise ValueError
                                        except Exception:
                                            logger.debug(
                                                'Magazine %s not in proper date format.'
                                                % nzbtitle_formatted)
                                            bad_date = bad_date + 1
                                            # allow issues with good name but bad date to be included
                                            # so user can manually select them, incl those with issue numbers
                                            newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                            # continue
                        else:
                            logger.debug(
                                'Magazine [%s] does not match the search term [%s].'
                                % (nzbtitle_formatted, bookid))
                            bad_regex = bad_regex + 1
                            continue

                        #  wanted issues go into wanted table marked "Wanted"
                        #  the rest into pastissues table marked "Skipped"
                        insert_table = "pastissues"
                        insert_status = "Skipped"

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            # or for magazines with only an issue number, use zero

                            if '-' in str(newdatish):
                                start_time = time.time()
                                start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                                control_date = time.strftime(
                                    "%Y-%m-%d", time.localtime(start_time))
                            else:
                                control_date = 0

                        if '-' in str(control_date) and '-' in str(newdatish):
                            # only grab a copy if it's newer than the most recent we have,
                            # or newer than a month ago if we have none
                            comp_date = datecompare(newdatish, control_date)
                        elif '-' not in str(control_date) and '-' not in str(
                                newdatish):
                            # for issue numbers, check if later than last one we have
                            comp_date = int(newdatish) - int(control_date)
                            newdatish = "%s" % newdatish
                            newdatish = newdatish.zfill(
                                4)  # pad so we sort correctly
                        else:
                            # invalid comparison of date and issue number
                            logger.debug(
                                'Magazine %s incorrect date or issue format.' %
                                nzbtitle_formatted)
                            bad_date = bad_date + 1
                            newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                            comp_date = 0

                        if comp_date > 0:
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug(
                                    'This issue of %s is new, downloading' %
                                    nzbtitle_formatted)
                                issues.append(issue)
                                insert_table = "wanted"
                                insert_status = "Wanted"
                                nzbdate = now()  # when we asked for it
                            else:
                                logger.debug(
                                    'This issue of %s is already flagged for download'
                                    % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug(
                                    'This issue of %s is old; skipping.' %
                                    nzbtitle_formatted)
                                old_date = old_date + 1

                        #  store only the _new_ matching results
                        #  Don't add a new entry if this issue has been found on an earlier search
                        #  and status has been user-set ( we only delete the "Skipped" ones )
                        #  In "wanted" table it might be already snatched/downloading/processing

                        mag_entry = myDB.select(
                            'SELECT * from %s WHERE NZBtitle="%s" and NZBprov="%s"'
                            % (insert_table, nzbtitle, nzbprov))
                        if not mag_entry:
                            controlValueDict = {
                                "NZBtitle": nzbtitle,
                                "NZBprov": nzbprov
                            }
                            newValueDict = {
                                "NZBurl": nzburl,
                                "BookID": bookid,
                                "NZBdate": nzbdate,
                                "AuxInfo": newdatish,
                                "Status": insert_status,
                                "NZBsize": nzbsize,
                                "NZBmode": nzbmode
                            }
                            myDB.upsert(insert_table, newValueDict,
                                        controlValueDict)

                    else:
                        # logger.debug('Magazine [%s] was rejected.' % nzbtitle_formatted)
                        bad_regex = bad_regex + 1

            logger.info(
                'Found %i result%s for %s. %i new, %i old, %i fail date, %i fail name: %i to download'
                % (total_nzbs, plural(total_nzbs), bookid, new_date, old_date,
                   bad_date, bad_regex, len(maglist)))

            for magazine in maglist:
                if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                    snatch = TORDownloadMethod(magazine['bookid'],
                                               magazine['nzbprov'],
                                               magazine['nzbtitle'],
                                               magazine['nzburl'])
                else:
                    snatch = NZBDownloadMethod(magazine['bookid'],
                                               magazine['nzbprov'],
                                               magazine['nzbtitle'],
                                               magazine['nzburl'])
                if snatch:
                    logger.info('Downloading %s from %s' %
                                (magazine['nzbtitle'], magazine["nzbprov"]))
                    notify_snatch("%s from %s at %s" % (unaccented(
                        magazine['nzbtitle']), magazine["nzbprov"], now()))
                    scheduleJob(action='Start', target='processDir')
            maglist = []

    if reset:
        scheduleJob(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")
Ejemplo n.º 25
0
def create_cover(issuefile=None, refresh=False):
    if lazylibrarian.CONFIG[
            'IMP_CONVERT'] == 'None':  # special flag to say "no covers required"
        return
    if issuefile is None or not os.path.isfile(issuefile):
        logger.debug('No issuefile %s' % issuefile)
        return

    base, extn = os.path.splitext(issuefile)
    if not extn:
        logger.debug('Unable to create cover for %s, no extension?' %
                     issuefile)
        return

    coverfile = base + '.jpg'

    if os.path.isfile(coverfile):
        if refresh:
            os.remove(coverfile)
        else:
            logger.debug('Cover for %s exists' % issuefile)
            return  # quit if cover already exists and we didn't want to refresh

    logger.debug('Creating cover for %s' % issuefile)
    data = ''  # result from unzip or unrar
    extn = extn.lower()
    if extn in ['.cbz', '.epub']:
        try:
            data = zipfile.ZipFile(issuefile)
        except Exception as why:
            logger.debug("Failed to read zip file %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))
            data = ''
    elif extn in ['.cbr']:
        try:
            # unrar will complain if the library isn't installed, needs to be compiled separately
            # see https://pypi.python.org/pypi/unrar/ for instructions
            # Download source from http://www.rarlab.com/rar_add.htm
            # note we need LIBRARY SOURCE not a binary package
            # make lib; sudo make install-lib; sudo ldconfig
            # lib.unrar should then be able to find libunrar.so
            from lib.unrar import rarfile
            data = rarfile.RarFile(issuefile)
        except Exception as why:
            logger.debug("Failed to read rar file %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))
            data = ''
    if data:
        img = ''
        try:
            for member in data.namelist():
                memlow = member.lower()
                if '-00.' in memlow or '000.' in memlow or 'cover.' in memlow:
                    if memlow.endswith('.jpg') or memlow.endswith('.jpeg'):
                        img = data.read(member)
                        break
            if img:
                with open(coverfile, 'wb') as f:
                    f.write(img)
                return
            else:
                logger.debug("Failed to find image in %s" % issuefile)
        except Exception as why:
            logger.debug("Failed to extract image from %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))

    elif extn == '.pdf':
        generator = ""
        if len(lazylibrarian.CONFIG['IMP_CONVERT']
               ):  # allow external convert to override libraries
            generator = "external program: %s" % lazylibrarian.CONFIG[
                'IMP_CONVERT']
            if "gsconvert.py" in lazylibrarian.CONFIG['IMP_CONVERT']:
                msg = "Use of gsconvert.py is deprecated, equivalent functionality is now built in. "
                msg += "Support for gsconvert.py may be removed in a future release. See wiki for details."
                logger.warn(msg)
            converter = lazylibrarian.CONFIG['IMP_CONVERT']
            postfix = ''
            if not os.path.isfile(
                    converter):  # full path given, or just program_name?
                converter = os.path.join(os.getcwd(),
                                         lazylibrarian.CONFIG['IMP_CONVERT'])
                if 'convert' in converter and 'gs' not in converter:
                    # tell imagemagick to only convert first page
                    postfix = '[0]'
            try:
                params = [
                    converter,
                    '%s%s' % (issuefile, postfix),
                    '%s' % coverfile
                ]
                res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                if res:
                    logger.debug('%s reports: %s' %
                                 (lazylibrarian.CONFIG['IMP_CONVERT'], res))
            except Exception as e:
                # logger.debug(params)
                logger.debug('External "convert" failed %s %s' %
                             (type(e).__name__, str(e)))

        elif platform.system() == "Windows":
            GS = os.path.join(os.getcwd(), "gswin64c.exe")
            generator = "local gswin64c"
            if not os.path.isfile(GS):
                GS = os.path.join(os.getcwd(), "gswin32c.exe")
                generator = "local gswin32c"
            if not os.path.isfile(GS):
                params = ["where", "gswin64c"]
                try:
                    GS = subprocess.check_output(
                        params, stderr=subprocess.STDOUT).strip()
                    generator = "gswin64c"
                except Exception as e:
                    logger.debug("where gswin64c failed: %s %s" %
                                 (type(e).__name__, str(e)))
            if not os.path.isfile(GS):
                params = ["where", "gswin32c"]
                try:
                    GS = subprocess.check_output(
                        params, stderr=subprocess.STDOUT).strip()
                    generator = "gswin32c"
                except Exception as e:
                    logger.debug("where gswin32c failed: %s %s" %
                                 (type(e).__name__, str(e)))
            if not os.path.isfile(GS):
                logger.debug("No gswin found")
                generator = "(no windows ghostscript found)"
            else:
                try:
                    params = [GS, "--version"]
                    res = subprocess.check_output(params,
                                                  stderr=subprocess.STDOUT)
                    logger.debug("Found %s [%s] version %s" %
                                 (generator, GS, res))
                    generator = "%s version %s" % (generator, res)
                    issuefile = issuefile.split('[')[0]
                    params = [
                        GS, "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH", "-dSAFER",
                        "-dFirstPage=1", "-dLastPage=1", "-dUseCropBox",
                        "-sOutputFile=%s" % coverfile, issuefile
                    ]
                    res = subprocess.check_output(params,
                                                  stderr=subprocess.STDOUT)
                    if not os.path.isfile(coverfile):
                        logger.debug("Failed to create jpg: %s" % res)
                except Exception:  # as why:
                    logger.debug("Failed to create jpg for %s" % issuefile)
                    logger.debug('Exception in gswin create_cover: %s' %
                                 traceback.format_exc())
        else:  # not windows
            try:
                # noinspection PyUnresolvedReferences,PyUnresolvedReferences,PyUnresolvedReferences
                from wand.image import Image
                interface = "wand"
            except ImportError:
                try:
                    # No PythonMagick in python3
                    # noinspection PyUnresolvedReferences
                    import PythonMagick
                    interface = "pythonmagick"
                except ImportError:
                    interface = ""
            try:
                if interface == 'wand':
                    generator = "wand interface"
                    # noinspection PyUnboundLocalVariable
                    with Image(filename=issuefile + '[0]') as img:
                        img.save(filename=coverfile)

                elif interface == 'pythonmagick':
                    generator = "pythonmagick interface"
                    # noinspection PyUnboundLocalVariable,PyArgumentList
                    img = PythonMagick.Image()
                    # PythonMagick requires filenames to be str, not unicode
                    if type(issuefile) is unicode:
                        issuefile = unaccented_str(issuefile)
                    if type(coverfile) is unicode:
                        coverfile = unaccented_str(coverfile)
                    img.read(issuefile + '[0]')
                    img.write(coverfile)

                else:
                    GS = os.path.join(os.getcwd(), "gs")
                    generator = "local gs"
                    if not os.path.isfile(GS):
                        GS = ""
                        params = ["which", "gs"]
                        try:
                            GS = subprocess.check_output(
                                params, stderr=subprocess.STDOUT).strip()
                            generator = GS
                        except Exception as e:
                            logger.debug("which gs failed: %s %s" %
                                         (type(e).__name__, str(e)))
                        if not os.path.isfile(GS):
                            logger.debug("Cannot find gs")
                            generator = "(no gs found)"
                        else:
                            params = [GS, "--version"]
                            res = subprocess.check_output(
                                params, stderr=subprocess.STDOUT)
                            logger.debug("Found gs [%s] version %s" %
                                         (GS, res))
                            generator = "%s version %s" % (generator, res)
                            issuefile = issuefile.split('[')[0]
                            params = [
                                GS, "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH",
                                "-dSAFER", "-dFirstPage=1", "-dLastPage=1",
                                "-dUseCropBox",
                                "-sOutputFile=%s" % coverfile, issuefile
                            ]
                            res = subprocess.check_output(
                                params, stderr=subprocess.STDOUT)
                            if not os.path.isfile(coverfile):
                                logger.debug("Failed to create jpg: %s" % res)
            except Exception as e:
                logger.debug("Unable to create cover for %s using %s %s" %
                             (issuefile, type(e).__name__, generator))
                logger.debug('Exception in create_cover: %s' %
                             traceback.format_exc())

        if os.path.isfile(coverfile):
            setperm(coverfile)
            logger.debug("Created cover for %s using %s" %
                         (issuefile, generator))
            return

    # if not recognised extension or cover creation failed
    try:
        shutil.copyfile(
            os.path.join(lazylibrarian.PROG_DIR, 'data/images/nocover.jpg'),
            coverfile)
        setperm(coverfile)
    except Exception as why:
        logger.debug("Failed to copy nocover file, %s %s" %
                     (type(why).__name__, str(why)))
    return
Ejemplo n.º 26
0
def import_book(pp_path=None, bookID=None):

    # Separated this into a function so we can more easily import books from an alternate directory
    # and move them into LL folder structure given just the bookID, returns True or False
    # eg if import_book(source_directory, bookID):
    #         ppcount = ppcount + 1
    #
    myDB = database.DBConnection()
    data = myDB.select('SELECT * from books WHERE BookID="%s"' % bookID)
    if data:
        authorname = data[0]['AuthorName']
        bookname = data[0]['BookName']

        # try:
        #    auth_dir = os.path.join(lazylibrarian.DESTINATION_DIR, authorname).encode(lazylibrarian.SYS_ENCODING)
        #    os.chmod(auth_dir, 0777)
        # except Exception, e:
        #    logger.debug("Could not chmod author directory: " + str(auth_dir))

        if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
            logger.warn('Please check your EBOOK_DEST_FOLDER setting')
            lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

        dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
        global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
        global_name = unaccented(global_name)
        # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
        dest_path = unaccented_str(replace_all(dest_path, dic))
        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

        if processBook:
            # update nzbs
            was_snatched = len(myDB.select('SELECT BookID FROM wanted WHERE BookID="%s"' % bookID))
            if was_snatched:
                controlValueDict = {"BookID": bookID}
                newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)
            if bookname:
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug('Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            logger.info('Successfully processed: %s' % global_name)
            notify_download("%s at %s" % (global_name, now()))
            return True
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            was_snatched = len(myDB.select('SELECT BookID FROM wanted WHERE BookID="%s"' % bookID))
            if was_snatched:
                controlValueDict = {"BookID": bookID}
                newValueDict = {"Status": "Failed", "NZBDate": now()}
                myDB.upsert("wanted", newValueDict, controlValueDict)
            # reset status so we try for a different version
            myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % bookID)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except:
                logger.debug("Unable to rename %s" % pp_path)
    return False
Ejemplo n.º 27
0
def search_rss_book(books=None, reset=False):
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHRSS"

    if not(lazylibrarian.USE_RSS()):
        logger.warn('RSS search is disabled')
        scheduleJob(action='Stop', target='search_rss_book')
        return

    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books WHERE Status="Wanted" order by BookAdded desc')
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbook = myDB.select('SELECT BookID, AuthorName, BookName, BookSub from books WHERE BookID="%s" \
                                     AND Status="Wanted"' % book['bookid'])
            for terms in searchbook:
                searchbooks.append(terms)

    if len(searchbooks) == 0:
        logger.debug("RSS search requested for no books or invalid BookID")
        return
    else:
        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

    resultlist, nproviders = IterateOverRSSSites()
    if not nproviders:
        logger.warn('No rss providers are set, check config')
        return  # No point in continuing

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    rss_count = 0
    for book in searchbooks:
        author = book['AuthorName']
        title = book['BookName']

        author = unaccented_str(replace_all(author, dic))
        title = unaccented_str(replace_all(title, dic))

        found = processResultList(resultlist, author, title, book)

        # if you can't find the book, try author without initials,
        # and title without any "(extended details, series etc)"
        if not found:
            if author[1] in '. ' or '(' in title:  # anything to shorten?
                while author[1] in '. ':  # strip any initials
                    author = author[2:].strip()  # and leading whitespace
                if '(' in title:
                    title = title.split('(')[0]
                found = processResultList(resultlist, author, title, book)

        if not found:
            logger.debug("Searches returned no results. Adding book %s - %s to queue." % (author, title))
        if found > True:
            rss_count = rss_count + 1

    logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    if reset:
        scheduleJob(action='Restart', target='search_rss_book')
Ejemplo n.º 28
0
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None, library='eBook'):
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet:?'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    elif 'magnet:?' in tor_url:
        # discard any other parameters and just use the magnet link
        torrent = 'magnet:?' + tor_url.split('magnet:?')[1]
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &amp;
        #
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = makeUnicode(value)  # ensure unicode
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add extra parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
        proxies = proxyList()
        try:
            r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
            torrent = r.content
        except requests.exceptions.Timeout:
            logger.warn('Timeout fetching file from url: %s' % tor_url)
            return False
        except Exception as e:
            # some jackett providers redirect internally using http 301 to a magnet link
            # which requests can't handle, so throws an exception
            if "magnet:?" in str(e):
                torrent = 'magnet:?' + str(e).split('magnet:?')[1]. strip("'")
            else:
                if hasattr(e, 'reason'):
                    logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, e.reason))
                else:
                    logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, str(e)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                msg = ''
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, text_type):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm '
                    setperm(tor_path)
                    msg += 'Saved '
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.warn("Failed to write magnet to file: %s %s" % (type(e).__name__, str(e)))
                    logger.debug("Progress: %s" % msg)
                    logger.debug("Filename [%s]" % (repr(tor_path)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            msg = ''
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, text_type):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved '
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.warn("Failed to write torrent to file: %s %s" % (type(e).__name__, str(e)))
                logger.debug("Progress: %s" % msg)
                logger.debug("Filename [%s]" % (repr(tor_path)))
                return False

    hashid = CalcTorrentHash(torrent)
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
            status = qbittorrent.addTorrent(torrent, hashid)
        elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
            status = qbittorrent.addTorrent(torrent, hashid)
        else:
            status = qbittorrent.addTorrent(tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug("TORRENT %s [%s] [%s]" % (len(torrent), torrent[:20], torrent[-20:]))
        Source = "TRANSMISSION"
        if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
            downloadID = transmission.addTorrent(torrent)  # returns id or False
        elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
            downloadID = transmission.addTorrent(torrent)
        elif torrent:
            downloadID = transmission.addTorrent(None, metainfo=b64encode(torrent))
        else:
            downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = hashid
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
                downloadID = deluge.addTorrent(torrent)
            elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
                downloadID = deluge.addTorrent(torrent)
            elif torrent:
                downloadID = deluge.addTorrent(tor_title, data=b64encode(torrent))
            else:
                downloadID = deluge.addTorrent(tor_url)  # can be link or magnet, returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                elif isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
                    downloadID = client.call('core.add_torrent_magnet', torrent, args)
                elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', torrent, args)
                elif torrent:
                    downloadID = client.call('core.add_torrent_file', tor_title, b64encode(torrent), args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID, lazylibrarian.CONFIG['DELUGE_LABEL'].lower())
                    result = client.call('core.get_torrent_status', downloadID, {})
                    # for item in result:
                    #    logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']

            except Exception as e:
                logger.error('DelugeRPC failed %s %s' % (type(e).__name__, str(e)))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" % library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = True
                        logger.debug("Rejecting torrent name %s, contains %s" % (tor_title, word))
                        break
                if rejected:
                    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url,))
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?', (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, full_url))
        return True

    logger.error('Failed to download torrent from %s, %s' % (Source, tor_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url,))
    return False
Ejemplo n.º 29
0
def processResultList(resultlist, author, title, book):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)

    matches = []

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = unaccented_str(replace_all(tor['tor_title'], dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, torTitle)
        tor_Title_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.action('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
                                    tor_url).fetchone()
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and not word in author.lower() and not word in book.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = round(float(tor_size_temp) / 1048576, 2)
        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)

        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            if tor_Title_match >= match_ratio and tor_Author_match >= match_ratio:
                bookid = book['bookid']
                tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                             ' LL.(' + book['bookid'] + ')').strip()
                tor_prov = tor['tor_prov']
                tor_feed = tor['tor_feed']

                controlValueDict = {"NZBurl": tor_url}
                newValueDict = {
                    "NZBprov": tor_prov,
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": tor_size,
                    "NZBtitle": tor_Title,
                    "NZBmode": "torrent",
                    "Status": "Skipped"
                }

                score = (tor_Title_match + tor_Author_match)/2  # as a percentage
                # lose a point for each extra word in the title so we get the closest match
                words = len(getList(torTitle))
                words -= len(getList(author))
                words -= len(getList(title))
                score -= abs(words)
                matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match RSS (%s%%): %s using %s search' %
            (score, nzb_Title, searchtype))

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                    newValueDict["BookID"]).fetchone()

        if snatchedbooks:  # check if one of the other downloaders got there first
            logger.debug('%s already marked snatched' % nzb_Title)
            return True
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            else:
                """
                #  http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                if not tor_url.startswith('magnet'):  # magnets don't use auth
                    pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                    auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                    # don't know what form of password hash is required, try sha1
                    tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
                """
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)

            if snatch:
                notify_snatch(newValueDict["NZBtitle"] + ' at ' + now())
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No RSS found for " + (book["authorName"] + ' ' +
                book['bookName']).strip())
    return False
Ejemplo n.º 30
0
def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for tor in resultlist:
        torTitle = unaccented_str(tor['tor_title'])
        torTitle = replace_all(torTitle, dictrepl).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % tor_url)
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in author.lower() and word not in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        tor_size_temp = check_int(tor_size_temp, 1000)
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor['tor_prov'],
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (torBook_match + torAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(author))
            words -= len(getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.info(u'Nearest TOR match (%s%%): %s using %s search for %s %s' %
                        (score, nzb_Title, searchtype, author, title))
            return False

        logger.info(u'Best TOR match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                   newValueDict["BookID"])
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it, not us
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if newValueDict["NZBprov"] == 'libgen':
                # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                              newValueDict["NZBtitle"], controlValueDict["NZBurl"], nzb_Title)
            else:
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                logger.info('Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch("%s from %s at %s" %
                              (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No torrent's found for [%s] using searchtype %s" % (book["searchterm"], searchtype))
    return False
Ejemplo n.º 31
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                         entrystatus='Active', refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' % authorname)
            # google doesnt like accents in author names
            set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Artist is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip if no author, no author is no book.
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            continue

                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif (len(book['isbn']) == 10) or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (book['name'], googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                                ignored += 1
                                continue

                        rejected = 0
                        check_status = False
                        book_status = bookstatus  # new_book status, or new_author status
                        audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                        added = today()
                        locked = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                            removedResults += 1
                            rejected = 1
                        else:
                            bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                            # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                            # editions) and sometimes uses the same bookid if the book is the same but the title is
                            # slightly different. Not sure if googlebooks does too, but we only want one...
                            cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                            existing_book = myDB.match(cmd, (bookid,))
                            if existing_book:
                                book_status = existing_book['Status']
                                audio_status = existing_book['AudioStatus']
                                locked = existing_book['Manual']
                                added = existing_book['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                if rejected in [3, 4, 5]:
                                    book_status = 'Ignored'
                                    audio_status = 'Ignored'
                                else:
                                    book_status = bookstatus  # new_book status, or new_author status
                                    audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                                added = today()
                                locked = False

                        if not rejected and re.match('[^\w-]', bookname):  # remove books with bad characters in title
                            logger.debug("[%s] removed book for bad characters" % bookname)
                            removedResults += 1
                            rejected = 2

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                                removedResults += 1
                                rejected = 3

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug('Rejecting %s, no publication date' % bookname)
                                removedResults += 1
                                rejected = 4

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' % bookname)
                                removedResults += 1
                                rejected = 5

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorname, bookname, bookid))
                                    rejected = 6
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid,))
                            if match:  # we have a book with this bookid already
                                if bookname != match['BookName'] or authorname != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorname, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = 7

                        if check_status or not rejected or (
                                lazylibrarian.CONFIG['IMP_IGNORE'] and rejected in [3, 4, 5]):  # dates, isbn
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug("Book found: " + bookname + " " + book['date'])
                                updated = False
                                if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug('Updated cover for %s using %s' % (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif book['img'] and book['img'].startswith('http'):
                                    link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % book['img'])

                                serieslist = []
                                if book['series']:
                                    serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                        updated = True
                                setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist, bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug("[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug("[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                         (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid,))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
            logger.debug("Removed %s bad character or no-name result%s for author" %
                         (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

            myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                        (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
Ejemplo n.º 32
0
def SABnzbd(title=None, nzburl=None, remove_data=False):

    if nzburl in ['delete', 'delhistory'] and title == 'unknown':
        logger.debug('%s function unavailable in this version of sabnzbd, no nzo_ids' % nzburl)
        return False

    hostname = lazylibrarian.CONFIG['SAB_HOST']
    port = check_int(lazylibrarian.CONFIG['SAB_PORT'], 0)
    if not hostname or not port:
        logger.error('Invalid sabnzbd host or port, check your config')
        return False

    if hostname.endswith('/'):
        hostname = hostname[:-1]
    if not hostname.startswith("http://") and not hostname.startswith("https://"):
        hostname = 'http://' + hostname

    HOST = "%s:%s" % (hostname, port)

    if lazylibrarian.CONFIG['SAB_SUBDIR']:
        HOST = HOST + "/" + lazylibrarian.CONFIG['SAB_SUBDIR']

    params = {}
    if nzburl == 'auth' or nzburl == 'get_cats':
        # connection test, check auth mode or get_cats
        params['mode'] = nzburl
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(%s)' % nzburl
    elif nzburl == 'queue':
        params['mode'] = 'queue'
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(Queue)'
    elif nzburl == 'history':
        params['mode'] = 'history'
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(History)'
    elif nzburl == 'delete':
        # only deletes tasks if still in the queue, ie NOT completed tasks
        params['mode'] = 'queue'
        params['output'] = 'json'
        params['name'] = nzburl
        params['value'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(Delete) ' + title
    elif nzburl == 'delhistory':
        params['mode'] = 'history'
        params['output'] = 'json'
        params['name'] = 'delete'
        params['value'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(DelHistory) ' + title
    else:
        params['mode'] = 'addurl'
        params['output'] = 'json'
        if nzburl:
            params['name'] = nzburl
        if title:
            params['nzbname'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if lazylibrarian.CONFIG['SAB_CAT']:
            params['cat'] = lazylibrarian.CONFIG['SAB_CAT']
        if lazylibrarian.CONFIG['USENET_RETENTION']:
            params["maxage"] = lazylibrarian.CONFIG['USENET_RETENTION']

# FUTURE-CODE
#    if lazylibrarian.SAB_PRIO:
#        params["priority"] = lazylibrarian.SAB_PRIO
#    if lazylibrarian.SAB_PP:
#        params["script"] = lazylibrarian.SAB_SCRIPT

    URL = HOST + "/api?" + urlencode(params)

    # to debug because of api
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Request url for <a href="%s">SABnzbd</a>' % URL)
    proxies = proxyList()
    try:
        timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
        r = requests.get(URL, timeout=timeout, proxies=proxies)
        result = r.json()
    except requests.exceptions.Timeout:
        logger.error("Timeout connecting to SAB with URL: %s" % URL)
        return False
    except Exception as e:
        if hasattr(e, 'reason'):
            errmsg = e.reason
        elif hasattr(e, 'strerror'):
            errmsg = e.strerror
        else:
            errmsg = str(e)

        logger.error("Unable to connect to SAB with URL: %s, %s" % (URL, errmsg))
        return False
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug("Result text from SAB: " + str(result))

    if title:
        title = unaccented_str(title)
        if title.startswith('LL.('):
            return result
    if result['status'] is True:
        logger.info("%s sent to SAB successfully." % title)
        # sab versions earlier than 0.8.0 don't return nzo_ids
        if 'nzo_ids' in result:
            if result['nzo_ids']:  # check its not empty
                return result['nzo_ids'][0]
        return 'unknown'
    elif result['status'] is False:
        logger.error("SAB returned Error: %s" % result['error'])
        return False
    else:
        logger.error("Unknown error: " + str(result))
        return False
Ejemplo n.º 33
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                         entrystatus='Active', refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' % authorname)
            # google doesnt like accents in author names
            set_url = self.url + urllib.quote('inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Artist is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urllib.urlencode(self.params)

                    try:
                        jsonresults, in_cache = get_json_request(URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1

                        # skip if no author, no author is no book.
                        try:
                            _ = item['volumeInfo']['authors'][0]
                        except KeyError:
                            logger.debug('Skipped a result without authorfield.')
                            continue

                        try:
                            if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                                bookisbn = item['volumeInfo'][
                                    'industryIdentifiers'][0]['identifier']
                            else:
                                bookisbn = ""
                        except KeyError:
                            bookisbn = ""

                        isbnhead = ""
                        if len(bookisbn) == 10:
                            isbnhead = bookisbn[0:3]
                        elif len(bookisbn) == 13:
                            isbnhead = bookisbn[3:6]

                        try:
                            booklang = item['volumeInfo']['language']
                        except KeyError:
                            booklang = "Unknown"

                        # do we care about language?
                        if "All" not in valid_langs:
                            if bookisbn != "":
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(bookisbn) == 13 and bookisbn.startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif (len(bookisbn) == 10) or \
                                                    (len(bookisbn) == 13 and bookisbn.startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))
                                                logger.debug("GB language: " + booklang)

                                    if not match:
                                        # try searching librarything for a language code using the isbn
                                        # if no language found, librarything return value is "invalid" or "unknown"
                                        # librarything returns plain text, not xml
                                        BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + bookisbn
                                        proxies = proxyList()
                                        try:
                                            librarything_wait()
                                            timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
                                            r = requests.get(BOOK_URL, timeout=timeout, proxies=proxies)
                                            resp = r.text
                                            lt_lang_hits += 1
                                            logger.debug(
                                                "LibraryThing reports language [%s] for %s" % (resp, isbnhead))
                                            if resp != 'invalid' and resp != 'unknown':
                                                booklang = resp  # found a language code
                                                match = True
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))
                                                logger.debug("LT language: " + booklang)
                                        except Exception as e:
                                            booklang = ""
                                            logger.error("%s finding language: %s" % (type(e).__name__, str(e)))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            booknamealt = item['volumeInfo']['title']
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (booknamealt, googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                booknamealt = item['volumeInfo']['title']
                                logger.debug(
                                    'Skipped [%s] with language %s' %
                                    (booknamealt, booklang))
                                ignored += 1
                                continue

                        try:
                            bookpub = item['volumeInfo']['publisher']
                        except KeyError:
                            bookpub = ""

                        try:
                            booksub = item['volumeInfo']['subtitle']
                        except KeyError:
                            booksub = ""

                        if not booksub:
                            series = ""
                            seriesNum = ""
                        else:
                            try:
                                series = booksub.split('(')[1].split(' Series ')[0]
                            except IndexError:
                                series = ""
                            if series.endswith(')'):
                                series = series[:-1]
                            try:
                                seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
                                if seriesNum[0] == '#':
                                    seriesNum = seriesNum[1:]
                            except IndexError:
                                seriesNum = ""

                            if not seriesNum and '#' in series:
                                words = series.rsplit('#', 1)
                                series = words[0].strip()
                                seriesNum = words[1].strip()
                            if not seriesNum and ' ' in series:
                                words = series.rsplit(' ', 1)
                                # has to be unicode for isnumeric()
                                if (u"%s" % words[1]).isnumeric():
                                    series = words[0]
                                    seriesNum = words[1]

                        try:
                            bookdate = item['volumeInfo']['publishedDate']
                        except KeyError:
                            bookdate = '0000-00-00'

                        try:
                            bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                        except KeyError:
                            bookimg = 'images/nocover.png'

                        try:
                            bookrate = item['volumeInfo']['averageRating']
                        except KeyError:
                            bookrate = 0

                        try:
                            bookpages = item['volumeInfo']['pageCount']
                        except KeyError:
                            bookpages = 0

                        try:
                            bookgenre = item['volumeInfo']['categories'][0]
                        except KeyError:
                            bookgenre = ""

                        try:
                            bookdesc = item['volumeInfo']['description']
                        except KeyError:
                            bookdesc = ""

                        rejected = check_status = False

                        bookname = item['volumeInfo']['title']

                        if not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                            removedResults += 1
                            rejected = True
                        else:
                            bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                            booklink = item['volumeInfo']['canonicalVolumeLink']
                            bookrate = float(bookrate)
                            bookid = item['id']

                            # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                            # editions) and sometimes uses the same bookid if the book is the same but the title is
                            # slightly different. Not sure if googlebooks does too, but we only want one...
                            existing_book = myDB.match('SELECT Status,Manual,BookAdded FROM books WHERE BookID=?',
                                                       (bookid,))
                            if existing_book:
                                book_status = existing_book['Status']
                                locked = existing_book['Manual']
                                added = existing_book['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                added = today()
                                locked = False

                        if not rejected and re.match('[^\w-]', bookname):  # remove books with bad characters in title
                            logger.debug("[%s] removed book for bad characters" % bookname)
                            removedResults += 1
                            rejected = True

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if bookdate > today()[:len(bookdate)]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, bookdate))
                                removedResults += 1
                                rejected = True

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorname, bookname, bookid))
                                    rejected = True
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid,))
                            if match:  # we have a book with this bookid already
                                if bookname != match['BookName'] or authorname != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorname, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = True

                        if check_status or not rejected:
                            if book_status != "Ignored" and not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": bookgenre,
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": bookdate,
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
                                    "BookAdded": added
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug("Book found: " + bookname + " " + bookdate)
                                updated = False
                                if 'nocover' in bookimg or 'nophoto' in bookimg:
                                    # try to get a cover from librarything
                                    workcover = getBookCover(bookid)
                                    if workcover:
                                        logger.debug('Updated cover for %s to %s' % (bookname, workcover))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif bookimg and bookimg.startswith('http'):
                                    link, success = cache_img("book", bookid, bookimg, refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % bookimg)

                                seriesdict = {}
                                if lazylibrarian.CONFIG['ADD_SERIES']:  # prefer series info from librarything
                                    seriesdict = getWorkSeries(bookid)
                                    if seriesdict:
                                        logger.debug('Updated series: %s [%s]' % (bookid, seriesdict))
                                        updated = True
                                    # librarything doesn't have series info. Any in the title?
                                    elif series:
                                        seriesdict = {cleanName(unaccented(series)): seriesNum}
                                    setSeries(seriesdict, bookid)

                                new_status = setStatus(bookid, seriesdict, bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug("[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug("[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                         (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid,))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
            logger.debug("Removed %s bad character or no-name result%s for author" %
                         (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

            myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                        (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
Ejemplo n.º 34
0
def import_book(pp_path=None, bookID=None):
    try:
        # Move a book into LL folder structure given just the folder and bookID, returns True or False
        # Called from "import_alternate" or if we find an "ll.(xxx)" folder that doesn't match a snatched book/mag
        #
        myDB = database.DBConnection()
        data = myDB.match('SELECT * from books WHERE BookID="%s"' % bookID)
        if data:
            authorname = data['AuthorName']
            bookname = data['BookName']
            processpath = lazylibrarian.DIRECTORY('Destination')

            if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

            dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
            global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
            global_name = unaccented(global_name)
            # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
            # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
            dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                   ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
            dest_path = unaccented_str(replace_all(dest_path, dic))
            dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING)

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                # update nzbs
                was_snatched = myDB.match('SELECT BookID, NZBprov FROM wanted WHERE BookID="%s"' % bookID)
                if was_snatched:
                    controlValueDict = {"BookID": bookID}
                    newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
                    myDB.upsert("wanted", newValueDict, controlValueDict)
                    if bookname:
                        if len(lazylibrarian.IMP_CALIBREDB):
                            logger.debug('Calibre should have created the extras')
                        else:
                            processExtras(myDB, dest_path, global_name, data)

                    if not lazylibrarian.DESTINATION_COPY and pp_path != processpath:
                        if os.path.isdir(pp_path):
                            # calibre might have already deleted it?
                            try:
                                shutil.rmtree(pp_path)
                            except Exception as why:
                                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))

                logger.info('Successfully processed: %s' % global_name)
                notify_download("%s from %s at %s" % (global_name, was_snatched['NZBprov'], now()))
                return True
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                was_snatched = len(myDB.select('SELECT BookID FROM wanted WHERE BookID="%s"' % bookID))
                if was_snatched:
                    controlValueDict = {"BookID": bookID}
                    newValueDict = {"Status": "Failed", "NZBDate": now()}
                    myDB.upsert("wanted", newValueDict, controlValueDict)
                # reset status so we try for a different version
                myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % bookID)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception as e:
                    logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))
        return False
    except Exception as e:
        logger.error('Unhandled exception in importBook: %s' % traceback.format_exc())
Ejemplo n.º 35
0
def findBestResult(resultlist, book, searchtype, source):
    """ resultlist: collated results from search providers
        book:       the book we want to find
        searchtype: book, magazine, shortbook, audiobook etc.
        source:     nzb, tor, rss, direct
        return:     highest scoring match, or None if no match
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                    ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                    '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                    ':': '', '!': '', '-': ' ', '\s\s': ' '}

        dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
               ',': '', '*': '', ':': '.', ';': '', '\'': ''}

        if source == 'rss':
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'

        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'

        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        logger.debug('Searching %s %s results for best %s match' % (len(resultlist), source, auxinfo))

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ", resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz:
                logger.debug("%s author/book Match: %s/%s %s at %s" %
                             (source.upper(), Author_match, Book_match, resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=?', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and not url.startswith('http') and not url.startswith('magnet'):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" % (resultTitle, url))

            if not rejected:
                for word in reject_list:
                    if word in getList(resultTitle.lower()) and word not in getList(author.lower()) \
                            and word not in getList(title.lower()):
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" % (resultTitle, word))
                        break

            size_temp = check_int(res[prefix + 'size'], 1000)  # Need to cater for when this is NONE (Issue 35)
            size = round(float(size_temp) / 1048576, 2)

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % resultTitle)

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % resultTitle)

            if not rejected:
                bookid = book['bookid']
                # newTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
                # newTitle = resultTitle + ' LL.(' + book['bookid'] + ')'

                if source == 'nzb':
                    mode = res['nzbmode']  # nzb, torznab
                else:
                    mode = res['tor_type']  # torrent, magnet, nzb(from rss), direct

                controlValueDict = {"NZBurl": url}
                newValueDict = {
                    "NZBprov": res[prefix + 'prov'],
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": size,
                    "NZBtitle": resultTitle,
                    "NZBmode": mode,
                    "AuxInfo": auxinfo,
                    "Status": "Skipped"
                }

                score = (Book_match + Author_match) / 2  # as a percentage
                # lose a point for each unwanted word in the title so we get the closest match
                # but for RSS ignore anything at the end in square braces [keywords, genres etc]
                if source == 'rss':
                    wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
                else:
                    wordlist = getList(resultTitle.lower())
                words = [x for x in wordlist if x not in getList(author.lower())]
                words = [x for x in words if x not in getList(title.lower())]
                typelist = ''

                if newValueDict['AuxInfo'] == 'eBook':
                    words = [x for x in words if x not in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])]
                    typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                elif newValueDict['AuxInfo'] == 'AudioBook':
                    words = [x for x in words if x not in getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])]
                    typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                score -= len(words)
                # prioritise titles that include the ebook types we want
                # add more points for booktypes nearer the left in the list
                # eg if epub, mobi, pdf  add 3 points if epub found, 2 for mobi, 1 for pdf
                booktypes = [x for x in wordlist if x in typelist]
                if booktypes:
                    typelist = list(reversed(typelist))
                    for item in booktypes:
                        for i in [i for i, x in enumerate(typelist) if x == item]:
                            score += i + 1

                matches.append([score, newValueDict, controlValueDict, res['priority']])

        if matches:
            highest = max(matches, key=lambda s: (s[0], s[3]))
            score = highest[0]
            newValueDict = highest[1]
            # controlValueDict = highest[2]
            dlpriority = highest[3]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                logger.info('Nearest match (%s%%): %s using %s search for %s %s' %
                            (score, newValueDict['NZBtitle'], searchtype, book['authorName'], book['bookName']))
            else:
                logger.info('Best match (%s%%): %s using %s search, %s priority %s' %
                            (score, newValueDict['NZBtitle'], searchtype, newValueDict['NZBprov'], dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" % (source, book["searchterm"], searchtype))
        return None
    except Exception:
        logger.error('Unhandled exception in findBestResult: %s' % traceback.format_exc())
Ejemplo n.º 36
0
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
                                     )
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"',
                    (magazine['bookid'], ))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn(
                'There is nothing to search for.  Mark some magazines as active.'
            )

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No nzb providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG,
                                 0) + 1200 < timenow:
                        logger.warn(
                            'No direct providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No tor providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No rss providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item[
                                'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace(
                        "'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(
                        nzbsize_temp, 1000
                    )  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {
                                '.': ' ',
                                '-': ' ',
                                '/': ' ',
                                '+': ' ',
                                '_': ' ',
                                '(': '',
                                ')': ''
                            }
                            nzbtitle_formatted = replace_all(nzbtitle,
                                                             dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA)
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[
                                    0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(
                                nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(
                                ' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    wlist.append(unaccented(word).lower())
                                for word in bookid_exploded:
                                    if unaccented(word).lower() not in wlist:
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        u"Magazine title match failed " +
                                        bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(u"Magazine matched " +
                                                 bookid + " for " +
                                                 nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
                            newdatish = "none"
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos -
                                                                        1])
                                    if month:
                                        if pos - 1:
                                            day = check_int(
                                                nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (
                                            year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and (pos - 1):
                                        month = month2num(
                                            nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(
                                                nzbtitle_exploded[
                                                    pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(
                                            nzbtitle_exploded):
                                        month = check_int(
                                            nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(
                                                    nzbtitle_exploded):
                                                day = check_int(
                                                    nzbtitle_exploded[pos + 2],
                                                    1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in [
                                            "issue", "no", "nr", "vol"
                                    ]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(
                                                nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(
                                                    issue)  # 4 == 04 == 004
                                                if pos + 2 < len(
                                                        nzbtitle_exploded):
                                                    year = check_year(
                                                        nzbtitle_exploded[pos +
                                                                          2])
                                                    if year and year < int(
                                                            datetime.date.
                                                            today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(
                                            nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(
                                                issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today(
                                            ).year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(
                                            issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(
                                                datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" %
                                             (regex_pass, nzbtitle_formatted,
                                              newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero

                                if str(newdatish).isdigit():
                                    logger.debug(
                                        'Magazine comparing issue numbers (%s)'
                                        % newdatish)
                                    control_date = 0
                                elif re.match('\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(
                                        lazylibrarian.CONFIG['MAG_AGE']
                                    ) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime(
                                        "%Y-%m-%d", time.localtime(start_time))
                                    logger.debug(
                                        'Magazine date comparing to %s' %
                                        control_date)
                                else:
                                    logger.debug(
                                        'Magazine unable to find comparison type [%s]'
                                        % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(
                                    newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(
                                    4)  # pad so we sort correctly
                            elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match('\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(
                                    newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match('\d+-\d\d-\d\d',
                                            str(control_date)):
                                    logger.debug(
                                        'Magazine %s failed: Expecting a date'
                                        % nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        'Magazine %s failed: Expecting issue number'
                                        % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT * from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
Ejemplo n.º 37
0
def processDestination(pp_path=None,
                       dest_path=None,
                       authorname=None,
                       bookname=None,
                       global_name=None,
                       mode=None):

    # check we got a book/magazine in the downloaded files, if not, return
    if bookname:
        booktype = 'book'
    else:
        booktype = 'mag'

    got_book = False
    for bookfile in os.listdir(pp_path):
        if is_valid_booktype(bookfile, booktype=booktype):
            got_book = bookfile
            break

    if got_book is False:
        # no book/mag found in a format we wanted. Leave for the user to delete or convert manually
        logger.warn(
            'Failed to locate a book/magazine in %s, leaving for manual processing'
            % pp_path)
        return False

    # Do we want calibre to import the book for us
    if bookname and len(lazylibrarian.IMP_CALIBREDB):
        try:
            logger.debug('Importing %s into calibre library' % (global_name))
            # calibre is broken, ignores metadata.opf and book_name.opf
            # also ignores --title and --author as parameters
            # so we have to configure calibre to parse the filename for author/title
            # and rename the book to the format we want calibre to use
            for bookfile in os.listdir(pp_path):
                filename, extn = os.path.splitext(bookfile)
                # calibre does not like quotes in author names
                os.rename(
                    os.path.join(pp_path, filename + extn),
                    os.path.join(pp_path,
                                 global_name.replace('"', '_') + extn))

            params = [
                lazylibrarian.IMP_CALIBREDB,
                'add',
                # '--title="%s"' % bookname,
                # '--author="%s"' % unaccented(authorname),
                '-1',
                '--with-library',
                lazylibrarian.DESTINATION_DIR,
                pp_path
            ]
            logger.debug(str(params))
            res = subprocess.check_output(params, stderr=subprocess.STDOUT)
            if res:
                logger.debug(
                    '%s reports: %s' %
                    (lazylibrarian.IMP_CALIBREDB, unaccented_str(res)))
            # calibre does not like quotes in author names
            calibre_dir = os.path.join(
                lazylibrarian.DESTINATION_DIR,
                unaccented_str(authorname.replace('"', '_')), '')
            if os.path.isdir(calibre_dir):
                imported = LibraryScan(
                    calibre_dir
                )  # rescan authors directory so we get the new book in our database
            else:
                logger.error("Failed to locate calibre dir [%s]" % calibre_dir)
                imported = False
                # imported = LibraryScan(lazylibrarian.DESTINATION_DIR)  # may have to rescan whole library instead
            if not imported and 'already exist' not in res:
                return False
        except subprocess.CalledProcessError as e:
            logger.debug(params)
            logger.debug('calibredb import failed: %s' % e.output)
            return False
        except OSError as e:
            logger.debug('calibredb failed, %s' % e.strerror)
            return False

    else:
        # we are copying the files ourselves, either it's a magazine or we don't want to use calibre
        if not os.path.exists(dest_path):
            logger.debug('%s does not exist, so it\'s safe to create it' %
                         dest_path)
        elif not os.path.isdir(dest_path):
            logger.debug('%s exists but is not a directory, deleting it' %
                         dest_path)
            try:
                os.remove(dest_path)
            except OSError as why:
                logger.debug('Failed to delete %s, %s' %
                             (dest_path, why.strerror))
                return False

        if not os.path.exists(dest_path):
            try:
                os.makedirs(dest_path)
            except OSError as why:
                logger.debug('Failed to create directory %s, %s' %
                             (dest_path, why.strerror))
                return False

        # ok, we've got a target directory, try to copy only the files we want, renaming them on the fly.
        for fname in os.listdir(pp_path):
            if fname.lower().endswith(".jpg") or fname.lower().endswith(".opf") or \
                    is_valid_booktype(fname, booktype=booktype):
                logger.debug('Copying %s to directory %s' % (fname, dest_path))
                try:
                    shutil.copyfile(
                        os.path.join(pp_path, fname),
                        os.path.join(dest_path,
                                     global_name + os.path.splitext(fname)[1]))
                except Exception as why:
                    logger.debug("Failed to copy file %s to %s, %s" %
                                 (fname, dest_path, str(why)))
                    return False
            else:
                logger.debug('Ignoring unwanted file: %s' % fname)

    # calibre or ll copied the files we want, now delete source files if not in download root dir
    # and if DESTINATION_COPY not set, but don't delete source files if copy failed
    # also we shouldn't delete if source was a torrent as we may be seeding
    if mode is None:
        mode = 'unknown'  # no mode for alternate_import
    if mode is not 'torrent' and mode is not 'magnet':
        if not lazylibrarian.DESTINATION_COPY:
            if pp_path != lazylibrarian.DOWNLOAD_DIR:
                if os.path.isdir(pp_path):
                    # calibre might have already deleted it
                    try:
                        shutil.rmtree(pp_path)
                    except Exception as why:
                        logger.debug("Unable to remove %s, %s" %
                                     (pp_path, str(why)))
                        return False
    return True