Python getList Examples

Programming Language: Python

Namespace/Package Name: lazylibrarian.formatter

Method/Function: getList

Examples at hotexamples.com: 57

Python getList - 57 examples found. These are the top rated real world Python examples of lazylibrarian.formatter.getList extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: __init__.py Project: andrzejc/LazyLibrarian

def build_monthtable():
    if len(formatter.getList(IMP_MONTHLANG)) == 0:  # any extra languages wanted?
        return
    try:
        current_locale = locale.setlocale(locale.LC_ALL, "")  # read current state.
        # getdefaultlocale() doesnt seem to work as expected on windows, returns 'None'
    except locale.Error as e:
        logger.debug("Error getting current locale : %s" % str(e))
        return

    lang = str(current_locale)
    if not lang.startswith("en_"):  # en_ is preloaded
        MONTHNAMES[0].append(lang)
        for f in range(1, 13):
            MONTHNAMES[f].append(common.remove_accents(calendar.month_name[f]).lower())
        MONTHNAMES[0].append(lang)
        for f in range(1, 13):
            MONTHNAMES[f].append(common.remove_accents(calendar.month_abbr[f]).lower().strip("."))
            logger.info(
                "Added month names for locale [%s], %s, %s ..."
                % (lang, MONTHNAMES[1][len(MONTHNAMES[1]) - 2], MONTHNAMES[1][len(MONTHNAMES[1]) - 1])
            )

    for lang in formatter.getList(IMP_MONTHLANG):
        try:
            if len(lang) > 1:
                locale.setlocale(locale.LC_ALL, lang)
                MONTHNAMES[0].append(lang)
                for f in range(1, 13):
                    MONTHNAMES[f].append(common.remove_accents(calendar.month_name[f]).lower())
                MONTHNAMES[0].append(lang)
                for f in range(1, 13):
                    MONTHNAMES[f].append(common.remove_accents(calendar.month_abbr[f]).lower().strip("."))
                locale.setlocale(locale.LC_ALL, current_locale)  # restore entry state
                logger.info(
                    "Added month names for locale [%s], %s, %s ..."
                    % (lang, MONTHNAMES[1][len(MONTHNAMES[1]) - 2], MONTHNAMES[1][len(MONTHNAMES[1]) - 1])
                )
        except:
            locale.setlocale(locale.LC_ALL, current_locale)  # restore entry state
            logger.warn("Unable to load requested locale [%s]" % lang)
            try:
                if "_" in lang:
                    wanted_lang = lang.split("_")[0]
                else:
                    wanted_lang = lang
                params = ["locale", "-a"]
                all_locales = subprocess.check_output(params).split()
                locale_list = []
                for a_locale in all_locales:
                    if a_locale.startswith(wanted_lang):
                        locale_list.append(a_locale)
                if locale_list:
                    logger.warn("Found these alternatives: " + str(locale_list))
                else:
                    logger.warn("Unable to find an alternative")
            except:
                logger.warn("Unable to get a list of alternatives")
            logger.info("Set locale back to entry state %s" % current_locale)

Example #2

Show file

File: providers.py Project: knobunc/LazyLibrarian

def get_searchterm(book, searchType):
    authorname = cleanName(book['authorName'], "'")
    bookname = cleanName(book['bookName'], "'")
    if searchType in ['book', 'audio'] or 'short' in searchType:
        if bookname == authorname and book['bookSub']:
            # books like "Spike Milligan: Man of Letters"
            # where we split the title/subtitle on ':'
            bookname = cleanName(book['bookSub'])
        if bookname.startswith(authorname) and len(bookname) > len(authorname):
            # books like "Spike Milligan In his own words"
            # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
            bookname = bookname[len(authorname) + 1:]
        bookname = bookname.strip()

        # no initials or extensions after surname eg L. E. Modesitt Jr. -> Modesitt
        # and Charles H. Elliott, Phd -> Charles Elliott
        # but Tom Holt -> Tom Holt
        # Calibre directories may have trailing '.' replaced by '_'  eg Jr_
        if ' ' in authorname:
            authorname_exploded = authorname.split(' ')
            authorname = ''
            postfix = getList(lazylibrarian.CONFIG['NAME_POSTFIX'])
            for word in authorname_exploded:
                word = word.strip('.').strip('_')
                if len(word) > 1 and word.lower() not in postfix:
                    if authorname:
                        authorname += ' '
                    authorname += word

        if 'short' in searchType and '(' in bookname:
            bookname = bookname.split('(')[0].strip()

    return authorname, bookname

Example #3

Show file

def get_searchterm(book, searchType):
    authorname = cleanName(book['authorName'], "'")
    bookname = cleanName(book['bookName'], "'")
    if searchType in ['book', 'audio'] or 'short' in searchType:
        if bookname == authorname and book['bookSub']:
            # books like "Spike Milligan: Man of Letters"
            # where we split the title/subtitle on ':'
            bookname = cleanName(book['bookSub'])
        if bookname.startswith(authorname) and len(bookname) > len(authorname):
            # books like "Spike Milligan In his own words"
            # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
            bookname = bookname[len(authorname) + 1:]
        bookname = bookname.strip()

        # no initials or extensions after surname eg L. E. Modesitt Jr. -> Modesitt
        # and Charles H. Elliott, Phd -> Charles Elliott
        # but Tom Holt -> Tom Holt
        # Calibre directories may have trailing '.' replaced by '_'  eg Jr_
        if ' ' in authorname:
            authorname_exploded = authorname.split(' ')
            authorname = ''
            postfix = getList(lazylibrarian.CONFIG['NAME_POSTFIX'])
            for word in authorname_exploded:
                word = word.strip('.').strip('_')
                if len(word) > 1 and word.lower() not in postfix:
                    if authorname:
                        authorname += ' '
                    authorname += word

        if 'short' in searchType and '(' in bookname:
            bookname = bookname.split('(')[0].strip()

    return authorname, bookname

Example #4

Show file

File: common.py Project: kuuratsanik/LazyLibrarian

def formatAuthorName(author):
    """ get authorame in a consistent format """

    if "," in author:
        postfix = getList(lazylibrarian.CONFIG['NAME_POSTFIX'])
        words = author.split(',')
        if len(words) == 2:
            # Need to handle names like "L. E. Modesitt, Jr." or "J. Springmann, Phd"
            # use an exceptions list for now, there might be a better way...
            if words[1].strip().strip('.').strip('_').lower() in postfix:
                surname = words[1].strip()
                forename = words[0].strip()
            else:
                # guess its "surname, forename" or "surname, initial(s)" so swap them round
                forename = words[1].strip()
                surname = words[0].strip()
            if author != forename + ' ' + surname:
                logger.debug('Formatted authorname [%s] to [%s %s]' %
                             (author, forename, surname))
                author = forename + ' ' + surname
    # reformat any initials, we want to end up with L.E. Modesitt Jr
    if len(author) > 2 and author[1] in '. ':
        surname = author
        forename = ''
        while len(surname) > 2 and surname[1] in '. ':
            forename = forename + surname[0] + '.'
            surname = surname[2:].strip()
        if author != forename + ' ' + surname:
            logger.debug('Stripped authorname [%s] to [%s %s]' %
                         (author, forename, surname))
            author = forename + ' ' + surname

    return ' '.join(author.split())  # ensure no extra whitespace

Example #5

Show file

File: utorrent.py Project: kuuratsanik/LazyLibrarian-1

    def _action(self, params, body=None, content_type=None):
        # noinspection PyTypeChecker
        url = self.base_url + '/gui/' + '?token=' + self.token + '&' + urlencode(
            params)
        request = Request(url)
        if lazylibrarian.CONFIG['PROXY_HOST']:
            for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
                request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
        request.add_header('User-Agent', getUserAgent())

        if body:
            if PY2:
                request.add_data(body)
            else:
                request.data(body)
            request.add_header('Content-length', len(body))
        if content_type:
            request.add_header('Content-type', content_type)

        try:
            response = self.opener.open(request)
            return response.code, json.loads(response.read())
        except HTTPError as err:
            logger.debug('URL: %s' % url)
            logger.debug('uTorrent webUI raised the following error: ' +
                         str(err))

Example #6

Show file

def proxyList():
    proxies = None
    if lazylibrarian.CONFIG['PROXY_HOST']:
        proxies = {}
        for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
            if item in ['http', 'https']:
                proxies.update({item: lazylibrarian.CONFIG['PROXY_HOST']})
    return proxies

Example #7

Show file

File: postprocess.py Project: Trixter69/LazyLibrarian

def processDestination(pp_path=None, dest_path=None, authorname=None, bookname=None, global_name=None, book_id=None):

    pp_path = pp_path.encode(lazylibrarian.SYS_ENCODING)

    # check we got a book in the downloaded files
    pp = False
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for bookfile in os.listdir(pp_path):
        if ((str(bookfile).split('.')[-1]) in booktype_list):    
            pp = True
    if pp == False:
        # no book found in a format we wanted. Leave for the user to delete or convert manually
        logger.debug('Failed to locate a book in downloaded files, leaving for manual processing')
        return pp    
        
    try:
        if not os.path.exists(dest_path):
            logger.debug('%s does not exist, so it\'s safe to create it' % dest_path)
        else:
            logger.debug('%s already exists. Removing existing tree.' % dest_path)
            shutil.rmtree(dest_path)

        logger.debug('Attempting to copy/move tree')
        if lazylibrarian.DESTINATION_COPY == 1 and lazylibrarian.DOWNLOAD_DIR != pp_path:
            shutil.copytree(pp_path, dest_path)
            logger.debug('Successfully copied %s to %s.' % (pp_path, dest_path))
        elif lazylibrarian.DOWNLOAD_DIR == pp_path:
            booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
            for file3 in os.listdir(pp_path):
                if ((str(file3).split('.')[-1]) in booktype_list):
                    bookID = str(file3).split("LL.(")[1].split(")")[0]
                    if bookID == book_id:
                        logger.debug('Processing %s' % bookID)
                        if not os.path.exists(dest_path):
                            try:
                                os.makedirs(dest_path)
                            except Exception, e:
                                logger.debug(str(e))
                        if lazylibrarian.DESTINATION_COPY == 1:
                            shutil.copyfile(os.path.join(pp_path, file3), os.path.join(dest_path, file3))
                        else:
                            shutil.move(os.path.join(pp_path, file3), os.path.join(dest_path, file3))
        else:

Example #8

Show file

File: qbittorrent.py Project: jhorourke2/LazyLibrarian

    def _command(self, command, args=None, content_type=None, files=None):
        logger.debug('QBittorrent WebAPI Command: %s' % command)
        url = self.base_url + '/' + command
        data = None
        headers = dict()

        if files or content_type == 'multipart/form-data':
            data, headers = encode_multipart(
                args, files, '-------------------------acebdf13572468')
        else:
            if args:
                data = makeBytestr(urlencode(args))
            if content_type:
                headers['Content-Type'] = content_type

        request = Request(url, data, headers)

        if lazylibrarian.CONFIG['PROXY_HOST']:
            for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
                request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
        request.add_header('User-Agent', USER_AGENT)

        try:
            response = self.opener.open(request)
            try:
                contentType = response.headers['content-type']
            except KeyError:
                contentType = ''

            resp = response.read()
            # some commands return json
            if contentType == 'application/json':
                if resp:
                    return json.loads(resp)
                return ''
            else:
                # some commands return plain text
                resp = makeUnicode(resp)
                logger.debug("QBitTorrent returned %s" % resp)
                if command == 'version/api':
                    return resp
                # some just return Ok. or Fails.
                if resp and resp != 'Ok.':
                    return False
            # some commands return nothing but response code (always 200)
            return True
        except URLError as err:
            logger.debug('Failed URL: %s' % url)
            logger.debug('QBitTorrent webUI raised the following error: %s' %
                         err.reason)
            return False

Example #9

Show file

File: qbittorrent.py Project: DobyTang/LazyLibrarian

    def _command(self, command, args=None, content_type=None, files=None):
        logger.debug('QBittorrent WebAPI Command: %s' % command)
        url = self.base_url + '/' + command
        data = None
        headers = dict()

        if files or content_type == 'multipart/form-data':
            data, headers = encode_multipart(args, files, '-------------------------acebdf13572468')
        else:
            if args:
                data = makeBytestr(urlencode(args))
            if content_type:
                headers['Content-Type'] = content_type

        request = Request(url, data, headers)

        if lazylibrarian.CONFIG['PROXY_HOST']:
            for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
                request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
        request.add_header('User-Agent', getUserAgent())

        try:
            response = self.opener.open(request)
            try:
                contentType = response.headers['content-type']
            except KeyError:
                contentType = ''

            resp = response.read()
            # some commands return json
            if contentType == 'application/json':
                if resp:
                    return json.loads(resp)
                return ''
            else:
                # some commands return plain text
                resp = makeUnicode(resp)
                logger.debug("QBitTorrent returned %s" % resp)
                if command == 'version/api':
                    return resp
                # some just return Ok. or Fails.
                if resp and resp != 'Ok.':
                    return False
            # some commands return nothing but response code (always 200)
            return True
        except URLError as err:
            logger.debug('Failed URL: %s' % url)
            logger.debug('QBitTorrent webUI raised the following error: %s' % err.reason)
            return False

Example #10

Show file

 def multiLink(self, bookfile, bookid):
     types = []
     multi = ''
     basename, extn = os.path.splitext(bookfile)
     for item in getList(lazylibrarian.CONFIG['EBOOK_TYPE']):
         target = basename + '.' + item
         if os.path.isfile(target):
             types.append(item)
     if len(types) > 1:
         for fmt in types:
             multi += '<link href="'
             multi += '%s?cmd=Serve&amp;bookid=%s&amp;fmt=%s' % (
                 self.opdsroot, quote_plus(bookid), fmt)
             multi += '" rel="http://opds-spec.org/acquisition" type="' + mimeType(
                 '.' + fmt) + '"/>'
     return multi

Example #11

Show file

File: bookrename.py Project: cdancette/LazyLibrarian

def seriesInfo(bookid):
    """ Return series info for a bookid as a dict of formatted strings
        The strings are configurable, but by default...
        Full returns ( Lord of the Rings 2 )
        Name returns Lord of the Rings (with added Num part if that's not numeric, eg Lord of the Rings Book One)
        Num  returns Book #1 -    (or empty string if no numeric part)
        so you can combine to make Book #1 - Lord of the Rings  """
    mydict = {'Name': '', 'Full': '', 'Num': ''}
    myDB = database.DBConnection()
    cmd = 'SELECT SeriesID,SeriesNum from member WHERE bookid=?'
    res = myDB.match(cmd, (bookid,))
    if not res:
        return mydict

    seriesid = res['SeriesID']
    serieslist = getList(res['SeriesNum'])
    seriesnum = ''
    seriesname = ''
    # might be "Book 3.5" or similar, just get the numeric part
    while serieslist:
        seriesnum = serieslist.pop()
        try:
            _ = float(seriesnum)
            break
        except ValueError:
            seriesnum = ''
            pass

    if not seriesnum:
        # couldn't figure out number, keep everything we got, could be something like "Book Two"
        serieslist = res['SeriesNum']

    cmd = 'SELECT SeriesName from series WHERE seriesid=?'
    res = myDB.match(cmd, (seriesid,))
    if res:
        seriesname = res['SeriesName']
        if not seriesnum:
            # add what we got back to end of series name
            if serieslist:
                seriesname = "%s %s" % (seriesname, serieslist)

    mydict['Name'] = lazylibrarian.CONFIG['FMT_SERNAME'].replace('$SerName', seriesname).replace('$$', ' ')
    mydict['Num'] = lazylibrarian.CONFIG['FMT_SERNUM'].replace('$SerNum', seriesnum).replace('$$', ' ')
    mydict['Full'] = lazylibrarian.CONFIG['FMT_SERIES'].replace('$SerNum', seriesnum).replace(
                        '$SerName', seriesname).replace('$$', ' ')
    return mydict

Example #12

Show file

File: qbittorrent.py Project: steflavoie/LazyLibrarian

    def _command(self, command, args=None, content_type=None, files=None):
        logger.debug('QBittorrent WebAPI Command: %s' % command)
        url = self.base_url + '/' + command
        data = None
        headers = dict()
        if files:  # Use Multipart form
            data, headers = encode_multipart(
                args, files, '-------------------------acebdf13572468')
        else:
            if args:
                data = urllib.urlencode(args)
            if content_type:
                headers['Content-Type'] = content_type

        request = urllib2.Request(url, data, headers)

        if lazylibrarian.CONFIG['PROXY_HOST']:
            for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
                request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
        request.add_header('User-Agent', USER_AGENT)

        try:
            response = self.opener.open(request)
            info = response.info()

            if info:
                if info.getheader('content-type'):
                    if info.getheader('content-type') == 'application/json':
                        return json.loads(response.read())
                        # response code is always 200, whether success or fail
                    else:
                        resp = ''
                        for line in response:
                            resp = resp + line
                        logger.debug("QBitTorrent returned %s" % resp)
                        return False
            return True
        except urllib2.URLError as err:
            logger.debug('Failed URL: %s' % url)
            logger.debug('QBitTorrent webUI raised the following error: %s' %
                         err.reason)
            return False

Example #13

Show file

    def get_author_info(self, authorid=None):

        URL = 'http://www.goodreads.com/author/show/' + authorid + '.xml?' + urllib.urlencode(self.params)
        author_dict = {}

        try:
            rootxml, in_cache = get_xml_request(URL)
        except Exception as e:
            logger.error("Error getting author info: %s" % str(e))
            return author_dict
        if rootxml is None:
            logger.debug("Error requesting author info")
            return author_dict

        resultxml = rootxml.find('author')

        if not len(resultxml):
            logger.warn('No author found with ID: ' + authorid)
        else:
            # added authorname to author_dict - this holds the intact name preferred by GR
            # except GR messes up names like "L. E. Modesitt, Jr." where it returns <name>Jr., L. E. Modesitt</name>
            authorname = resultxml[1].text
            if "," in authorname:
                postfix = getList(lazylibrarian.CONFIG['NAME_POSTFIX'])
                words = authorname.split(',')
                if len(words) == 2:
                    if words[0].strip().strip('.').lower in postfix:
                        authorname = words[1].strip() + ' ' + words[0].strip()

            logger.debug("[%s] Processing info for authorID: %s" % (authorname, authorid))
            author_dict = {
                'authorid': resultxml[0].text,
                'authorlink': resultxml.find('link').text,
                'authorimg': resultxml.find('image_url').text,
                'authorborn': resultxml.find('born_at').text,
                'authordeath': resultxml.find('died_at').text,
                'totalbooks': resultxml.find('works_count').text,
                'authorname': ' '.join(authorname.split())  # remove any extra whitespace
            }
        return author_dict

Example #14

Show file

File: sabnzbd.py Project: forge33/LazyLibrarian

def checkLink():
    # connection test, check host/port
    auth = SABnzbd(nzburl='auth')
    if not auth:
        return "Unable to talk to SABnzbd, check HOST/PORT"
    # check apikey is valid
    cats = SABnzbd(nzburl='get_cats')
    if not cats:
        return "Unable to talk to SABnzbd, check APIKEY"
    # check category exists
    if lazylibrarian.SAB_CAT:
        catlist = formatter.getList(cats)
        if not lazylibrarian.SAB_CAT in catlist:
            msg = "SABnzbd: Unknown category [%s]\n" % lazylibrarian.SAB_CAT
            if catlist:
                msg += "Valid categories:\n"
                for cat in catlist:
                    msg += '%s\n' % cat
            else:
                msg += "SABnzbd seems to have no categories set"
            return msg
    return "SABnzbd connection successful"

Example #15

Show file

def checkLink():
    # connection test, check host/port
    auth = SABnzbd(nzburl='auth')
    if not auth:
        return "Unable to talk to SABnzbd, check HOST/PORT"
    # check apikey is valid
    cats = SABnzbd(nzburl='get_cats')
    if not cats:
        return "Unable to talk to SABnzbd, check APIKEY"
    # check category exists
    if lazylibrarian.SAB_CAT:
        catlist = formatter.getList(cats)
        if lazylibrarian.SAB_CAT not in catlist:
            msg = "SABnzbd: Unknown category [%s]\n" % lazylibrarian.SAB_CAT
            if catlist:
                msg += "Valid categories:\n"
                for cat in catlist:
                    msg += '%s\n' % cat
            else:
                msg += "SABnzbd seems to have no categories set"
            return msg
    return "SABnzbd connection successful"

Example #16

Show file

File: utorrent.py Project: andyuc88/LazyLibrarian

    def _action(self, params, body=None, content_type=None):
        url = self.base_url + '/gui/' + '?token=' + self.token + '&' + urlencode(params)
        request = Request(url)
        if lazylibrarian.CONFIG['PROXY_HOST']:
            for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
                request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
        request.add_header('User-Agent', USER_AGENT)

        if body:
            if PY2:
                request.add_data(body)
            else:
                request.data(body)
            request.add_header('Content-length', len(body))
        if content_type:
            request.add_header('Content-type', content_type)

        try:
            response = self.opener.open(request)
            return response.code, json.loads(response.read())
        except HTTPError as err:
            logger.debug('URL: %s' % url)
            logger.debug('uTorrent webUI raised the following error: ' + str(err))

Example #17

Show file

File: librarysync.py Project: duckville/LazyLibrarian

def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn("Cannot find directory: %s. Not scanning" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))
        return

    myDB = database.DBConnection()

    myDB.action("drop table if exists stats")
    myDB.action(
        "create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
                            GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )"
    )

    logger.info("Scanning ebook directory: %s" % dir.decode(lazylibrarian.SYS_ENCODING, "replace"))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select('select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info("Missing books will be marked as %s" % status)
        for book in books:
            bookName = book["BookName"]
            bookAuthor = book["AuthorName"]
            bookID = book["BookID"]
            bookfile = book["BookFile"]

            if not (bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn("Book %s - %s updated as not found on disk" % (bookAuthor, bookName))

    # guess this was meant to save repeat-scans of the same directory
    # if it contains multiple formats of the same book, but there was no code
    # that looked at the array. renamed from latest to processed to make
    # purpose clearer
    processed_subdirectories = []

    matchString = ""
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + "\\" + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ""
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + "|" + book_type
    matchString = (
        matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace("\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)")
        + "\.["
        + booktypes
        + "]"
    )
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, "")
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0

                if formatter.is_valid_booktype(files):
                    logger.debug(
                        "[%s] Now scanning subdirectory %s"
                        % (
                            dir.decode(lazylibrarian.SYS_ENCODING, "replace"),
                            subdirectory.decode(lazylibrarian.SYS_ENCODING, "replace"),
                        )
                    )
                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    words = files.split(".")
                    extn = words[len(words) - 1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == "epub") or (extn == "mobi"):
                        book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if "title" in res and "creator" in res:  # this is the minimum we need
                            match = 1
                            book = res["title"]
                            author = res["creator"]
                            if "language" in res:
                                language = res["language"]
                            if "identifier" in res:
                                isbn = res["identifier"]
                            if "type" in res:
                                extn = res["type"]
                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn))
                        else:
                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if "title" in res and "creator" in res:  # this is the minimum we need
                        match = 1
                        book = res["title"]
                        author = res["creator"]
                        if "language" in res:
                            language = res["language"]
                        if "identifier" in res:
                            isbn = res["identifier"]
                        logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and formatter.is_valid_isbn(isbn):
                            logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone()
                            if not match:
                                myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language))
                                logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                            else:
                                logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(",")
                            author = words[1].strip() + " " + words[0].strip()  # "forename surname"
                        if author[1] == " ":
                            author = author.replace(" ", ".")
                            author = author.replace("..", ".")

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' % author
                        ).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn("Error finding author id for [%s]" % author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr["authorname"]

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace(".", "_")
                                match_auth = match_auth.replace(" ", "_")
                                match_auth = match_auth.replace("__", "_")
                                match_name = authorname.replace(".", "_")
                                match_name = match_name.replace(" ", "_")
                                match_name = match_name.replace("__", "_")
                                match_name = common.remove_accents(match_name)
                                match_auth = common.remove_accents(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug("Failed to match author [%s] fuzz [%d]" % (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)
                                    )

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr["authorname"]
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' % author
                                    ).fetchone()
                                    if not check_exist_author:
                                        logger.debug("Adding new author [%s]" % author)
                                        try:
                                            importer.addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' % author
                                            ).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug("Failed to match author [%s] in database" % author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', "").replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)
                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)
                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' % bookid
                                ).fetchone()
                                if check_status["Status"] != "Open":
                                    # update status as we've got this book
                                    myDB.action('UPDATE books set Status="Open" where BookID="%s"' % bookid)
                                    book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open
                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)
                                    )
                                    new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info("%s new/modified books found and added to the database" % new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
            sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats"
    ).fetchone()
    if stats["sum(GR_book_hits)"] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug("GoogleBooks was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoogleBooks language was changed %s times" % stats["sum(GB_lang_change)"])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug("GoodReads was hit %s times for books" % stats["sum(GR_book_hits)"])
            logger.debug("GoodReads was hit %s times for languages" % stats["sum(GR_lang_hits)"])
        logger.debug("LibraryThing was hit %s times for languages" % stats["sum(LT_lang_hits)"])
        logger.debug("Language cache was hit %s times" % stats["sum(cache_hits)"])
        logger.debug("Unwanted language removed %s books" % stats["sum(bad_lang)"])
        logger.debug("Unwanted characters removed %s books" % stats["sum(bad_char)"])
        logger.debug("Unable to cache %s books with missing ISBN" % stats["sum(uncached)"])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize["counter"])
    stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)

    authors = myDB.select("select AuthorName from authors")
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug("Updating bookcounts for %i authors" % len(authors))
    for author in authors:
        name = author["AuthorName"]
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")'
            % name
        ).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks["counter"], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' % name
        ).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (totalbooks["counter"], name))

    logger.info("Library scan complete")

Example #18

Show file

File: gb.py Project: andyuc88/LazyLibrarian

    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0
            title = ''
            authorname = ''

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')

            fullterm = searchterm.replace(' <ll> ', ' ')
            logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)

            for api_value in api_strings:
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + quote(api_value + searchterm)
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        # noinspection PyUnresolvedReferences
                        title = title.split(' (')[0]  # without any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace('"', '').strip()  # and no quotes
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote(api_value + '"' + searchterm + '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    searchterm = searchterm.strip()
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote_plus(api_value + '"' + searchterm + '"')

                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urlencode(self.params)

                        try:
                            jsonresults, in_cache = gb_json_request(URL)
                            if jsonresults is None:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn('Found no results for %s with value: %s' % (api_value, searchterm))
                                break
                            else:
                                pass
                        except Exception as err:
                            if hasattr(err, 'reason'):
                                errmsg = err.reason
                            else:
                                errmsg = str(err)
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                            break

                        startindex += 40

                        for item in jsonresults['items']:
                            total_count += 1

                            book = bookdict(item)
                            if not book['author']:
                                logger.debug('Skipped a result without authorfield.')
                                no_author_count += 1
                                continue

                            if not book['name']:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                            if "All" not in valid_langs:  # don't care about languages, accept all
                                try:
                                    # skip if language is not in valid list -
                                    booklang = book['lang']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' % (book['name'], booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug('Skipped %s where no language is found' % book['name'])
                                    continue

                            if authorname:
                                author_fuzz = fuzz.ratio(book['author'], authorname)
                            else:
                                author_fuzz = fuzz.ratio(book['author'], fullterm)

                            if title:
                                book_fuzz = fuzz.token_set_ratio(book['name'], title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(book['name']))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.token_set_ratio(book['name'], fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(book['name'], dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace

                            AuthorID = ''
                            if book['author']:
                                match = myDB.match(
                                    'SELECT AuthorID FROM authors WHERE AuthorName=?', (
                                        book['author'].replace('"', '""'),))
                                if match:
                                    AuthorID = match['AuthorID']

                            resultlist.append({
                                'authorname': book['author'],
                                'authorid': AuthorID,
                                'bookid': item['id'],
                                'bookname': bookname,
                                'booksub': book['sub'],
                                'bookisbn': book['isbn'],
                                'bookpub': book['pub'],
                                'bookdate': book['date'],
                                'booklang': book['lang'],
                                'booklink': book['link'],
                                'bookrate': float(book['rate']),
                                'bookrate_count': book['rate_count'],
                                'bookimg': book['img'],
                                'bookpages': book['pages'],
                                'bookgenre': book['genre'],
                                'bookdesc': book['desc'],
                                'author_fuzz': author_fuzz,
                                'book_fuzz': book_fuzz,
                                'isbn_fuzz': isbn_fuzz,
                                'highest_fuzz': highest_fuzz,
                                'num_reviews': book['ratings']
                            })

                            resultcount += 1

                except KeyError:
                    break

                logger.debug("Returning %s result%s for (%s) with keyword: %s" %
                             (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
            logger.debug('The Google Books API was hit %s time%s for searchterm: %s' %
                         (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())

Example #19

Show file

def setWorkID(books=None):
    """ Set the goodreads workid for any books that don't already have one
        books is a comma separated list of bookids or if empty, select from database
        Paginate requests to reduce api hits """

    myDB = database.DBConnection()
    pages = []
    if books:
        page = books
        pages.append(page)
    else:
        cmd = "select BookID,BookName from books where WorkID='' or WorkID is null"
        books = myDB.select(cmd)
        if books:
            counter = 0
            logger.debug('Setting WorkID for %s book%s' %
                         (len(books), plural(len(books))))
            page = ''
            for book in books:
                bookid = book['BookID']
                if not bookid:
                    logger.debug("No bookid for %s" % book['BookName'])
                else:
                    if page:
                        page = page + ','
                    page = page + bookid
                    counter += 1
                    if counter == 50:
                        counter = 0
                        pages.append(page)
                        page = ''
            if page:
                pages.append(page)

    counter = 0
    params = {"key": lazylibrarian.CONFIG['GR_API']}
    for page in pages:
        URL = 'https://www.goodreads.com/book/id_to_work_id/' + page + '?' + urlencode(
            params)
        try:
            rootxml, in_cache = gr_xml_request(URL, useCache=False)
            if rootxml is None:
                logger.debug("Error requesting id_to_work_id page")
            else:
                resultxml = rootxml.find('work-ids')
                if len(resultxml):
                    ids = resultxml.getiterator('item')
                    books = getList(page)
                    cnt = 0
                    for item in ids:
                        workid = item.text
                        if not workid:
                            logger.debug("No workid returned for %s" %
                                         books[cnt])
                        else:
                            counter += 1
                            controlValueDict = {"BookID": books[cnt]}
                            newValueDict = {"WorkID": workid}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                        cnt += 1

        except Exception as e:
            logger.error("%s parsing id_to_work_id page: %s" %
                         (type(e).__name__, str(e)))

    msg = 'Updated %s id%s' % (counter, plural(counter))
    logger.debug("setWorkID complete: " + msg)
    return msg

Example #20

Show file

File: calibre.py Project: petetomasik/LazyLibrarian

def syncCalibreList(col_read=None, col_toread=None, userid=None):
    """ Get the lazylibrarian bookid for each read/toread calibre book so we can map our id to theirs,
        and sync current/supplied user's read/toread or supplied read/toread columns to calibre database.
        Return message giving totals """

    myDB = database.DBConnection()
    if not userid:
        cookie = cherrypy.request.cookie
        if cookie and 'll_uid' in cookie.keys():
            userid = cookie['ll_uid'].value
    if userid:
        res = myDB.match(
            'SELECT UserName,ToRead,HaveRead,CalibreRead,CalibreToRead,Perms from users where UserID=?',
            (userid, ))
        if res:
            username = res['UserName']
            if not col_read:
                col_read = res['CalibreRead']
            if not col_toread:
                col_toread = res['CalibreToRead']
            toreadlist = getList(res['ToRead'])
            readlist = getList(res['HaveRead'])
            # suppress duplicates (just in case)
            toreadlist = list(set(toreadlist))
            readlist = list(set(readlist))
        else:
            return "Error: Unable to get user column settings for %s" % userid

    if not userid:
        return "Error: Unable to find current userid"

    if not col_read and not col_toread:
        return "User %s has no calibre columns set" % username

    # check user columns exist in calibre and create if not
    res = calibredb('custom_columns')
    columns = res[0].split('\n')
    custom_columns = []
    for column in columns:
        if column:
            custom_columns.append(column.split(' (')[0])

    if col_read not in custom_columns:
        added = calibredb('add_custom_column', [col_read, col_read, 'bool'])
        if "column created" not in added[0]:
            return added
    if col_toread not in custom_columns:
        added = calibredb('add_custom_column',
                          [col_toread, col_toread, 'bool'])
        if "column created" not in added[0]:
            return added

    nomatch = 0
    readcol = ''
    toreadcol = ''
    map_ctol = {}
    map_ltoc = {}
    if col_read:
        readcol = '*' + col_read
    if col_toread:
        toreadcol = '*' + col_toread

    calibre_list = calibreList(col_read, col_toread)
    if not isinstance(calibre_list, list):
        # got an error message from calibredb
        return '"%s"' % calibre_list

    for item in calibre_list:
        if toreadcol and toreadcol in item or readcol and readcol in item:
            authorname, authorid, added = addAuthorNameToDB(item['authors'],
                                                            refresh=False,
                                                            addbooks=False)
            if authorname:
                if authorname != item['authors']:
                    logger.debug(
                        "Changed authorname for [%s] from [%s] to [%s]" %
                        (item['title'], item['authors'], authorname))
                    item['authors'] = authorname
                bookid = find_book_in_db(authorname, item['title'])
                if not bookid:
                    searchterm = "%s <ll> %s" % (item['title'], authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.debug(
                                "Found (%s%% %s%%) %s: %s" %
                                (result['author_fuzz'], result['book_fuzz'],
                                 result['authorname'], result['bookname']))
                            bookid = result['bookid']
                            import_book(bookid)
                if bookid:
                    # NOTE: calibre bookid is always an integer, lazylibrarian bookid is a string
                    # (goodreads could be used as an int, but googlebooks can't as it's alphanumeric)
                    # so convert all dict items to strings for ease of matching.
                    map_ctol[str(item['id'])] = str(bookid)
                    map_ltoc[str(bookid)] = str(item['id'])
                else:
                    logger.warn(
                        'Calibre Book [%s] by [%s] is not in lazylibrarian database'
                        % (item['title'], authorname))
                    nomatch += 1
            else:
                logger.warn(
                    'Calibre Author [%s] not matched in lazylibrarian database'
                    % (item['authors']))
                nomatch += 1

    # Now check current users lazylibrarian read/toread against the calibre library, warn about missing ones
    # which might be books calibre doesn't have, or might be minor differences in author or title

    for idlist in [("Read", readlist), ("To_Read", toreadlist)]:
        booklist = idlist[1]
        for bookid in booklist:
            cmd = "SELECT AuthorID,BookName from books where BookID=?"
            book = myDB.match(cmd, (bookid, ))
            if not book:
                logger.error('Error finding bookid %s' % bookid)
            else:
                cmd = "SELECT AuthorName from authors where AuthorID=?"
                author = myDB.match(cmd, (book['AuthorID'], ))
                if not author:
                    logger.error('Error finding authorid %s' %
                                 book['AuthorID'])
                else:
                    match = False
                    for item in calibre_list:
                        if item['authors'] == author['AuthorName'] and item[
                                'title'] == book['BookName']:
                            logger.debug("Exact match for %s [%s]" %
                                         (idlist[0], book['BookName']))
                            map_ctol[str(item['id'])] = str(bookid)
                            map_ltoc[str(bookid)] = str(item['id'])
                            match = True
                            break
                    if not match:
                        high = 0
                        highname = ''
                        highid = ''
                        for item in calibre_list:
                            if item['authors'] == author['AuthorName']:
                                n = fuzz.token_sort_ratio(
                                    item['title'], book['BookName'])
                                if n > high:
                                    high = n
                                    highname = item['title']
                                    highid = item['id']

                        if high > 95:
                            logger.debug(
                                "Found ratio match %s%% [%s] for %s [%s]" %
                                (high, highname, idlist[0], book['BookName']))
                            map_ctol[str(highid)] = str(bookid)
                            map_ltoc[str(bookid)] = str(highid)
                            match = True

                    if not match:
                        logger.warn(
                            "No match for %s %s by %s in calibre database, closest match %s%% [%s]"
                            % (idlist[0], book['BookName'],
                               author['AuthorName'], high, highname))
                        nomatch += 1

    logger.debug("BookID mapping complete, %s match %s, nomatch %s" %
                 (username, len(map_ctol), nomatch))

    # now sync the lists
    if userid:
        last_read = []
        last_toread = []
        calibre_read = []
        calibre_toread = []

        cmd = 'select SyncList from sync where UserID=? and Label=?'
        res = myDB.match(cmd, (userid, col_read))
        if res:
            last_read = getList(res['SyncList'])
        res = myDB.match(cmd, (userid, col_toread))
        if res:
            last_toread = getList(res['SyncList'])

        for item in calibre_list:
            if toreadcol and toreadcol in item and item[
                    toreadcol]:  # only if True
                if str(item['id']) in map_ctol:
                    calibre_toread.append(map_ctol[str(item['id'])])
                else:
                    logger.warn(
                        "Calibre to_read book %s:%s has no lazylibrarian bookid"
                        % (item['authors'], item['title']))
            if readcol and readcol in item and item[readcol]:  # only if True
                if str(item['id']) in map_ctol:
                    calibre_read.append(map_ctol[str(item['id'])])
                else:
                    logger.warn(
                        "Calibre read book %s:%s has no lazylibrarian bookid" %
                        (item['authors'], item['title']))

        logger.debug("Found %s calibre read, %s calibre toread" %
                     (len(calibre_read), len(calibre_toread)))
        logger.debug("Found %s lazylib read, %s lazylib toread" %
                     (len(readlist), len(toreadlist)))

        added_to_ll_toread = list(set(toreadlist) - set(last_toread))
        removed_from_ll_toread = list(set(last_toread) - set(toreadlist))
        added_to_ll_read = list(set(readlist) - set(last_read))
        removed_from_ll_read = list(set(last_read) - set(readlist))
        logger.debug("lazylibrarian changes to copy to calibre: %s %s %s %s" %
                     (len(added_to_ll_toread), len(removed_from_ll_toread),
                      len(added_to_ll_read), len(removed_from_ll_read)))

        added_to_calibre_toread = list(set(calibre_toread) - set(last_toread))
        removed_from_calibre_toread = list(
            set(last_toread) - set(calibre_toread))
        added_to_calibre_read = list(set(calibre_read) - set(last_read))
        removed_from_calibre_read = list(set(last_read) - set(calibre_read))
        logger.debug(
            "calibre changes to copy to lazylibrarian: %s %s %s %s" %
            (len(added_to_calibre_toread), len(removed_from_calibre_toread),
             len(added_to_calibre_read), len(removed_from_calibre_read)))

        calibre_changes = 0
        for item in added_to_calibre_read:
            if item not in readlist:
                readlist.append(item)
                logger.debug("Lazylibrarian marked %s as read" % item)
                calibre_changes += 1
        for item in added_to_calibre_toread:
            if item not in toreadlist:
                toreadlist.append(item)
                logger.debug("Lazylibrarian marked %s as to_read" % item)
                calibre_changes += 1
        for item in removed_from_calibre_read:
            if item in readlist:
                readlist.remove(item)
                logger.debug("Lazylibrarian removed %s from read" % item)
                calibre_changes += 1
        for item in removed_from_calibre_toread:
            if item in toreadlist:
                toreadlist.remove(item)
                logger.debug("Lazylibrarian removed %s from to_read" % item)
                calibre_changes += 1
        if calibre_changes:
            myDB.action('UPDATE users SET ToRead=?,HaveRead=? WHERE UserID=?',
                        (', '.join(toreadlist), ', '.join(readlist), userid))
        ll_changes = 0
        for item in added_to_ll_toread:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_toread, map_ltoc[item], 'true'],
                                         [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to set calibre %s true for %s" %
                            (col_toread, item))
        for item in removed_from_ll_toread:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_toread, map_ltoc[item], ''], [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to clear calibre %s for %s" %
                            (col_toread, item))

        for item in added_to_ll_read:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_read, map_ltoc[item], 'true'],
                                         [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to set calibre %s true for %s" %
                            (col_read, item))

        for item in removed_from_ll_read:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom',
                                         [col_read, map_ltoc[item], ''], [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to clear calibre %s for %s" %
                            (col_read, item))

        # store current sync list as comparison for next sync
        controlValueDict = {"UserID": userid, "Label": col_read}
        newValueDict = {
            "Date": str(time.time()),
            "Synclist": ', '.join(readlist)
        }
        myDB.upsert("sync", newValueDict, controlValueDict)
        controlValueDict = {"UserID": userid, "Label": col_toread}
        newValueDict = {
            "Date": str(time.time()),
            "Synclist": ', '.join(toreadlist)
        }
        myDB.upsert("sync", newValueDict, controlValueDict)

        msg = "%s sync updated: %s calibre, %s lazylibrarian" % (
            username, ll_changes, calibre_changes)
    return msg

Example #21

Show file

def bts_file(search_dir=None):
    if 'bts' not in getList(lazylibrarian.CONFIG['SKIPPED_EXT']):
        return ''
    return any_file(search_dir, '.bts')

Example #22

Show file

File: searchnzb.py Project: dmfinley/LazyLibrarian

def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {
        "...": "",
        ".": " ",
        " & ": " ",
        " = ": " ",
        "?": "",
        "$": "s",
        " + ": " ",
        '"': "",
        ",": " ",
        "*": "",
        "(": "",
        ")": "",
        "[": "",
        "]": "",
        "#": "",
        "0": "",
        "1": "",
        "2": "",
        "3": "",
        "4": "",
        "5": "",
        "6": "",
        "7": "",
        "8": "",
        "9": "",
        "'": "",
        ":": "",
        "!": "",
        "-": " ",
        "\s\s": " ",
    }
    # ' the ': ' ', ' a ': ' ', ' and ': ' ',
    # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {
        "...": "",
        ".": " ",
        " & ": " ",
        " = ": " ",
        "?": "",
        "$": "s",
        " + ": " ",
        '"': "",
        ",": "",
        "*": "",
        ":": "",
        ";": "",
        "'": "",
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
    author = formatter.latinToAscii(formatter.replace_all(book["authorName"], dic))
    title = formatter.latinToAscii(formatter.replace_all(book["bookName"], dic))

    matches = []
    for nzb in resultlist:
        nzb_Title = formatter.latinToAscii(formatter.replace_all(nzb["nzbtitle"], dictrepl)).strip()
        nzb_Title = re.sub(r"\s\s+", " ", nzb_Title)  # remove extra whitespace

        nzbAuthor_match = fuzz.token_set_ratio(author, nzb_Title)
        nzbBook_match = fuzz.token_set_ratio(title, nzb_Title)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzb_Title))

        rejected = False
        for word in reject_list:
            if word in nzb_Title.lower() and not word in author.lower() and not word in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (nzb_Title, word))
                break

        nzbsize_temp = nzb["nzbsize"]  # Need to cater for when this is NONE (Issue 35)
        if nzbsize_temp is None:
            nzbsize_temp = 1000
        nzbsize = round(float(nzbsize_temp) / 1048576, 2)

        maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if maxsize and nzbsize > maxsize:
            rejected = True
            logger.debug("Rejecting %s, too large" % nzb_Title)

        if nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio and not rejected:
            # logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
            bookid = book["bookid"]
            nzbTitle = (author + " - " + title + " LL.(" + book["bookid"] + ")").strip()
            nzburl = nzb["nzburl"]
            nzbprov = nzb["nzbprov"]
            nzbdate_temp = nzb["nzbdate"]
            nzbdate = formatter.nzbdate2format(nzbdate_temp)
            nzbmode = nzb["nzbmode"]
            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": nzbsize,
                "NZBtitle": nzbTitle,
                "NZBmode": nzbmode,
                "Status": "Skipped",
            }

            score = (nzbBook_match + nzbAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(nzb_Title))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, nzb_Title, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u"Best match NZB (%s%%): %s using %s search" % (score, nzb_Title, searchtype))

        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action(
            'SELECT * from books WHERE BookID="%s" and Status="Snatched"' % newValueDict["BookID"]
        ).fetchone()
        if not snatchedbooks:
            if nzbmode == "torznab":
                snatch = TORDownloadMethod(
                    newValueDict["BookID"],
                    newValueDict["NZBprov"],
                    newValueDict["NZBtitle"],
                    controlValueDict["NZBurl"],
                )
            else:
                snatch = NZBDownloadMethod(
                    newValueDict["BookID"],
                    newValueDict["NZBprov"],
                    newValueDict["NZBtitle"],
                    controlValueDict["NZBurl"],
                )
            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + " at " + formatter.now())
                common.schedule_job(action="Start", target="processDir")
                return True

    logger.debug(
        "No nzb's found for "
        + (book["authorName"] + " " + book["bookName"]).strip()
        + " using searchtype "
        + searchtype
    )
    return False

Example #23

Show file

File: searchrss.py Project: Evildantex/LazyLibrarian

def processResultList(resultlist, author, title, book):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        tor_Title = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip()
        tor_Title = re.sub(r"\s\s+", " ", tor_Title)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, tor_Title)
        tor_Title_match = fuzz.token_set_ratio(title, tor_Title)
        logger.debug("RSS Author/Title Match: %s/%s for %s" %(tor_Author_match, tor_Title_match, tor_Title))

        rejected = False
        for word in reject_list:
            if word in tor_Title.lower() and not word in author.lower() and not word in book.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (tor_Title, word))
                break

        if (tor_Title_match >= match_ratio and tor_Author_match >= match_ratio and not rejected):
            logger.debug(u'Found RSS: %s' % tor['tor_title'])
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()
            tor_url = tor['tor_url']
            tor_prov = tor['tor_prov']
            tor_feed = tor['tor_feed']

            tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
            if tor_size_temp is None:
                tor_size_temp = 1000
            tor_size = str(round(float(tor_size_temp) / 1048576, 2)) + ' MB'
            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)
            snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                        bookid).fetchone()
            if not snatchedbooks:  # check if one of the other downloaders got there first
                if '.nzb' in tor_url:
                    snatch = NZBDownloadMethod(bookid, tor_prov, tor_Title, tor_url)
                else:    
                    #  http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                    if not tor_url.startswith('magnet'):  # magnets don't use auth
                        pwd  = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                        auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                        # don't know what form of password hash is required, try sha1    
                        tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
                    snatch = TORDownloadMethod(bookid, tor_prov, tor_Title, tor_url)

                if snatch:
                    notifiers.notify_snatch(formatter.latinToAscii(tor_Title) + ' at ' + formatter.now())
                    common.schedule_job(action='Start', target='processDir')
                    return True

    logger.debug("No RSS found for " + (book["authorName"] + ' ' +
                 book['bookName']).strip())
    return False

Example #24

Show file

File: gb.py Project: andyuc88/LazyLibrarian

    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                         entrystatus='Active', refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' % authorname)
            # google doesnt like accents in author names
            set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Artist is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip if no author, no author is no book.
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            continue

                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif (len(book['isbn']) == 10) or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (book['name'], googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                                ignored += 1
                                continue

                        rejected = 0
                        check_status = False
                        book_status = bookstatus  # new_book status, or new_author status
                        audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                        added = today()
                        locked = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                            removedResults += 1
                            rejected = 1
                        else:
                            bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                            # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                            # editions) and sometimes uses the same bookid if the book is the same but the title is
                            # slightly different. Not sure if googlebooks does too, but we only want one...
                            cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                            existing_book = myDB.match(cmd, (bookid,))
                            if existing_book:
                                book_status = existing_book['Status']
                                audio_status = existing_book['AudioStatus']
                                locked = existing_book['Manual']
                                added = existing_book['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                if rejected in [3, 4, 5]:
                                    book_status = 'Ignored'
                                    audio_status = 'Ignored'
                                else:
                                    book_status = bookstatus  # new_book status, or new_author status
                                    audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                                added = today()
                                locked = False

                        if not rejected and re.match('[^\w-]', bookname):  # remove books with bad characters in title
                            logger.debug("[%s] removed book for bad characters" % bookname)
                            removedResults += 1
                            rejected = 2

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                                removedResults += 1
                                rejected = 3

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug('Rejecting %s, no publication date' % bookname)
                                removedResults += 1
                                rejected = 4

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' % bookname)
                                removedResults += 1
                                rejected = 5

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorname, bookname, bookid))
                                    rejected = 6
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid,))
                            if match:  # we have a book with this bookid already
                                if bookname != match['BookName'] or authorname != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorname, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = 7

                        if check_status or not rejected or (
                                lazylibrarian.CONFIG['IMP_IGNORE'] and rejected in [3, 4, 5]):  # dates, isbn
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug("Book found: " + bookname + " " + book['date'])
                                updated = False
                                if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug('Updated cover for %s using %s' % (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif book['img'] and book['img'].startswith('http'):
                                    link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % book['img'])

                                serieslist = []
                                if book['series']:
                                    serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                        updated = True
                                setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist, bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug("[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug("[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                         (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid,))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
            logger.debug("Removed %s bad character or no-name result%s for author" %
                         (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

            myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                        (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())

Example #25

Show file

File: searchrss.py Project: dmfinley/LazyLibrarian

def processResultList(resultlist, author, title, book):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ',
                # ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    matches = []
    
    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = formatter.latinToAscii(formatter.replace_all(tor['tor_title'], dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(author, torTitle)
        tor_Title_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" % (tor_Author_match, tor_Title_match, torTitle))

        rejected = False
        for word in reject_list:
            if word in torTitle.lower() and not word in author.lower() and not word in book.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                break

            tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
            if tor_size_temp is None:
                tor_size_temp = 1000
            tor_size = round(float(tor_size_temp) / 1048576, 2)

            maxsize = formatter.check_int(lazylibrarian.REJECT_MAXSIZE, 0)
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if (tor_Title_match >= match_ratio and tor_Author_match >= match_ratio and not rejected):
            #logger.debug(u'Found RSS: %s' % tor['tor_title'])
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()
            tor_url = tor['tor_url']
            tor_prov = tor['tor_prov']
            tor_feed = tor['tor_feed']

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }
            
            score = (tor_Title_match + tor_Author_match)/2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(formatter.getList(torTitle))
            words -= len(formatter.getList(author))
            words -= len(formatter.getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]
        logger.info(u'Best match RSS (%s%%): %s using %s search' % 
            (score, nzb_Title, searchtype))
                  
        myDB.upsert("wanted", newValueDict, controlValueDict)

        snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                    newValueDict["BookID"]).fetchone()

        if not snatchedbooks:  # check if one of the other downloaders got there first
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            else:
                """
                #  http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                if not tor_url.startswith('magnet'):  # magnets don't use auth
                    pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                    auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                    # don't know what form of password hash is required, try sha1
                    tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
                """  
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)

            if snatch:
                notifiers.notify_snatch(newValueDict["NZBtitle"] + ' at ' + formatter.now())
                common.schedule_job(action='Start', target='processDir')
                return True

    logger.debug("No RSS found for " + (book["authorName"] + ' ' +
                book['bookName']).strip())
    return False

Example #26

Show file

File: downloadmethods.py Project: andyuc88/LazyLibrarian

def DirectDownloadMethod(bookid=None, dl_title=None, dl_url=None, library='eBook'):
    myDB = database.DBConnection()
    downloadID = False
    Source = "DIRECT"

    logger.debug("Starting Direct Download for [%s]" % dl_title)
    proxies = proxyList()
    headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
    try:
        r = requests.get(dl_url, headers=headers, timeout=90, proxies=proxies)
    except requests.exceptions.Timeout:
        logger.warn('Timeout fetching file from url: %s' % dl_url)
        return False
    except Exception as e:
        if hasattr(e, 'reason'):
            logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, dl_url, e.reason))
        else:
            logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, dl_url, str(e)))
        return False

    if not str(r.status_code).startswith('2'):
        logger.debug("Got a %s response for %s" % (r.status_code, dl_url))
    elif len(r.content) < 1000:
        logger.debug("Only got %s bytes for %s, rejecting" % (len(r.content), dl_title))
    else:
        extn = ''
        basename = ''
        if ' ' in dl_title:
            basename, extn = dl_title.rsplit(' ', 1)  # last word is often the extension - but not always...
        if extn and extn in getList(lazylibrarian.CONFIG['EBOOK_TYPE']):
            dl_title = '.'.join(dl_title.rsplit(' ', 1))
        elif magic:
            mtype = magic.from_buffer(r.content)
            if 'EPUB' in mtype:
                extn = '.epub'
            elif 'Mobipocket' in mtype:  # also true for azw and azw3, does it matter?
                extn = '.mobi'
            elif 'PDF' in mtype:
                extn = '.pdf'
            else:
                logger.debug("magic reports %s" % mtype)
            basename = dl_title
        else:
            logger.warn("Don't know the filetype for %s" % dl_title)
            basename = dl_title

        logger.debug("File download got %s bytes for %s" % (len(r.content), dl_title))
        destdir = os.path.join(lazylibrarian.DIRECTORY('Download'), basename)
        # destdir = os.path.join(lazylibrarian.DIRECTORY('Download'), '%s LL.(%s)' % (basename, bookid))
        if not os.path.isdir(destdir):
            _ = mymakedirs(destdir)

        try:
            hashid = dl_url.split("md5=")[1].split("&")[0]
        except IndexError:
            hashid = sha1(encode(dl_url)).hexdigest()

        destfile = os.path.join(destdir, basename + extn)
        try:
            with open(destfile, 'wb') as bookfile:
                bookfile.write(r.content)
            setperm(destfile)
            downloadID = hashid
        except Exception as e:
            logger.error("%s writing book to %s, %s" % (type(e).__name__, destfile, e))

    if downloadID:
        logger.debug('File %s has been downloaded from %s' % (dl_title, dl_url))
        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, dl_url))
        return True
    else:
        logger.error('Failed to download file @ <a href="%s">%s</a>' % (dl_url, dl_url))
        myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (dl_url,))
        return False

Example #27

Show file

def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
  try:
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "SEARCHMAG"

    myDB = database.DBConnection()
    searchlist = []

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select('SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title="%s" AND Status="Active"' % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 0:
        return

    # should clear old search results as might not be available any more
    # ie torrent not available, changed providers, out of news server retention etc.
    # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
    logger.debug(u"Removing old magazine search results")
    myDB.action('DELETE from pastissues WHERE Status="Skipped"')

    logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

    for searchmag in searchmags:
        bookid = searchmag['Title']
        searchterm = searchmag['Regex']

        if not searchterm:
            searchterm = searchmag['Title']
            dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
            searchterm = unaccented_str(replace_all(searchterm, dic))
            searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode(lazylibrarian.SYS_ENCODING)

        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn('There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:

        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

            if tor_resultlist:
                for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                    resultlist.append({
                        'bookid': item['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                        'nzbsize': item['tor_size'],
                        'nzbmode': 'torrent'
                    })

        if lazylibrarian.USE_RSS():
            rss_resultlist, nproviders = IterateOverRSSSites(book, 'mag')
            if not nproviders:
                logger.warn('No rss providers are set. Check config for RSS providers')

            if rss_resultlist:
                for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                    resultlist.append({
                        'bookid': book['bookid'],
                        'nzbprov': item['tor_prov'],
                        'nzbtitle': item['tor_title'],
                        'nzburl': item['tor_url'],
                        'nzbdate':
                            item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                        'nzbsize': item['tor_size'],
                        'nzbmode': item['tor_type']
                    })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])

        else:
            bad_name = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            maglist = []
            issues = []

            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = unaccented_str(nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                nzbdate = nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']

                results = myDB.match('SELECT * from magazines WHERE Title="%s"' % bookid)
                if not results:
                    logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                    bad_name = bad_name + 1
                else:
                    rejected = False
                    maxsize = check_int(lazylibrarian.REJECT_MAGSIZE, 0)
                    if maxsize and nzbsize > maxsize:
                        logger.debug("Rejecting %s, too large" % nzbtitle)
                        rejected = True

                    if not rejected:
                        control_date = results['IssueDate']
                        reject_list = getList(results['Reject'])

                        dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': ''}
                        nzbtitle_formatted = replace_all(nzbtitle, dic).strip()

                        # Need to make sure that substrings of magazine titles don't get found
                        # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                        # remove extra spaces if they're in a row
                        nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                        nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                        if ' ' in bookid:
                            bookid_exploded = bookid.split(' ')
                        else:
                            bookid_exploded = [bookid]

                        # check nzb starts with magazine title followed by a date
                        # eg The MagPI Issue 22 - July 2015

                        if len(nzbtitle_exploded) > len(bookid_exploded):
                            # needs to be longer as it has to include a date
                            # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                            mag_title_match = fuzz.token_set_ratio(
                                unaccented(bookid),
                                unaccented(nzbtitle_formatted))

                            if mag_title_match < lazylibrarian.MATCH_RATIO:
                                logger.debug(
                                    u"Magazine token set Match failed: " + str(
                                        mag_title_match) + "% for " + nzbtitle_formatted)
                                rejected = True
                            else:
                                logger.debug(
                                    u"Magazine matched: " + str(
                                        mag_title_match) + "% " + bookid + " for " + nzbtitle_formatted)
                        else:
                            rejected = True

                    if not rejected:
                        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
                                                    nzburl)
                        if already_failed:
                            logger.debug("Rejecting %s, blacklisted at %s" %
                                         (nzbtitle_formatted, already_failed['NZBprov']))
                            rejected = True

                    if not rejected:
                        lower_title = unaccented(nzbtitle_formatted).lower()
                        lower_bookid = unaccented(bookid).lower()
                        for word in reject_list:
                            if word in lower_title and word not in lower_bookid:
                                rejected = True
                                logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                break

                    if not rejected:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # or other words we don't want. Should make the word list configurable.
                        # so strip all the trailing junk...
                        strip_list = ['pdf', 'true', 'truepdf', 'german', 'ebooks']
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() in strip_list:
                                nzbtitle_exploded.pop()  # gotta love the function names

                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                            regexA_month = month2num(unaccented(regexA_month_temp))
                            if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure

                            # if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].rstrip(',').zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            # else:
                            # regexA_day = '01'  # monthly, or less frequent
                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months, or month <1 or >12
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY or MonthName DD, YYYY
                                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                regexB_month = month2num(unaccented(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].rstrip(',').zfill(2)
                                if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100:
                                    regexB_year = 'fail'
                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(int(regexB_year), int(regexB_month), int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try  YYYY MM
                                    regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                        regexC_month = 0
                                        regexC_day = 0
                                        if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                            regexC_day = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        else:
                                            regexC_year = 'fail'
                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(int(regexC_year), int(regexC_month), int(regexC_day))
                                    except Exception:
                                        # regexD Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                                        try:
                                            IssueLabel = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                            if IssueLabel.lower() in ["issue", "no", "nr", "vol"]:
                                                # issue nn
                                                regexD_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                if regexD_issue.isdigit():
                                                    newdatish = str(int(regexD_issue))  # 4 == 04 == 004
                                            else:
                                                IssueLabel = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                                if IssueLabel.lower() in ["issue", "no", "nr", "vol"]:
                                                    # issue nn, YYYY
                                                    regexD_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                                    regexD_issue = regexD_issue.strip(',')
                                                    if regexD_issue.isdigit():
                                                        newdatish = str(int(regexD_issue))  # 4 == 04 == 004
                                                    else:
                                                        raise ValueError
                                                    regexD_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                    if regexD_year.isdigit():
                                                        if int(regexD_year) < int(datetime.date.today().year):
                                                            newdatish = 0  # it's old
                                                else:
                                                    raise ValueError
                                        except Exception:
                                            # regexE nn YYYY issue number without "Nr" before it
                                            # nn is assumed not to be a month as they are normally names not digits
                                            try:
                                                regexE_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                regexE_issue = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                                print "[%s][%s]" % (regexE_year, regexE_issue)
                                                if regexE_issue.isdigit():
                                                    newdatish = int(regexE_issue)
                                                    if int(regexE_year) < int(datetime.date.today().year):
                                                        newdatish = 0  # it's old
                                                else:
                                                    raise ValueError
                                            except Exception:
                                                # regexF issue and year as a single 6 digit string eg 222015
                                                try:
                                                    regexF = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                                    if regexF.isdigit() and len(regexF) == 6:
                                                        regexF_issue = regexF[:2]
                                                        regexF_year = regexF[2:]
                                                        newdatish = str(int(regexF_issue))  # 4 == 04 == 004
                                                        if int(regexF_year) < int(datetime.date.today().year):
                                                            newdatish = 0  # it's old
                                                    else:
                                                        raise ValueError

                                                except Exception:
                                                    logger.debug('Magazine %s not in a recognised date format.' %
                                                                 nzbtitle_formatted)
                                                    bad_date = bad_date + 1
                                                    # allow issues with good name but bad date to be included
                                                    # so user can manually select them, incl those with issue numbers
                                                    newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                                    # continue
                        else:
                            logger.debug('Magazine [%s] does not match the search term [%s].' % (
                                nzbtitle_formatted, bookid))
                            bad_name = bad_name + 1
                            continue

                        #  wanted issues go into wanted table marked "Wanted"
                        #  the rest into pastissues table marked "Skipped"
                        insert_table = "pastissues"
                        insert_status = "Skipped"

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            # or for magazines with only an issue number, use zero

                            if '-' in str(newdatish):
                                start_time = time.time()
                                start_time -= int(lazylibrarian.MAG_AGE) * 24 * 60 * 60  # number of seconds in days
                                if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                    start_time = 0
                                control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                logger.debug('Magazine date comparing to %s' % control_date)
                            else:
                                control_date = 0

                        if '-' in str(control_date) and '-' in str(newdatish):
                            # only grab a copy if it's newer than the most recent we have,
                            # or newer than a month ago if we have none
                            comp_date = datecompare(newdatish, control_date)
                        elif '-' not in str(control_date) and '-' not in str(newdatish):
                            # for issue numbers, check if later than last one we have
                            comp_date = int(newdatish) - int(control_date)
                            newdatish = "%s" % newdatish
                            newdatish = newdatish.zfill(4)  # pad so we sort correctly
                        else:
                            # invalid comparison of date and issue number
                            logger.debug('Magazine %s incorrect date or issue format.' % nzbtitle_formatted)
                            bad_date = bad_date + 1
                            newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                            comp_date = 0

                        if comp_date > 0:
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                issues.append(issue)
                                insert_table = "wanted"
                                insert_status = "Wanted"
                                nzbdate = now()  # when we asked for it
                            else:
                                logger.debug('This issue of %s is already flagged for download' % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date = old_date + 1

                        #  store only the _new_ matching results
                        #  Don't add a new entry if this issue has been found on an earlier search
                        #  and status has been user-set ( we only delete the "Skipped" ones )
                        #  In "wanted" table it might be already snatched/downloading/processing

                        mag_entry = myDB.select('SELECT * from %s WHERE NZBtitle="%s" and NZBprov="%s"' % (
                                                insert_table, nzbtitle, nzbprov))
                        if not mag_entry:
                            controlValueDict = {
                                "NZBtitle": nzbtitle,
                                "NZBprov": nzbprov
                            }
                            newValueDict = {
                                "NZBurl": nzburl,
                                "BookID": bookid,
                                "NZBdate": nzbdate,
                                "AuxInfo": newdatish,
                                "Status": insert_status,
                                "NZBsize": nzbsize,
                                "NZBmode": nzbmode
                            }
                            myDB.upsert(insert_table, newValueDict, controlValueDict)

                    else:
                        # logger.debug('Magazine [%s] was rejected.' % nzbtitle_formatted)
                        bad_name = bad_name + 1

            logger.info('Found %i result%s for %s. %i new, %i old, %i fail date, %i fail name: %i to download' % (
                        total_nzbs, plural(total_nzbs), bookid, new_date, old_date, bad_date, bad_name, len(maglist)))

            for magazine in maglist:
                if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                    snatch = TORDownloadMethod(
                        magazine['bookid'],
                        magazine['nzbprov'],
                        magazine['nzbtitle'],
                        magazine['nzburl'])
                else:
                    snatch = NZBDownloadMethod(
                        magazine['bookid'],
                        magazine['nzbprov'],
                        magazine['nzbtitle'],
                        magazine['nzburl'])
                if snatch:
                    logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                    notify_snatch("%s from %s at %s" %
                                  (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                    scheduleJob(action='Start', target='processDir')
            maglist = []

    if reset:
        scheduleJob(action='Restart', target='search_magazines')

    logger.info("Search for magazines complete")

  except Exception as e:
    logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())

Example #28

Show file

File: librarysync.py Project: forge33/LazyLibrarian

def find_book_in_db(myDB, author, book):
    # PAB fuzzy search for book in library, return LL bookid if found or zero
    # if not, return bookid to more easily update status
    # prefer an exact match on author & book
    match = myDB.action(
        'SELECT BookID FROM books where AuthorName="%s" and BookName="%s"' %
        (author, book)).fetchone()
    if match:
        logger.debug('Exact match [%s]' % book)
        return match['BookID']
    else:
        # No exact match
        # Try a more complex fuzzy match against each book in the db by this author
        # Using hard-coded ratios for now, ratio high (>90), partial_ratio lower (>75)
        # These are results that work well on my library, minimal false matches and no misses
        # on books that should be matched
        # Maybe make ratios configurable in config.ini later

        books = myDB.select('SELECT BookID,BookName FROM books where AuthorName="%s"' % author)

        best_ratio = 0
        best_partial = 0
        ratio_name = ""
        partial_name = ""
        ratio_id = 0
        partial_id = 0

        for a_book in books:
            # tidy up everything to raise fuzziness scores
            # still need to lowercase for matching against partial_name later on
            book_lower = unaccented(book.lower())
            a_book_lower = unaccented(a_book['BookName'].lower())
            #
            ratio = fuzz.ratio(book_lower, a_book_lower)
            partial = fuzz.partial_ratio(book_lower, a_book_lower)

            # lose a point for each extra word in the fuzzy matches so we get the closest match
            words = len(getList(book_lower))
            words -= len(getList(a_book_lower))
            ratio -= abs(words)
            partial -= abs(words)

            if ratio > best_ratio:
                best_ratio = ratio
                ratio_name = a_book['BookName']
                ratio_id = a_book['BookID']
            if partial > best_partial:
                best_partial = partial
                partial_name = a_book['BookName']
                partial_id = a_book['BookID']

            if partial == best_partial:
                # prefer the match closest to the left, ie prefer starting with a match and ignoring the rest
                # this eliminates most false matches against omnibuses
                # find the position of the shortest string in the longest
                if len(getList(book_lower)) >= len(getList(a_book_lower)):
                    match1 = book_lower.find(a_book_lower)
                else:
                    match1 = a_book_lower.find(book_lower)

                if len(getList(book_lower)) >= len(getList(partial_name.lower())):
                    match2 = book_lower.find(partial_name.lower())
                else:
                    match2 = partial_name.lower().find(book_lower)

                if match1 < match2:
                    logger.debug(
                        "Fuzz left change, prefer [%s] over [%s] for [%s]" %
                        (a_book['BookName'], partial_name, book))
                    best_partial = partial
                    partial_name = a_book['BookName']
                    partial_id = a_book['BookID']

        if best_ratio > 90:
            logger.debug(
                "Fuzz match   ratio [%d] [%s] [%s]" %
                (best_ratio, book, ratio_name))
            return ratio_id
        if best_partial > 75:
            logger.debug(
                "Fuzz match partial [%d] [%s] [%s]" %
                (best_partial, book, partial_name))
            return partial_id

        logger.debug(
            'Fuzz failed [%s - %s] ratio [%d,%s], partial [%d,%s]' %
            (author, book, best_ratio, ratio_name, best_partial, partial_name))
        return 0

Example #29

Show file

File: librarysync.py Project: forge33/LazyLibrarian

def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database
        Return how many books you added """
    if not startdir:
        if not lazylibrarian.DESTINATION_DIR:
            return 0
        else:
            startdir = lazylibrarian.DESTINATION_DIR

    if not os.path.isdir(startdir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' % startdir)
        return 0

    myDB = database.DBConnection()

    # keep statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        myDB.action('DELETE from stats')

    logger.info('Scanning ebook directory: %s' % startdir)

    new_book_count = 0
    file_count = 0
    author = ""

    if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))

    # to save repeat-scans of the same directory if it contains multiple formats of the same book,
    # keep track of which directories we've already looked at
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(startdir):
        for directory in d[:]:
            # prevent magazine being scanned
            if directory.startswith("_") or directory.startswith("."):
                d.remove(directory)

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode(lazylibrarian.SYS_ENCODING)

            subdirectory = r.replace(startdir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if is_valid_booktype(files):

                    logger.debug("[%s] Now scanning subdirectory %s" %
                                 (startdir, subdirectory))

                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    extn = os.path.splitext(files)[1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == ".epub") or (extn == ".mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))

                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']

                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:

                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref

                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and is_valid_isbn(isbn):
                            logger.debug(
                                "Found Language [%s] ISBN [%s]" %
                                (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"' %
                                (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")' %
                                    (isbnhead, language))
                                logger.debug(
                                    "Cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' %
                            author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" %
                                    author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = unaccented(match_name)
                                match_auth = unaccented(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]" %
                                        (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" %
                                        (match_auth, match_name))

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' %
                                        author).fetchone()
                                    if not check_exist_author:
                                        logger.info(
                                            "Adding new author [%s]" %
                                            author)
                                        try:
                                            addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' %
                                                author).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" %
                                author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)

                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)

                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' %
                                    bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book

                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"' %
                                        bookid)

                                    book_filename = os.path.join(r, files)

                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open

                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' %
                                        (book_filename, bookid))

                                    # update cover file to cover.jpg in book folder (if exists)
                                    bookdir = book_filename.rsplit(os.sep, 1)[0]
                                    coverimg = os.path.join(bookdir, 'cover.jpg')
                                    cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache')
                                    cacheimg = os.path.join(cachedir, bookid + '.jpg')
                                    if os.path.isfile(coverimg):
                                        copyfile(coverimg, cacheimg)

                                    new_book_count += 1
                            else:
                                logger.debug(
                                    "Failed to match book [%s] by [%s] in database" %
                                    (book, author))


    logger.info("%s new/modified book%s found and added to the database" %
                (new_book_count, plural(new_book_count)))
    logger.info("%s file%s processed" % (file_count, plural(file_count)))

    # show statistics of full library scans
    if startdir == lazylibrarian.DESTINATION_DIR:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats").fetchone()
        if stats['sum(GR_book_hits)'] is not None:
            # only show stats if new books added
            if lazylibrarian.BOOK_API == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                    (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                    (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)'])))
            if lazylibrarian.BOOK_API == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                    (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                    (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                (stats['sum(LT_lang_hits)'], plural (stats['sum(LT_lang_hits)'])))
            logger.debug("Language cache was hit %s time%s" %
                (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)'])))
            logger.debug("Unwanted language removed %s book%s" %
                (stats['sum(bad_lang)'], plural (stats['sum(bad_lang)'])))
            logger.debug("Unwanted characters removed %s book%s" %
                (stats['sum(bad_char)'], plural(stats['sum(bad_char)'])))
            logger.debug("Unable to cache %s book%s with missing ISBN" %
                (stats['sum(uncached)'], plural(stats['sum(uncached)'])))
            logger.debug("Found %s duplicate book%s" %
                (stats['sum(duplicates)'], plural(stats['sum(duplicates)'])))
            logger.debug("Cache %s hit%s, %s miss" %
                (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
            nolang = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))

        authors = myDB.select('select AuthorID from authors')
        # Update bookcounts for all authors, not just new ones - refresh may have located
        # new books for existing authors especially if switched provider gb/gr
    else:
        # single author/book import
        authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author)

    logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
    for author in authors:
        update_totals(author['AuthorID'])

    images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
    if len(images):
        logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            bookid = item['bookid']
            bookimg = item['bookimg']
            bookname = item['bookname']
            newimg = cache_cover(bookid, bookimg)
            if newimg is not None:
                myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))

    images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
    if len(images):
        logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
        for item in images:
            authorid = item['authorid']
            authorimg = item['authorimg']
            authorname = item['authorname']
            newimg = cache_cover(authorid, authorimg)
            if newimg is not None:
                myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid))
    setWorkPages()
    logger.info('Library scan complete')
    return new_book_count

Example #30

Show file

File: grsync.py Project: andyuc88/LazyLibrarian

def grsync(status, shelf):
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        GA = grauth()
        GR = None
        shelves = GA.get_shelf_list()
        found = False
        for item in shelves:  # type: dict
            if item['name'] == shelf:
                found = True
                break
        if not found:
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # Sync method for WANTED:
        # Get results of last_sync (if any)
        # For each book in last_sync
        #    if not in ll_list, new deletion, remove from gr_shelf
        #    if not in gr_shelf, new deletion, remove from ll_list, mark Skipped
        # For each book in ll_list
        #    if not in last_sync, new addition, add to gr_shelf
        # For each book in gr_shelf
        #    if not in last sync, new addition, add to ll_list, mark Wanted
        #
        # save ll WANTED as last_sync

        # For HAVE/OPEN method is the same, but only change status if HAVE, not OPEN

        cmd = 'select SyncList from sync where UserID="%s" and Label="%s"' % ("goodreads", shelf)
        res = myDB.match(cmd)
        last_sync = []
        shelf_changed = 0
        ll_changed = 0
        if res:
            last_sync = getList(res['SyncList'])

        added_to_shelf = list(set(gr_shelf) - set(last_sync))
        removed_from_shelf = list(set(last_sync) - set(gr_shelf))
        added_to_ll = list(set(ll_list) - set(last_sync))
        removed_from_ll = list(set(last_sync) - set(ll_list))

        logger.info("%s missing from lazylibrarian %s" % (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            # first the deletions since last sync...
            try:
                res, content = GA.BookToList(book, shelf, action='remove')
            except Exception as e:
                logger.error("Error removing %s from %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s removed from %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to remove %s from %s shelf: %s" % (book, shelf, content))

        logger.info("%s missing from goodreads %s" % (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            # deleted from goodreads
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if res['Status'] in ['Have', 'Wanted']:
                    myDB.action('UPDATE books SET Status="Skipped" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Skipped" % book)
                else:
                    logger.warn("Not removing %s, book is marked %s" % (book, res['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            try:
                res, content = GA.BookToList(book, shelf, action='add')
            except Exception as e:
                logger.error("Error adding %s to %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s added to %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to add %s to %s shelf: %s" % (book, shelf, content))

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if status == 'Open':
                    if res['Status'] == 'Open':
                        logger.warn("Book %s is already marked Open" % book)
                    else:
                        myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                        ll_changed += 1
                        logger.debug("%10s set to Have" % book)
                elif status == 'Wanted':
                    # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                    # (depending on user prefs, to-read and wanted might not be the same thing)
                    if lazylibrarian.CONFIG['GR_UNIQUE'] and res['Status'] in ['Open', 'Have']:
                        try:
                            res, content = GA.BookToList(book, shelf, action='remove')
                        except Exception as e:
                            logger.error("Error removing %s from %s: %s %s" % (book, shelf, type(e).__name__, str(e)))
                            res = None
                            content = ''
                        if res:
                            logger.debug("%10s removed from %s shelf" % (book, shelf))
                            shelf_changed += 1
                        else:
                            logger.warn("Failed to remove %s from %s shelf: %s" % (book, shelf, content))
                    elif res['Status'] != 'Open':
                        myDB.action('UPDATE books SET Status="Wanted" WHERE BookID=?', (book,))
                        ll_changed += 1
                        logger.debug("%10s set to Wanted" % book)
                    else:
                        logger.warn("Not setting %s as Wanted, already marked Open" % book)

        # get new definitive list from ll
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(ll_list)}
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0

Example #31

Show file

File: searchmag.py Project: andrzejc/LazyLibrarian

def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab

    myDB = database.DBConnection()
    searchlist = []
    threading.currentThread().name = "SEARCHMAGS"

    if mags is None:  # backlog search
        searchmags = myDB.select('SELECT Title, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
    else:
        searchmags = []
        for magazine in mags:
            searchmags_temp = myDB.select('SELECT Title, LastAcquired, IssueDate from magazines \
                                          WHERE Title="%s" AND Status="Active"' % (magazine['bookid']))
            for terms in searchmags_temp:
                searchmags.append(terms)

    if len(searchmags) == 1:
        logger.info('Searching for one magazine')
    else:
        logger.info('Searching for %i magazines' % len(searchmags))

    for searchmag in searchmags:
        bookid = searchmag[0]
        searchterm = searchmag[0]
        # frequency = searchmag[1]
        # last_acquired = searchmag[2]
        # issue_date = searchmag[3]

        dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}

        searchterm = formatter.latinToAscii(formatter.replace_all(searchterm, dic))
        searchterm = re.sub('[\.\-\/]', ' ', searchterm).encode('utf-8')
        searchlist.append({"bookid": bookid, "searchterm": searchterm})

    if searchlist == []:
        logger.warn('There is nothing to search for.  Mark some magazines as active.')

    for book in searchlist:

        resultlist = []
        tor_resultlist = []
        if lazylibrarian.USE_NZB():
            resultlist, nproviders = providers.IterateOverNewzNabSites(book, 'mag')
            if not nproviders:
                logger.warn('No nzb providers are set. Check config for NEWZNAB or TORZNAB providers')

        if lazylibrarian.USE_TOR():
            tor_resultlist, nproviders = providers.IterateOverTorrentSites(book, 'mag')
            if not nproviders:
                logger.warn('No torrent providers are set. Check config for TORRENT providers')

            for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                resultlist.append({
                    'bookid': item['bookid'],
                    'nzbprov': item['tor_prov'],
                    'nzbtitle': item['tor_title'],
                    'nzburl': item['tor_url'],
                    'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                    'nzbsize': item['tor_size'],
                    'nzbmode': 'torrent'
                })

        if not resultlist:
            logger.debug("Adding magazine %s to queue." % book['searchterm'])

        else:
            bad_regex = 0
            bad_date = 0
            old_date = 0
            total_nzbs = 0
            new_date = 0
            to_snatch = 0
            maglist = []
            issues = []
            reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)
            for nzb in resultlist:
                total_nzbs = total_nzbs + 1
                bookid = nzb['bookid']
                nzbtitle = (u'%s' % nzb['nzbtitle'])
                nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']
                nzbdate_temp = nzb['nzbdate']
                nzbsize_temp = nzb['nzbsize']
                if nzbsize_temp is None:  # not all torrents returned by torznab have a size
                    nzbsize_temp = 1000
                nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
                nzbdate = formatter.nzbdate2format(nzbdate_temp)
                nzbmode = nzb['nzbmode']
                checkifmag = myDB.select('SELECT * from magazines WHERE Title="%s"' % bookid)
                if checkifmag:
                    for results in checkifmag:
                        control_date = results['IssueDate']
                        # frequency = results['Frequency']
                        # regex = results['Regex']

                    nzbtitle_formatted = nzbtitle.replace('.', ' ').replace('-', ' ').replace('/', ' ').replace(
                        '+', ' ').replace('_', ' ').replace('(', '').replace(')', '').strip()
                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this
                    # keyword_check = nzbtitle_formatted.replace(bookid, '')
                    # remove extra spaces if they're in a row
                    nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                    if ' ' in bookid:
                        bookid_exploded = bookid.split(' ')
                    else:
                        bookid_exploded = [bookid]

                    # check nzb starts with magazine title, and ends with a date
                    # eg The MagPI Issue 22 - July 2015
                    # do something like check left n words match title
                    # then check last n words are a date

                    name_match = 1  # assume name matches for now
                    if len(nzbtitle_exploded) > len(bookid_exploded):  # needs to be longer as it has to include a date
                        # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz
                        mag_title_match = fuzz.token_set_ratio(common.remove_accents(bookid), common.remove_accents(nzbtitle_formatted))
                        if mag_title_match < lazylibrarian.MATCH_RATIO:
                            logger.debug(u"Magazine token set Match failed: " + str(mag_title_match) + "% for " + nzbtitle_formatted)
                            name_match = 0
                    
                    lower_title = common.remove_accents(nzbtitle_formatted).lower()
                    lower_bookid = common.remove_accents(bookid).lower()
                    for word in reject_list:
                        if word in lower_title and not word in lower_bookid:
                            name_match = 0
                            logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                            break

                    if name_match:
                        # some magazine torrent uploaders add their sig in [] or {}
                        # Fortunately for us, they always seem to add it at the end
                        # also some magazine torrent titles are "magazine_name some_form_of_date pdf"
                        # so strip all the trailing junk...
                        while nzbtitle_exploded[len(nzbtitle_exploded) - 1][0] in '[{' or \
                                nzbtitle_exploded[len(nzbtitle_exploded) - 1].lower() == 'pdf':
                                nzbtitle_exploded.pop()  # gotta love the function names
                        
                        # need at least one word magazine title and two date components
                        if len(nzbtitle_exploded) > 2:
                            # regexA = DD MonthName YYYY OR MonthName YYYY or Issue nn MonthName YYYY
                            regexA_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                            regexA_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                            regexA_month = formatter.month2num(common.remove_accents(regexA_month_temp))
                            if not regexA_year.isdigit() or int(regexA_year) < 1900 or int(regexA_year) > 2100:
                                regexA_year = 'fail'  # force date failure
                            
                            #if frequency == "Weekly" or frequency == "BiWeekly":
                            regexA_day = nzbtitle_exploded[len(nzbtitle_exploded) - 3].zfill(2)
                            if regexA_day.isdigit():
                                if int(regexA_day) > 31:  # probably issue number nn
                                    regexA_day = '01'
                            else:
                                regexA_day = '01'  # just MonthName YYYY
                            #else:
                            #    regexA_day = '01'  # monthly, or less frequent

                            try:
                                newdatish = regexA_year + '-' + regexA_month + '-' + regexA_day
                                # try to make sure the year/month/day are valid, exception if not
                                # ie don't accept day > 31, or 30 in some months
                                # also handles multiple date format named issues eg Jan 2014, 01 2014
                                # datetime will give a ValueError if not a good date or a param is not int
                                date1 = datetime.date(int(regexA_year), int(regexA_month), int(regexA_day))
                            except ValueError:
                                # regexB = MonthName DD YYYY
                                regexB_year = nzbtitle_exploded[len(nzbtitle_exploded) - 1]
                                regexB_month_temp = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                regexB_month = formatter.month2num(common.remove_accents(regexB_month_temp))
                                regexB_day = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                if not regexB_year.isdigit() or int(regexB_year) < 1900 or int(regexB_year) > 2100:
                                    regexB_year = 'fail'

                                try:
                                    newdatish = regexB_year + '-' + regexB_month + '-' + regexB_day
                                    # datetime will give a ValueError if not a good date or a param is not int
                                    date1 = datetime.date(int(regexB_year), int(regexB_month), int(regexB_day))
                                except ValueError:
                                    # regexC = YYYY MM or YYYY MM DD
                                    # (can't get MM/DD if named YYYY Issue nn)
                                    # First try  YYYY MM
                                    regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 2]
                                    if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                        regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        regexC_day = '01'
                                    else:  # try YYYY MM DD
                                        regexC_year = nzbtitle_exploded[len(nzbtitle_exploded) - 3]
                                        if regexC_year.isdigit() and int(regexC_year) > 1900 and int(regexC_year) < 2100:
                                            regexC_month = nzbtitle_exploded[len(nzbtitle_exploded) - 2].zfill(2)
                                            regexC_day = nzbtitle_exploded[len(nzbtitle_exploded) - 1].zfill(2)
                                        else:
                                            regexC_year = 'fail'
                                    try:
                                        newdatish = regexC_year + '-' + regexC_month + '-' + regexC_day
                                        # datetime will give a ValueError if not a good date or a param is not int
                                        date1 = datetime.date(int(regexC_year), int(regexC_month), int(regexC_day))
                                    except:
                                        logger.debug('Magazine %s not in proper date format.' % nzbtitle_formatted)
                                        bad_date = bad_date + 1
                                        # allow issues with good name but bad date to be included
                                        # so user can manually select them, incl those with issue numbers
                                        newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                        # continue
                        else:
                            continue

                        if control_date is None:  # we haven't got any copies of this magazine yet
                            # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                            # could perhaps calc differently for weekly, biweekly etc
                            start_time = time.time()
                            start_time -= 31 * 24 * 60 * 60  # number of seconds in 31 days
                            control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))

                        # only grab a copy if it's newer than the most recent we have,
                        # or newer than a month ago if we have none
                        comp_date = formatter.datecompare(newdatish, control_date)
                        if comp_date > 0:
                            # Should probably only upsert when downloaded and processed in case snatch fails
                            # keep track of what we're going to download so we don't download dupes
                            new_date = new_date + 1
                            issue = bookid + ',' + newdatish
                            if issue not in issues:
                                maglist.append({
                                    'bookid': bookid,
                                    'nzbprov': nzbprov,
                                    'nzbtitle': nzbtitle,
                                    'nzburl': nzburl,
                                    'nzbmode': nzbmode
                                })
                                logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                to_snatch = to_snatch + 1
                                issues.append(issue)
                                
                                controlValueDict = {"NZBurl": nzburl}
                                newValueDict = {
                                    "NZBprov": nzbprov,
                                    "BookID": bookid,
                                    "NZBdate": formatter.now(),  # when we asked for it
                                    "NZBtitle": nzbtitle,
                                    "AuxInfo": newdatish,
                                    "Status": "Wanted",
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert("wanted", newValueDict, controlValueDict)
                                
                            else:
                                logger.debug('This issue of %s is already flagged for download' % issue)
                        else:
                            if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date = old_date + 1
                    else:
                        logger.debug('Magazine [%s] does not completely match search term [%s].' % (
                                     nzbtitle_formatted, bookid))
                        bad_regex = bad_regex + 1

            logger.info('Found %s results for %s.  %s are new, %s are old, %s fail date, %s fail name matching' % (
                        total_nzbs, bookid, new_date, old_date, bad_date, bad_regex))
            logger.info("%s, %s issues to download" % (bookid, to_snatch))

            for items in maglist:
                if items['nzbmode'] == "torznab":
                    snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                elif items['nzbmode'] == "torrent":
                    snatch = TORDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                else:
                    snatch = NZBDownloadMethod(items['bookid'], items['nzbprov'], items['nzbtitle'], items['nzburl'])
                if snatch:
                    notifiers.notify_snatch(formatter.latinToAscii(items['nzbtitle']) + ' at ' + formatter.now())
                    common.schedule_job(action='Start', target='processDir')
            maglist = []

    if reset == True:
        common.schedule_job(action='Restart', target='search_magazines')
        
    logger.info("Search for magazines complete")

Example #32

Show file

def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '', '2': '',
                '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '', ':': '', '!': '',
                '-': ' ', '\s\s': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)
    author = unaccented_str(replace_all(book['authorName'], dic))
    title = unaccented_str(replace_all(book['bookName'], dic))

    matches = []
    for tor in resultlist:
        torTitle = unaccented_str(tor['tor_title'])
        torTitle = replace_all(torTitle, dictrepl).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        torAuthor_match = fuzz.token_set_ratio(author, torTitle)
        torBook_match = fuzz.token_set_ratio(title, torTitle)
        logger.debug(u"TOR author/book Match: %s/%s for %s" % (torAuthor_match, torBook_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.match('SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' % tor_url)
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" % (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in author.lower() and word not in title.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" % (torTitle, word))
                    break

        tor_size_temp = tor['tor_size']  # Need to cater for when this is NONE (Issue 35)
        tor_size_temp = check_int(tor_size_temp, 1000)
        tor_size = round(float(tor_size_temp) / 1048576, 2)

        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)
        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor['tor_prov'],
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (torBook_match + torAuthor_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(author))
            words -= len(getList(title))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.info(u'Nearest TOR match (%s%%): %s using %s search for %s %s' %
                        (score, nzb_Title, searchtype, author, title))
            return False

        logger.info(u'Best TOR match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                   newValueDict["BookID"])
        if snatchedbooks:
            logger.debug('%s already marked snatched' % nzb_Title)
            return True  # someone else found it, not us
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            if newValueDict["NZBprov"] == 'libgen':
                # for libgen we use direct download links
                snatch = DirectDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                              newValueDict["NZBtitle"], controlValueDict["NZBurl"], nzb_Title)
            else:
                snatch = TORDownloadMethod(newValueDict["BookID"], newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], controlValueDict["NZBurl"])
            if snatch:
                logger.info('Downloading %s from %s' % (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch("%s from %s at %s" %
                              (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No torrent's found for [%s] using searchtype %s" % (book["searchterm"], searchtype))
    return False

Example #33

Show file

File: calibre.py Project: DobyTang/LazyLibrarian

def syncCalibreList(col_read=None, col_toread=None, userid=None):
    """ Get the lazylibrarian bookid for each read/toread calibre book so we can map our id to theirs,
        and sync current/supplied user's read/toread or supplied read/toread columns to calibre database.
        Return message giving totals """

    myDB = database.DBConnection()
    username = ''
    readlist = []
    toreadlist = []
    if not userid:
        cookie = cherrypy.request.cookie
        if cookie and 'll_uid' in list(cookie.keys()):
            userid = cookie['ll_uid'].value
    if userid:
        res = myDB.match('SELECT UserName,ToRead,HaveRead,CalibreRead,CalibreToRead,Perms from users where UserID=?',
                         (userid,))
        if res:
            username = res['UserName']
            if not col_read:
                col_read = res['CalibreRead']
            if not col_toread:
                col_toread = res['CalibreToRead']
            toreadlist = getList(res['ToRead'])
            readlist = getList(res['HaveRead'])
            # suppress duplicates (just in case)
            toreadlist = list(set(toreadlist))
            readlist = list(set(readlist))
        else:
            return "Error: Unable to get user column settings for %s" % userid

    if not userid:
        return "Error: Unable to find current userid"

    if not col_read and not col_toread:
        return "User %s has no calibre columns set" % username

    # check user columns exist in calibre and create if not
    res = calibredb('custom_columns')
    columns = res[0].split('\n')
    custom_columns = []
    for column in columns:
        if column:
            custom_columns.append(column.split(' (')[0])

    if col_read not in custom_columns:
        added = calibredb('add_custom_column', [col_read, col_read, 'bool'])
        if "column created" not in added[0]:
            return added
    if col_toread not in custom_columns:
        added = calibredb('add_custom_column', [col_toread, col_toread, 'bool'])
        if "column created" not in added[0]:
            return added

    nomatch = 0
    readcol = ''
    toreadcol = ''
    map_ctol = {}
    map_ltoc = {}
    if col_read:
        readcol = '*' + col_read
    if col_toread:
        toreadcol = '*' + col_toread

    calibre_list = calibreList(col_read, col_toread)
    if not isinstance(calibre_list, list):
        # got an error message from calibredb
        return '"%s"' % calibre_list

    for item in calibre_list:
        if toreadcol and toreadcol in item or readcol and readcol in item:
            authorname, authorid, added = addAuthorNameToDB(item['authors'], refresh=False, addbooks=False)
            if authorname:
                if authorname != item['authors']:
                    logger.debug("Changed authorname for [%s] from [%s] to [%s]" %
                                 (item['title'], item['authors'], authorname))
                    item['authors'] = authorname
                bookid, mtype = find_book_in_db(authorname, item['title'], ignored=False, library='eBook')
                if bookid and mtype == "Ignored":
                    logger.warn("Book %s by %s is marked Ignored in database, importing anyway" %
                                (item['title'], authorname))
                if not bookid:
                    searchterm = "%s <ll> %s" % (item['title'], authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.debug("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                       result['authorname'], result['bookname']))
                            bookid = result['bookid']
                            import_book(bookid)
                if bookid:
                    # NOTE: calibre bookid is always an integer, lazylibrarian bookid is a string
                    # (goodreads could be used as an int, but googlebooks can't as it's alphanumeric)
                    # so convert all dict items to strings for ease of matching.
                    map_ctol[str(item['id'])] = str(bookid)
                    map_ltoc[str(bookid)] = str(item['id'])
                else:
                    logger.warn('Calibre Book [%s] by [%s] is not in lazylibrarian database' %
                                (item['title'], authorname))
                    nomatch += 1
            else:
                logger.warn('Calibre Author [%s] not matched in lazylibrarian database' % (item['authors']))
                nomatch += 1

    # Now check current users lazylibrarian read/toread against the calibre library, warn about missing ones
    # which might be books calibre doesn't have, or might be minor differences in author or title

    for idlist in [("Read", readlist), ("To_Read", toreadlist)]:
        booklist = idlist[1]
        for bookid in booklist:
            cmd = "SELECT AuthorID,BookName from books where BookID=?"
            book = myDB.match(cmd, (bookid,))
            if not book:
                logger.error('Error finding bookid %s' % bookid)
            else:
                cmd = "SELECT AuthorName from authors where AuthorID=?"
                author = myDB.match(cmd, (book['AuthorID'],))
                if not author:
                    logger.error('Error finding authorid %s' % book['AuthorID'])
                else:
                    match = False
                    high = 0
                    highname = ''
                    for item in calibre_list:
                        if item['authors'] == author['AuthorName'] and item['title'] == book['BookName']:
                            logger.debug("Exact match for %s [%s]" % (idlist[0], book['BookName']))
                            map_ctol[str(item['id'])] = str(bookid)
                            map_ltoc[str(bookid)] = str(item['id'])
                            match = True
                            break
                    if not match:
                        highid = ''
                        for item in calibre_list:
                            if item['authors'] == author['AuthorName']:
                                n = fuzz.token_sort_ratio(item['title'], book['BookName'])
                                if n > high:
                                    high = n
                                    highname = item['title']
                                    highid = item['id']

                        if high > 95:
                            logger.debug("Found ratio match %s%% [%s] for %s [%s]" %
                                         (high, highname, idlist[0], book['BookName']))
                            map_ctol[str(highid)] = str(bookid)
                            map_ltoc[str(bookid)] = str(highid)
                            match = True

                    if not match:
                        logger.warn("No match for %s %s by %s in calibre database, closest match %s%% [%s]" %
                                    (idlist[0], book['BookName'], author['AuthorName'], high, highname))
                        nomatch += 1

    logger.debug("BookID mapping complete, %s match %s, nomatch %s" % (username, len(map_ctol), nomatch))

    # now sync the lists
    if not userid:
        msg = "No userid found"
    else:
        last_read = []
        last_toread = []
        calibre_read = []
        calibre_toread = []

        cmd = 'select SyncList from sync where UserID=? and Label=?'
        res = myDB.match(cmd, (userid, col_read))
        if res:
            last_read = getList(res['SyncList'])
        res = myDB.match(cmd, (userid, col_toread))
        if res:
            last_toread = getList(res['SyncList'])

        for item in calibre_list:
            if toreadcol and toreadcol in item and item[toreadcol]:  # only if True
                if str(item['id']) in map_ctol:
                    calibre_toread.append(map_ctol[str(item['id'])])
                else:
                    logger.warn("Calibre to_read book %s:%s has no lazylibrarian bookid" %
                                (item['authors'], item['title']))
            if readcol and readcol in item and item[readcol]:  # only if True
                if str(item['id']) in map_ctol:
                    calibre_read.append(map_ctol[str(item['id'])])
                else:
                    logger.warn("Calibre read book %s:%s has no lazylibrarian bookid" %
                                (item['authors'], item['title']))

        logger.debug("Found %s calibre read, %s calibre toread" % (len(calibre_read), len(calibre_toread)))
        logger.debug("Found %s lazylib read, %s lazylib toread" % (len(readlist), len(toreadlist)))

        added_to_ll_toread = list(set(toreadlist) - set(last_toread))
        removed_from_ll_toread = list(set(last_toread) - set(toreadlist))
        added_to_ll_read = list(set(readlist) - set(last_read))
        removed_from_ll_read = list(set(last_read) - set(readlist))
        logger.debug("lazylibrarian changes to copy to calibre: %s %s %s %s" % (len(added_to_ll_toread),
                     len(removed_from_ll_toread), len(added_to_ll_read), len(removed_from_ll_read)))

        added_to_calibre_toread = list(set(calibre_toread) - set(last_toread))
        removed_from_calibre_toread = list(set(last_toread) - set(calibre_toread))
        added_to_calibre_read = list(set(calibre_read) - set(last_read))
        removed_from_calibre_read = list(set(last_read) - set(calibre_read))
        logger.debug("calibre changes to copy to lazylibrarian: %s %s %s %s" % (len(added_to_calibre_toread),
                     len(removed_from_calibre_toread), len(added_to_calibre_read), len(removed_from_calibre_read)))

        calibre_changes = 0
        for item in added_to_calibre_read:
            if item not in readlist:
                readlist.append(item)
                logger.debug("Lazylibrarian marked %s as read" % item)
                calibre_changes += 1
        for item in added_to_calibre_toread:
            if item not in toreadlist:
                toreadlist.append(item)
                logger.debug("Lazylibrarian marked %s as to_read" % item)
                calibre_changes += 1
        for item in removed_from_calibre_read:
            if item in readlist:
                readlist.remove(item)
                logger.debug("Lazylibrarian removed %s from read" % item)
                calibre_changes += 1
        for item in removed_from_calibre_toread:
            if item in toreadlist:
                toreadlist.remove(item)
                logger.debug("Lazylibrarian removed %s from to_read" % item)
                calibre_changes += 1
        if calibre_changes:
            myDB.action('UPDATE users SET ToRead=?,HaveRead=? WHERE UserID=?',
                        (', '.join(toreadlist), ', '.join(readlist), userid))
        ll_changes = 0
        for item in added_to_ll_toread:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom', [col_toread, map_ltoc[item], 'true'], [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to set calibre %s true for %s" % (col_toread, item))
        for item in removed_from_ll_toread:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom', [col_toread, map_ltoc[item], ''], [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to clear calibre %s for %s" % (col_toread, item))

        for item in added_to_ll_read:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom', [col_read, map_ltoc[item], 'true'], [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to set calibre %s true for %s" % (col_read, item))

        for item in removed_from_ll_read:
            if item in map_ltoc:
                res, err, rc = calibredb('set_custom', [col_read, map_ltoc[item], ''], [])
                if rc:
                    msg = "calibredb set_custom error: "
                    if err:
                        logger.error(msg + err)
                    elif res:
                        logger.error(msg + res)
                    else:
                        logger.error(msg + str(rc))
                else:
                    ll_changes += 1
            else:
                logger.warn("Unable to clear calibre %s for %s" % (col_read, item))

        # store current sync list as comparison for next sync
        controlValueDict = {"UserID": userid, "Label": col_read}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(readlist)}
        myDB.upsert("sync", newValueDict, controlValueDict)
        controlValueDict = {"UserID": userid, "Label": col_toread}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(toreadlist)}
        myDB.upsert("sync", newValueDict, controlValueDict)

        msg = "%s sync updated: %s calibre, %s lazylibrarian" % (username, ll_changes, calibre_changes)
    return msg

Example #34

Show file

File: manualbook.py Project: jamiees2/LazyLibrarian

def searchItem(item=None, bookid=None, cat=None):
    """
    Call all active search providers to search for item
    return a list of results, each entry in list containing percentage_match, title, provider, size, url
    item = searchterm to use for general search
    bookid = link to data for book/audio searches
    cat = category to search [general, book, audio]
    """
    results = []

    if not item:
        return results

    book = {}
    searchterm = unaccented_str(item)

    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
        cmd += ' and bookID=?'
        match = myDB.match(cmd, (bookid,))
        if match:
            book['authorName'] = match['authorName']
            book['bookName'] = match['bookName']
            book['bookSub'] = match['bookSub']
        else:
            logger.debug('Forcing general search')
            cat = 'general'

    nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
    logger.debug('Searching %s provider%s (%s) for %s' % (nprov, plural(nprov), cat, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nprov = IterateOverNewzNabSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nprov = IterateOverTorrentSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_DIRECT():
        resultlist, nprov = IterateOverDirectSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nprov = IterateOverRSSSites()
        if nprov:
            results += resultlist

    # reprocess to get consistent results
    searchresults = []
    for item in results:
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        if 'nzbtitle' in item:
            title = item['nzbtitle']
        if 'nzburl' in item:
            url = item['nzburl']
        if 'nzbprov' in item:
            provider = item['nzbprov']
        if 'nzbsize' in item:
            size = item['nzbsize']
        if 'nzbdate' in item:
            date = item['nzbdate']
        if 'nzbmode' in item:
            mode = item['nzbmode']
        if 'tor_title' in item:
            title = item['tor_title']
        if 'tor_url' in item:
            url = item['tor_url']
        if 'tor_prov' in item:
            provider = item['tor_prov']
        if 'tor_size' in item:
            size = item['tor_size']
        if 'tor_date' in item:
            date = item['tor_date']
        if 'tor_type' in item:
            mode = item['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'

            # calculate match percentage - torrents might have words_with_underscore_separator
            score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
            if score >= 40:  # ignore wildly wrong results?
                result = {'score': score, 'title': title, 'provider': provider, 'size': size, 'date': date,
                          'url': quote_plus(url), 'mode': mode}

                searchresults.append(result)

                # from operator import itemgetter
                # searchresults = sorted(searchresults, key=itemgetter('score'), reverse=True)

    logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm))
    return searchresults

Example #35

Show file

def TORDownloadMethod(bookid=None,
                      tor_title=None,
                      tor_url=None,
                      library='eBook'):
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &amp;
        #
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC',
                                          value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
        proxies = proxyList()
        try:
            r = requests.get(tor_url,
                             headers=headers,
                             timeout=90,
                             proxies=proxies)
        except requests.exceptions.Timeout:
            logger.warn('Timeout fetching file from url: %s' % tor_url)
            return False
        except Exception as e:
            if hasattr(e, 'reason'):
                logger.warn('%s fetching file from url: %s, %s' %
                            (type(e).__name__, tor_url, e.reason))
            else:
                logger.warn('%s fetching file from url: %s, %s' %
                            (type(e).__name__, tor_url, str(e)))
            return False

        torrent = r.content

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                        tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                        tor_name)
                msg = ''
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, unicode):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm'
                    setperm(tor_path)
                    msg += 'Saved'
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.debug("Failed to write magnet to file: %s %s" %
                                 (type(e).__name__, str(e)))
                    logger.debug("Progress: %s" % msg)
                    logger.debug("Filename [%s]" % (repr(tor_path)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                    tor_name)
            msg = ''
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, unicode):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved'
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.debug("Failed to write torrent to file: %s %s" %
                             (type(e).__name__, str(e)))
                logger.debug("Progress: %s" % msg)
                logger.debug("Filename [%s]" % (repr(tor_path)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG[
            'UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url,
                                         hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG[
            'RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url,
                                         hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG[
            'TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG[
                'QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url,
                                        hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG[
            'TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG[
                'TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG[
            'DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     lazylibrarian.CONFIG['DELUGE_URL_BASE'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet',
                                             tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url,
                                             args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID,
                                        lazylibrarian.CONFIG['DELUGE_LABEL'])
                    result = client.call('core.get_torrent_status', downloadID,
                                         {})
                    # for item in result:
                    #    logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']

            except Exception as e:
                logger.debug('DelugeRPC failed %s %s' %
                             (type(e).__name__, str(e)))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn(
                    '%s: name contains hash, probably unresolved magnet' %
                    Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" %
                                 library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = True
                        logger.debug("Rejecting torrent name %s, contains %s" %
                                     (tor_title, word))
                        break
                if rejected:
                    myDB.action(
                        'UPDATE wanted SET status="Failed" WHERE NZBurl=?',
                        (full_url, ))
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' %
                                 (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?',
                                (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?',
                        (bookid, ))
        elif library == 'AudioBook':
            myDB.action(
                'UPDATE books SET audiostatus="Snatched" WHERE BookID=?',
                (bookid, ))
        myDB.action(
            'UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
            (Source, downloadID, full_url))
        return True

    logger.error('Failed to download torrent from %s, %s' % (Source, tor_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?',
                (full_url, ))
    return False

Example #36

Show file

File: searchmag.py Project: knobunc/LazyLibrarian

def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, DateType, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        # logger.debug("Removing old magazine search results")
        # myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']
            datetype = searchmag['DateType']
            if not datetype:
                datetype = ''

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm, "datetype": datetype})

        if not searchlist:
            logger.warn('There is nothing to search for.  Mark some magazines as active.')

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                                item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    # Need to make sure that substrings of magazine titles don't get found
                    # (e.g. Maxim USA will find Maximum PC USA) so split into "words"
                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
                           '#': '# '}
                    nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                    # remove extra spaces if they're in a row
                    nzbtitle_formatted = " ".join(nzbtitle_formatted.split())
                    nzbtitle_exploded = nzbtitle_formatted.split(' ')

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # Check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    word = unaccented(word).lower()
                                    if word:
                                        wlist.append(word)
                                for word in bookid_exploded:
                                    word = unaccented(word).lower()
                                    if word and word not in wlist:
                                        logger.debug("Rejecting %s, missing %s" % (nzbtitle, word))
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        "Magazine title match failed " + bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        "Magazine title matched " + bookid + " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=?', (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'], ',')
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        if rejected:
                            rejects += 1
                        else:
                            regex_pass, issuedate, year = get_issue_date(nzbtitle_exploded)
                            if regex_pass:
                                logger.debug('Issue %s (regex %s) for %s ' %
                                             (issuedate, regex_pass, nzbtitle_formatted))
                                datetype_ok = True
                                datetype = book['datetype']
                                if datetype:
                                    # check all wanted parts are in the regex result
                                    # Day Month Year Vol Iss (MM needs two months)

                                    if 'M' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 12]:
                                        datetype_ok = False
                                    elif 'D' in datetype and regex_pass not in [3, 5, 6]:
                                        datetype_ok = False
                                    elif 'MM' in datetype and regex_pass not in [1]:  # bi monthly
                                        datetype_ok = False
                                    elif 'V' in datetype and 'I' in datetype and regex_pass not in [8, 9, 17, 18]:
                                        datetype_ok = False
                                    elif 'V' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 17, 18]:
                                        datetype_ok = False
                                    elif 'I' in datetype and regex_pass not in [2, 10, 11, 12, 13, 14, 16, 17, 18]:
                                        datetype_ok = False
                                    elif 'Y' in datetype and regex_pass not in [1, 2, 3, 4, 5, 6, 7, 8, 10,
                                                                                12, 13, 15, 16, 18]:
                                        datetype_ok = False
                            else:
                                datetype_ok = False
                                logger.debug('Magazine %s not in a recognised date format.' % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                issuedate = "1970-01-01"  # provide a fake date for bad-date issues

                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped" or "Have"
                            insert_table = "pastissues"
                            comp_date = 0
                            if datetype_ok:
                                control_date = results['IssueDate']
                                logger.debug("Control date: [%s]" % control_date)
                                if not control_date:  # we haven't got any copies of this magazine yet
                                    # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd
                                    # could perhaps calc differently for weekly, biweekly etc
                                    # For magazines with only an issue number use zero as we can't tell age

                                    if str(issuedate).isdigit():
                                        logger.debug('Magazine comparing issue numbers (%s)' % issuedate)
                                        control_date = 0
                                    elif re.match('\d+-\d\d-\d\d', str(issuedate)):
                                        start_time = time.time()
                                        start_time -= int(
                                            lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                        if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                            start_time = 0
                                        control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                        logger.debug('Magazine date comparing to %s' % control_date)
                                    else:
                                        logger.debug('Magazine unable to find comparison type [%s]' % issuedate)
                                        control_date = 0

                                if str(control_date).isdigit() and str(issuedate).isdigit():
                                    # for issue numbers, check if later than last one we have
                                    if regex_pass in [10, 12, 13] and year:
                                        issuedate = "%s%04d" % (year, int(issuedate))
                                    else:
                                        issuedate = str(issuedate).zfill(4)
                                    if not control_date:
                                        comp_date = 1
                                    else:
                                        comp_date = int(issuedate) - int(control_date)
                                elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                        re.match('\d+-\d\d-\d\d', str(issuedate)):
                                    # only grab a copy if it's newer than the most recent we have,
                                    # or newer than a month ago if we have none
                                    comp_date = datecompare(issuedate, control_date)
                                else:
                                    # invalid comparison of date and issue number
                                    comp_date = 0
                                    if re.match('\d+-\d\d-\d\d', str(control_date)):
                                        if regex_pass > 9 and year:
                                            # we assumed it was an issue number, but it could be a date
                                            year = check_int(year, 0)
                                            if regex_pass in [10, 12, 13]:
                                                issuedate = int(issuedate[:4])
                                            issuenum = check_int(issuedate, 0)
                                            if year and 1 <= issuenum <= 12:
                                                issuedate = "%04d-%02d-01" % (year, issuenum)
                                                comp_date = datecompare(issuedate, control_date)
                                        if not comp_date:
                                            logger.debug('Magazine %s failed: Expecting a date' % nzbtitle_formatted)
                                    else:
                                        logger.debug('Magazine %s failed: Expecting issue number' % nzbtitle_formatted)
                                    if not comp_date:
                                        bad_date += 1
                                        issuedate = "1970-01-01"

                            if issuedate == "1970-01-01":
                                logger.debug('This issue of %s is unknown age; skipping.' % nzbtitle_formatted)
                            elif not datetype_ok:
                                logger.debug('This issue of %s not in a wanted date format.' % nzbtitle_formatted)
                            elif comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + issuedate
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug('This issue of %s is new, downloading' % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' % len(issues))
                                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug('This issue of %s is already flagged for download' % issue)
                            else:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('This issue of %s is old; skipping.' % nzbtitle_formatted)
                                old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match('SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' %
                                                   insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('%s is already in %s marked %s' %
                                                 (nzbtitle, insert_table, mag_entry['Status']))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                if insert_table == 'pastissues':
                                    # try to mark ones we've already got
                                    match = myDB.match("SELECT * from issues WHERE Title=? AND IssueDate=?",
                                                       (bookid, issuedate))
                                    if match:
                                        insert_status = "Have"
                                    else:
                                        insert_status = "Skipped"
                                else:
                                    insert_status = "Wanted"
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": issuedate,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict, controlValueDict)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag:
                                    logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch, res = TORDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    elif magazine['nzbmode'] == 'direct':
                        snatch, res = DirectDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    elif magazine['nzbmode'] == 'nzb':
                        snatch, res = NZBDownloadMethod(
                            magazine['bookid'],
                            magazine['nzbtitle'],
                            magazine['nzburl'],
                            'Magazine')
                    else:
                        res = 'Unhandled NZBmode [%s] for %s' % (magazine['nzbmode'], magazine["nzburl"])
                        logger.error(res)
                        snatch = 0

                    if snatch:
                        logger.info('Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"]))
                        custom_notify_snatch("%s %s" % (magazine['bookid'], magazine['nzburl']))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now()))
                        scheduleJob(action='Start', target='PostProcessor')
                    else:
                        myDB.action('UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                                    (res, magazine["nzburl"]))

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"

Example #37

Show file

File: librarysync.py Project: Trixter69/LazyLibrarian

def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' %
            dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats ( authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    new_authors = []

    logger.info(
        'Scanning ebook directory: %s' %
        dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action(
                    'update books set Status="%s" where BookID="%s"' %
                    (status, bookID))
                myDB.action(
                    'update books set BookFile="" where BookID="%s"' %
                    bookID)
                logger.warn(
                    'Book %s - %s updated as not found on disk' %
                    (bookAuthor, bookName))
            # for book_type in getList(lazylibrarian.EBOOK_TYPE):
            #	bookName = book['BookName']
            #	bookAuthor = book['AuthorName']
            # Default destination path, should be allowed change per config file.
            #	dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', bookAuthor).replace('$Title', bookName)
            # dest_path = authorname+'/'+bookname
            #	global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', bookAuthor).replace('$Title', bookName)
#
            #	encoded_book_path = os.path.join(dir,dest_path,global_name + "." + book_type).encode(lazylibrarian.SYS_ENCODING)
            #	if os.path.isfile(encoded_book_path):
            #		book_exists = True
            # if not book_exists:
            #	myDB.action('update books set Status=? where AuthorName=? and BookName=?',[status,bookAuthor,bookName])
            # logger.info('Book %s updated as not found on disk' %
            # encoded_book_path.decode(lazylibrarian.SYS_ENCODING, 'replace') )
                if bookAuthor not in new_authors:
                    new_authors.append(bookAuthor)

    # guess this was meant to save repeat-scans of the same directory
    # if it contains multiple formats of the same book, but there was no code
    # that looked at the array. renamed from latest to processed to make
    # purpose clearer
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)
        for files in f:
            file_count += 1
            subdirectory = r.replace(dir, '')
            # Added new code to skip if we've done this directory before. Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same
            # subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
# 			If this is a book, try to get author/title/isbn/language
# 			If metadata.opf exists, use that
# 			else if epub or mobi, read metadata from the book
# 			else have to try pattern match for author/title	and look up isbn/lang from LT or GR late
                match = 0
                extn = ""
                
                if '.' in files:
                    words = files.split('.')
                    extn = words[len(words) - 1]
                    
                if formatter.is_valid_booktype(files):
                    logger.debug(
                        "[%s] Now scanning subdirectory %s" %
                        (dir.decode(lazylibrarian.SYS_ENCODING, 'replace'), subdirectory.decode(lazylibrarian.SYS_ENCODING, 'replace')))
                    
                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know 
                    # LL preferred authorname/bookname at this point
                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        else:
                            language = ""
                        if 'identifier' in res:
                            isbn = res['identifier']
                        else:
                            isbn = ""
                        match = 1
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:
                        # it's a book, but no external metadata found
                        # if it's an epub or a mobi we can try to read metadata
                        # from it
                        if (extn == "epub") or (extn == "mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)
                            try:
                                res = get_book_info(book_filename)
                            except:
                                res = {}
                            if 'title' in res and 'creator' in res:  # this is the minimum we need
                                book = res['title']
                                author = res['creator']
                                if 'language' in res:
                                    language = res['language']
                                else:
                                    language = ""
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                else:
                                    isbn = ""
                                logger.debug("book meta [%s] [%s] [%s] [%s]" %
                                    (isbn, language, author, book))
                                match = 1
                            else:
                                logger.debug("Book meta incomplete in %s" % book_filename)

                if not match:
                    match = pattern.match(files)
                    if match:
                        author = match.group("author")
                        book = match.group("book")
                    else:
                        logger.debug("Pattern match failed [%s]" % files)

                if match:
                    processed_subdirectories.append(
                        subdirectory)  # flag that we found a book in this subdirectory
                    #
                    # If we have a valid looking isbn, and language != "Unknown", add it to cache
                    #
                    if not language:
                        language = "Unknown"

                    if not formatter.is_valid_isbn(isbn):
                        isbn = ""
                    if isbn != "" and language != "Unknown":
                        logger.debug(
                            "Found Language [%s] ISBN [%s]" %
                            (language, isbn))
                        # we need to add it to language cache if not already
                        # there, is_valid_isbn has checked length is 10 or 13
                        if len(isbn) == 10:
                            isbnhead = isbn[0:3]
                        else:
                            isbnhead = isbn[3:6]
                        match = myDB.action(
                            'SELECT lang FROM languages where isbn = "%s"' %
                            (isbnhead)).fetchone()
                        if not match:
                            myDB.action(
                                'insert into languages values ("%s", "%s")' %
                                (isbnhead, language))
                            logger.debug(
                                "Cached Lang [%s] ISBN [%s]" %
                                (language, isbnhead))
                        else:
                            logger.debug(
                                "Already cached Lang [%s] ISBN [%s]" %
                                (language, isbnhead))

                    # get authors name in a consistent format
                    if "," in author:  # "surname, forename"
                        words = author.split(',')
                        author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                    if author[1] == ' ':        
                        author = author.replace(' ', '.')
                        author = author.replace('..', '.')

                    # Check if the author exists, and import the author if not,
                    # before starting any complicated book-name matching to save repeating the search
                    #
                    check_exist_author = myDB.action(
                        'SELECT * FROM authors where AuthorName="%s"' %
                        author).fetchone()
                    if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                        # no match for supplied author, but we're allowed to
                        # add new ones

                        GR = GoodReads(author)
                        try:
                            author_gr = GR.find_author_id()
                        except:
                            logger.warn(
                                "Error finding author id for [%s]" %
                                author)
                            continue

                        # only try to add if GR data matches found author data
                        # not sure what this is for, never seems to fail??
                        if author_gr:
                            authorname = author_gr['authorname']

                            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                            match_auth = author.replace('.', '_')
                            match_auth = match_auth.replace(' ', '_')
                            match_auth = match_auth.replace('__', '_')
                            match_name = authorname.replace('.', '_')
                            match_name = match_name.replace(' ', '_')
                            match_name = match_name.replace('__', '_')
                            match_name = common.remove_accents(match_name)
                            match_auth = common.remove_accents(match_auth)
                            # allow a degree of fuzziness to cater for different accented character handling.
                            # some author names have accents,
                            # filename may have the accented or un-accented version of the character
                            # The currently non-configurable value of fuzziness might need to go in config
                            # We stored GoodReads unmodified author name in
                            # author_gr, so store in LL db under that
                            match_fuzz = fuzz.ratio(match_auth, match_name)
                            if match_fuzz < 90:
                                logger.debug(
                                    "Failed to match author [%s] fuzz [%d]" %
                                    (author, match_fuzz))
                                logger.debug(
                                    "Failed to match author [%s] to authorname [%s]" %
                                    (match_auth, match_name))

                            # To save loading hundreds of books by unknown
                            # authors at GR or GB, ignore if author "Unknown"
                            if (author != "Unknown") and (match_fuzz >= 90):
                                # use "intact" name for author that we stored in
                                # GR author_dict, not one of the various mangled versions
                                # otherwise the books appear to be by a
                                # different author!
                                author = author_gr['authorname']
                                # this new authorname may already be in the
                                # database, so check again
                                check_exist_author = myDB.action(
                                    'SELECT * FROM authors where AuthorName="%s"' %
                                    author).fetchone()
                                if not check_exist_author:
                                    logger.debug(
                                        "Adding new author [%s]" %
                                        author)
                                    if author not in new_authors:
                                        new_authors.append(author)
                                    try:
                                        importer.addAuthorToDB(author)
                                        check_exist_author = myDB.action(
                                            'SELECT * FROM authors where AuthorName="%s"' %
                                            author).fetchone()
                                    except:
                                        continue

                    # check author exists in db, either newly loaded or already
                    # there
                    if not check_exist_author:
                        logger.debug(
                            "Failed to match author [%s] in database" %
                            author)
                    else:
                        # author exists, check if this book by this author is in our database
                        # metadata might have quotes in book name
                        book = book.replace('"', '').replace("'", "")
                        bookid = find_book_in_db(myDB, author, book)
                        if bookid:
                            # check if book is already marked as "Open" (if so,
                            # we already had it)
                            check_status = myDB.action(
                                'SELECT Status from books where BookID="%s"' %
                                bookid).fetchone()
                            if check_status['Status'] != 'Open':
                                # update status as we've got this book
                                myDB.action(
                                    'UPDATE books set Status="Open" where BookID="%s"' %
                                    bookid)
                                book_filename = os.path.join(
                                    r,
                                    files).encode(
                                        lazylibrarian.SYS_ENCODING)
                                # update book location so we can check if it
                                # gets removed, or allow click-to-open
                                myDB.action(
                                    'UPDATE books set BookFile="%s" where BookID="%s"' %
                                    (book_filename, bookid))
                                new_book_count += 1

    cachesize = myDB.action("select count(*) from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" %
        new_book_count)
    logger.info("%s files processed" % file_count)
    if new_book_count:
        stats = myDB.action(
            "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" %
                stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" %
                stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" %
            stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" %
            stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" %
            stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" %
            stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" %
            stats['sum(uncached)'])
    logger.debug("ISBN Language cache holds %s entries" % cachesize['count(*)'])
    stats = len(
        myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn(
            "There are %s books in your library with unknown language" %
            stats)

    logger.debug('Updating %i authors' % len(new_authors))
    for auth in new_authors:
        havebooks = len(
            myDB.select('select BookName from Books where status="%s" and AuthorName="%s"' %
                        ('Open', auth)))
        myDB.action(
            'UPDATE authors set HaveBooks="%s" where AuthorName="%s"' %
            (havebooks, auth))
        totalbooks = len(
            myDB.select('select BookName from Books where status!="%s" and AuthorName="%s"' %
                        ('Ignored', auth)))
        myDB.action(
            'UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' %
            (totalbooks, auth))

    logger.info('Library scan complete')

Example #38

Show file

File: magazinescan.py Project: Code-Slave/LazyLibrarian

def magazineScan():
    lazylibrarian.MAG_UPDATE = 1
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()

        mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER']
        mag_path = mag_path.split('$')[0]

        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            if mag_path[0] not in '._':
                mag_path = '_' + mag_path
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)

        mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)
        if lazylibrarian.CONFIG['FULL_SCAN']:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?',
                                (issuefile, ))
                    logger.info('Issue %s - %s deleted as not found on disk' %
                                (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus":
                        "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            mags = myDB.select('SELECT * from magazines')
            # now check the magazine titles and delete any with no issues
            for mag in mags:
                title = mag['Title']
                count = myDB.select(
                    'SELECT COUNT(Title) as counter FROM issues WHERE Title=?',
                    (title, ))
                issues = count[0]['counter']
                if not issues:
                    logger.debug('Magazine %s deleted as no issues found' %
                                 title)
                    myDB.action('DELETE from magazines WHERE Title=?',
                                (title, ))

        logger.info(' Checking [%s] for magazines' % mag_path)

        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
                "\\$\\T\\i\\t\\l\\e",
                "(?P<title>.*?)") + '\.[' + booktypes + ']'
        title_pattern = re.compile(match, re.VERBOSE)
        match = matchString.replace(
            "\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
                "\\$\\T\\i\\t\\l\\e", "") + '\.[' + booktypes + ']'
        date_pattern = re.compile(match, re.VERBOSE)

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            issuedate = match.group("issuedate")
                            title = match.group("title")
                            match = True
                        else:
                            match = False
                    except Exception:
                        match = False

                    if not match:
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                            else:
                                logger.debug("Pattern match failed for [%s]" %
                                             fname)
                                continue
                        except Exception as e:
                            logger.debug("Invalid name format for [%s] %s %s" %
                                         (fname, type(e).__name__, str(e)))
                            continue

                    logger.debug("Found %s Issue %s" % (title, fname))

                    issuefile = os.path.join(rootdir,
                                             fname)  # full path to issue.pdf
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(
                        datetime.date.fromtimestamp(mtime))

                    controlValueDict = {"Title": title}

                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired, IssueDate, MagazineAdded from magazines WHERE Title=?',
                        (title, ))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict,
                                    controlValueDict)
                        magissuedate = None
                        magazineadded = None
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)

                    issuedate = str(issuedate).zfill(
                        4)  # for sorting issue numbers

                    # is this issue already in the database?
                    controlValueDict = {"Title": title, "IssueDate": issuedate}
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match(
                        'SELECT Title from issues WHERE Title=? and IssueDate=?',
                        (title, issuedate))
                    if not iss_entry:
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)
                        logger.debug("Adding issue %s %s" % (title, issuedate))

                    create_cover(issuefile)
                    lazylibrarian.postprocess.processMAGOPF(
                        issuefile, title, issuedate, issue_id)

                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:  # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover":
                            os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict,
                                    controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(
                                issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict,
                                    controlValueDict)

        magcount = myDB.match("select count(*) from magazines")
        isscount = myDB.match("select count(*) from issues")

        logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                    (magcount['count(*)'], plural(magcount['count(*)']),
                     isscount['count(*)'], plural(isscount['count(*)'])))
        lazylibrarian.MAG_UPDATE = 0

    except Exception:
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' %
                     traceback.format_exc())

Example #39

Show file

File: gb.py Project: andyuc88/LazyLibrarian

    def find_book(self, bookid=None, bookstatus="None"):
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, in_cache = gb_json_request(URL)

        if jsonresults is None:
            logger.debug('No results found for %s' % bookid)
            return

        if not bookstatus:
            bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

        book = bookdict(jsonresults)
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(book['name'], dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        if not book['author']:
            logger.debug('Book %s does not contain author field, skipping' % bookname)
            return
        # warn if language is in ignore list, but user said they wanted this book
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if book['lang'] not in valid_langs and 'All' not in valid_langs:
            logger.debug('Book %s googlebooks language does not match preference, %s' % (bookname, book['lang']))

        if lazylibrarian.CONFIG['NO_PUBDATE']:
            if not book['date'] or book['date'] == '0000':
                logger.warn('Book %s Publication date does not match preference, %s' % (bookname, book['date']))

        if lazylibrarian.CONFIG['NO_FUTURE']:
            if book['date'] > today()[:4]:
                logger.warn('Book %s Future publication date does not match preference, %s' % (bookname, book['date']))

        authorname = book['author']
        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
            if not match:
                match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
                if match:
                    logger.debug('%s: Changing authorid from %s to %s' %
                                 (author['authorname'], AuthorID, match['AuthorID']))
                    AuthorID = match['AuthorID']  # we have a different authorid for that authorname
                else:  # no author but request to add book, add author with newauthor status
                    # User hit "add book" button from a search or a wishlist import
                    newauthor_status = 'Active'
                    if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                        newauthor_status = 'Paused'
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": newauthor_status
                    }
                    authorname = author['authorname']
                    myDB.upsert("authors", newValueDict, controlValueDict)
                    if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                        self.get_author_books(AuthorID, entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
            return

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": book['sub'],
            "BookDesc": book['desc'],
            "BookIsbn": book['isbn'],
            "BookPub": book['pub'],
            "BookGenre": book['genre'],
            "BookImg": book['img'],
            "BookLink": book['link'],
            "BookRate": float(book['rate']),
            "BookPages": book['pages'],
            "BookDate": book['date'],
            "BookLang": book['lang'],
            "Status": bookstatus,
            "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s by %s added to the books database" % (bookname, authorname))

        if 'nocover' in book['img'] or 'nophoto' in book['img']:
            # try to get a cover from another source
            workcover, source = getBookCover(bookid)
            if workcover:
                logger.debug('Updated cover for %s using %s' % (bookname, source))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

            elif book['img'] and book['img'].startswith('http'):
                link, success, _ = cache_img("book", bookid, book['img'])
                if success:
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"BookImg": link}
                    myDB.upsert("books", newValueDict, controlValueDict)
                else:
                    logger.debug('Failed to cache image for %s' % book['img'])

        serieslist = []
        if book['series']:
            serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
        if lazylibrarian.CONFIG['ADD_SERIES']:
            newserieslist = getWorkSeries(bookid)
            if newserieslist:
                serieslist = newserieslist
                logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
        setSeries(serieslist, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)

Example #40

Show file

File: librarysync.py Project: NVRemoteDev/LazyLibrarian

def LibraryScan(dir=None):
    if not dir:
        if not lazylibrarian.DOWNLOAD_DIR:
            return
        else:
            dir = lazylibrarian.DOWNLOAD_DIR

    if not os.path.isdir(dir):
        logger.warn(
            'Cannot find directory: %s. Not scanning' %
            dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))
        return

    myDB = database.DBConnection()

    myDB.action('drop table if exists stats')
    myDB.action(
        'create table stats (authorname text, GR_book_hits int, GR_lang_hits int, LT_lang_hits int, \
                            GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int )')

    logger.info(
        'Scanning ebook directory: %s' %
        dir.decode(lazylibrarian.SYS_ENCODING, 'replace'))

    new_book_count = 0
    file_count = 0

    if lazylibrarian.FULL_SCAN:
        books = myDB.select(
            'select AuthorName, BookName, BookFile, BookID from books where Status="Open"')
        status = lazylibrarian.NOTFOUND_STATUS
        logger.info('Missing books will be marked as %s' % status)
        for book in books:
            bookName = book['BookName']
            bookAuthor = book['AuthorName']
            bookID = book['BookID']
            bookfile = book['BookFile']

            if not(bookfile and os.path.isfile(bookfile)):
                myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID))
                myDB.action('update books set BookFile="" where BookID="%s"' % bookID)
                logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName))
        
    # to save repeat-scans of the same directory if it contains multiple formats of the same book, 
    # keep track of which directories we've already looked at 
    processed_subdirectories = []

    matchString = ''
    for char in lazylibrarian.EBOOK_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the EBOOK_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for r, d, f in os.walk(dir):
        for directory in d[:]:
            if directory.startswith("."):
                d.remove(directory)
            # prevent magazine being scanned
            if directory.startswith("_"):
                d.remove(directory)

        for files in f:
            file_count += 1

            if isinstance(r, str):
                r = r.decode('utf-8')

            subdirectory = r.replace(dir, '')
            # Added new code to skip if we've done this directory before.
            # Made this conditional with a switch in config.ini
            # in case user keeps multiple different books in the same subdirectory
            if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories):
                logger.debug("[%s] already scanned" % subdirectory)
            else:
                # If this is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                match = 0
                if formatter.is_valid_booktype(files):

                    logger.debug("[%s] Now scanning subdirectory %s" %
                                 (dir, subdirectory))

                    language = "Unknown"
                    isbn = ""
                    book = ""
                    author = ""
                    words = files.split('.')
                    extn = words[len(words) - 1]

                    # if it's an epub or a mobi we can try to read metadata from it
                    if (extn == "epub") or (extn == "mobi"):
                        book_filename = os.path.join(
                            r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING))

                        try:
                            res = get_book_info(book_filename)
                        except:
                            res = {}
                        if 'title' in res and 'creator' in res:  # this is the minimum we need
                            match = 1
                            book = res['title']
                            author = res['creator']
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'type' in res:
                                extn = res['type']

                            logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, extn))
                        else:

                            logger.debug("Book meta incomplete in %s" % book_filename)

                    # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                    # just look for any .opf file in the current directory since we don't know
                    # LL preferred authorname/bookname at this point.
                    # Allow metadata in file to override book contents as may be users pref

                    metafile = opf_file(r)
                    try:
                        res = get_book_info(metafile)
                    except:
                        res = {}
                    if 'title' in res and 'creator' in res:  # this is the minimum we need
                        match = 1
                        book = res['title']
                        author = res['creator']
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        logger.debug(
                            "file meta [%s] [%s] [%s] [%s]" %
                            (isbn, language, author, book))
                    else:
                        logger.debug("File meta incomplete in %s" % metafile)

                    if not match:  # no author/book from metadata file, and not embedded either
                        match = pattern.match(files)
                        if match:
                            author = match.group("author")
                            book = match.group("book")
                        else:
                            logger.debug("Pattern match failed [%s]" % files)

                    if match:
                        # flag that we found a book in this subdirectory
                        processed_subdirectories.append(subdirectory)

                        # If we have a valid looking isbn, and language != "Unknown", add it to cache
                        if language != "Unknown" and formatter.is_valid_isbn(isbn):
                            logger.debug(
                                "Found Language [%s] ISBN [%s]" %
                                (language, isbn))
                            # we need to add it to language cache if not already
                            # there, is_valid_isbn has checked length is 10 or 13
                            if len(isbn) == 10:
                                isbnhead = isbn[0:3]
                            else:
                                isbnhead = isbn[3:6]
                            match = myDB.action(
                                'SELECT lang FROM languages where isbn = "%s"' %
                                (isbnhead)).fetchone()
                            if not match:
                                myDB.action(
                                    'insert into languages values ("%s", "%s")' %
                                    (isbnhead, language))
                                logger.debug(
                                    "Cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))
                            else:
                                logger.debug(
                                    "Already cached Lang [%s] ISBN [%s]" %
                                    (language, isbnhead))

                        # get authors name in a consistent format
                        if "," in author:  # "surname, forename"
                            words = author.split(',')
                            author = words[1].strip() + ' ' + words[0].strip()  # "forename surname"
                        if author[1] == ' ':
                            author = author.replace(' ', '.')
                            author = author.replace('..', '.')

                        # Check if the author exists, and import the author if not,
                        # before starting any complicated book-name matching to save repeating the search
                        #
                        check_exist_author = myDB.action(
                            'SELECT * FROM authors where AuthorName="%s"' %
                            author).fetchone()
                        if not check_exist_author and lazylibrarian.ADD_AUTHOR:
                            # no match for supplied author, but we're allowed to
                            # add new ones

                            GR = GoodReads(author)
                            try:
                                author_gr = GR.find_author_id()
                            except:
                                logger.warn(
                                    "Error finding author id for [%s]" %
                                    author)
                                continue

                            # only try to add if GR data matches found author data
                            if author_gr:
                                authorname = author_gr['authorname']

                                # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
                                match_auth = author.replace('.', '_')
                                match_auth = match_auth.replace(' ', '_')
                                match_auth = match_auth.replace('__', '_')
                                match_name = authorname.replace('.', '_')
                                match_name = match_name.replace(' ', '_')
                                match_name = match_name.replace('__', '_')
                                match_name = common.remove_accents(match_name)
                                match_auth = common.remove_accents(match_auth)
                                # allow a degree of fuzziness to cater for different accented character handling.
                                # some author names have accents,
                                # filename may have the accented or un-accented version of the character
                                # The currently non-configurable value of fuzziness might need to go in config
                                # We stored GoodReads unmodified author name in
                                # author_gr, so store in LL db under that
                                # fuzz.ratio doesn't lowercase for us
                                match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
                                if match_fuzz < 90:
                                    logger.debug(
                                        "Failed to match author [%s] fuzz [%d]" %
                                        (author, match_fuzz))
                                    logger.debug(
                                        "Failed to match author [%s] to authorname [%s]" %
                                        (match_auth, match_name))

                                # To save loading hundreds of books by unknown
                                # authors at GR or GB, ignore if author "Unknown"
                                if (author != "Unknown") and (match_fuzz >= 90):
                                    # use "intact" name for author that we stored in
                                    # GR author_dict, not one of the various mangled versions
                                    # otherwise the books appear to be by a different author!
                                    author = author_gr['authorname']
                                    # this new authorname may already be in the
                                    # database, so check again
                                    check_exist_author = myDB.action(
                                        'SELECT * FROM authors where AuthorName="%s"' %
                                        author).fetchone()
                                    if not check_exist_author:
                                        logger.debug(
                                            "Adding new author [%s]" %
                                            author)
                                        try:
                                            importer.addAuthorToDB(author)
                                            check_exist_author = myDB.action(
                                                'SELECT * FROM authors where AuthorName="%s"' %
                                                author).fetchone()
                                        except:
                                            continue

                        # check author exists in db, either newly loaded or already there
                        if not check_exist_author:
                            logger.debug(
                                "Failed to match author [%s] in database" %
                                author)
                        else:
                            # author exists, check if this book by this author is in our database
                            # metadata might have quotes in book name
                            book = book.replace('"', '').replace("'", "")
                            bookid = find_book_in_db(myDB, author, book)

                            if bookid:
                                # check if book is already marked as "Open" (if so,
                                # we already had it)

                                check_status = myDB.action(
                                    'SELECT Status from books where BookID="%s"' %
                                    bookid).fetchone()
                                if check_status['Status'] != 'Open':
                                    # update status as we've got this book

                                    myDB.action(
                                        'UPDATE books set Status="Open" where BookID="%s"' %
                                        bookid)

                                    book_filename = os.path.join(r, files)

                                    # update book location so we can check if it
                                    # gets removed, or allow click-to-open

                                    myDB.action(
                                        'UPDATE books set BookFile="%s" where BookID="%s"' %
                                        (book_filename, bookid))

                                    new_book_count += 1

    cachesize = myDB.action("select count('ISBN') as counter from languages").fetchone()
    logger.info(
        "%s new/modified books found and added to the database" %
        new_book_count)
    logger.info("%s files processed" % file_count)
    stats = myDB.action(
        "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
            sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached) FROM stats").fetchone()
    if stats['sum(GR_book_hits)'] is not None:
        # only show stats if new books added
        if lazylibrarian.BOOK_API == "GoogleBooks":
            logger.debug(
                "GoogleBooks was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoogleBooks language was changed %s times" %
                stats['sum(GB_lang_change)'])
        if lazylibrarian.BOOK_API == "GoodReads":
            logger.debug(
                "GoodReads was hit %s times for books" %
                stats['sum(GR_book_hits)'])
            logger.debug(
                "GoodReads was hit %s times for languages" %
                stats['sum(GR_lang_hits)'])
        logger.debug(
            "LibraryThing was hit %s times for languages" %
            stats['sum(LT_lang_hits)'])
        logger.debug(
            "Language cache was hit %s times" %
            stats['sum(cache_hits)'])
        logger.debug(
            "Unwanted language removed %s books" %
            stats['sum(bad_lang)'])
        logger.debug(
            "Unwanted characters removed %s books" %
            stats['sum(bad_char)'])
        logger.debug(
            "Unable to cache %s books with missing ISBN" %
            stats['sum(uncached)'])
    logger.debug("Cache %s hits, %s miss" % (lazylibrarian.CACHE_HIT, lazylibrarian.CACHE_MISS))
    logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])
    stats = len(myDB.select('select BookID from Books where status="Open" and BookLang="Unknown"'))
    if stats:
        logger.warn("There are %s books in your library with unknown language" % stats)

    authors = myDB.select('select AuthorName from authors')
    # Update bookcounts for all authors, not just new ones - refresh may have located
    # new books for existing authors especially if switched provider gb/gr
    logger.debug('Updating bookcounts for %i authors' % len(authors))
    for author in authors:
        name = author['AuthorName']
        havebooks = myDB.action(
            'SELECT count("BookID") as counter from books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' %
            name).fetchone()
        myDB.action('UPDATE authors set HaveBooks="%s" where AuthorName="%s"' % (havebooks['counter'], name))
        totalbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s"' % name).fetchone()        
        myDB.action('UPDATE authors set TotalBooks="%s" where AuthorName="%s"' % (totalbooks['counter'], name))
        unignoredbooks = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND Status!="Ignored"' %
            name).fetchone()
        myDB.action('UPDATE authors set UnignoredBooks="%s" where AuthorName="%s"' % (unignoredbooks['counter'], name))

    covers = myDB.action("select  count('bookimg') as counter from books where bookimg like 'http%'").fetchone()
    logger.info("Caching covers for %s books" % covers['counter'])

    images = myDB.action('select bookid, bookimg, bookname from books where bookimg like "http%"')
    for item in images:
        bookid = item['bookid']
        bookimg = item['bookimg']
        bookname = item['bookname']
        newimg = bookwork.cache_cover(bookid, bookimg)
        if newimg != bookimg:
            myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid))
    logger.info('Library scan complete')

Example #41

Show file

File: postprocess.py Project: Trixter69/LazyLibrarian

                            try:
                                os.makedirs(dest_path)
                            except Exception, e:
                                logger.debug(str(e))
                        if lazylibrarian.DESTINATION_COPY == 1:
                            shutil.copyfile(os.path.join(pp_path, file3), os.path.join(dest_path, file3))
                        else:
                            shutil.move(os.path.join(pp_path, file3), os.path.join(dest_path, file3))
        else:
            shutil.move(pp_path, dest_path)
            logger.debug('Successfully moved %s to %s.' % (pp_path, dest_path))

        pp = True

        # try and rename the actual book file & remove non-book files
        booktype_list = formatter.getList(lazylibrarian.EBOOK_TYPE)
        for file2 in os.listdir(dest_path):
            #logger.debug('file extension: ' + str(file2).split('.')[-1])
            if ((file2.lower().find(".jpg") <= 0) & (file2.lower().find(".opf") <= 0)):
                if ((str(file2).split('.')[-1]) not in booktype_list):
                    logger.debug('Removing unwanted file: %s' % str(file2))
                    os.remove(os.path.join(dest_path, file2))
                else:
                    logger.debug('Moving %s to directory %s' % (file2, dest_path))
                    os.rename(os.path.join(dest_path, file2), os.path.join(dest_path, global_name + '.' + str(file2).split('.')[-1]))
        try:
            os.chmod(dest_path, 0777)
        except Exception, e:
            logger.debug("Could not chmod path: " + str(dest_path))
    except OSError, e:
        logger.error('Could not create destination folder or rename the downloaded ebook. Check permissions of: ' + lazylibrarian.DESTINATION_DIR)

Example #42

Show file

def magazineScan(thread=None):
    # rename this thread
    if thread is None:
        threading.currentThread().name = "MAGAZINESCAN"

    myDB = database.DBConnection()

    mag_path = lazylibrarian.MAG_DEST_FOLDER
    if '$' in mag_path:
        mag_path = mag_path.split('$')[0]

    if lazylibrarian.MAG_RELATIVE:
        if mag_path[0] not in '._':
            mag_path = '_' + mag_path
        mag_path = os.path.join(lazylibrarian.DESTINATION_DIR, mag_path).encode(lazylibrarian.SYS_ENCODING)
    else:
        mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

    if lazylibrarian.FULL_SCAN:
        mags = myDB.select('select * from Issues')
        # check all the issues are still there, delete entry if not
        for mag in mags:
            title = mag['Title']
            issuedate = mag['IssueDate']
            issuefile = mag['IssueFile']

            if issuefile and not os.path.isfile(issuefile):
                myDB.action('DELETE from Issues where issuefile="%s"' % issuefile)
                logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                controlValueDict = {"Title": title}
                newValueDict = {
                    "LastAcquired": None,       # clear magazine dates
                    "IssueDate": None,       # we will fill them in again later
                    "IssueStatus": "Skipped"    # assume there are no issues now
                }
                myDB.upsert("magazines", newValueDict, controlValueDict)
                logger.debug('Magazine %s details reset' % title)

        mags = myDB.select('SELECT * from magazines')
        # now check the magazine titles and delete any with no issues
        for mag in mags:
            title = mag['Title']
            count = myDB.select('SELECT COUNT(Title) as counter FROM issues WHERE Title="%s"' % title)
            issues = count[0]['counter']
            if not issues:
                logger.debug('Magazine %s deleted as no issues found' % title)
                myDB.action('DELETE from magazines WHERE Title="%s"' % title)

    logger.info(' Checking [%s] for magazines' % mag_path)

    matchString = ''
    for char in lazylibrarian.MAG_DEST_FILE:
        matchString = matchString + '\\' + char
    # massage the MAG_DEST_FILE config parameter into something we can use
    # with regular expression matching
    booktypes = ''
    count = -1
    booktype_list = formatter.getList(lazylibrarian.MAG_TYPE)
    for book_type in booktype_list:
        count += 1
        if count == 0:
            booktypes = book_type
        else:
            booktypes = booktypes + '|' + book_type
    matchString = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
        "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
    pattern = re.compile(matchString, re.VERBOSE)

    for dirname, dirnames, filenames in os.walk(mag_path):
        for fname in filenames[:]:
            # maybe not all magazines will be pdf?
            if formatter.is_valid_booktype(fname, booktype='mag'):
                try:
                    match = pattern.match(fname)
                    if match:
                        issuedate = match.group("issuedate")
                        title = match.group("title")
                        # print issuedate
                        # print title
                    else:
                        logger.debug("Pattern match failed for [%s]" % fname)
                        continue
                        # title = fname.split('-')[3]
                        # title = title.split('.')[-2]
                        # title = title.strip()
                        # issuedate = fname.split(' ')[0]
                except:
                    logger.debug("Invalid name format for [%s]" % fname)
                    continue

                logger.debug("Found Issue %s" % fname)

                issuefile = os.path.join(dirname, fname)  # full path to issue.pdf
                mtime = os.path.getmtime(issuefile)
                iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                # magazines : Title, Frequency, Regex, Status, MagazineAdded, LastAcquired, IssueDate, IssueStatus
                # issues    : Title, IssueAcquired, IssueDate, IssueFile

                controlValueDict = {"Title": title}

                # is this magazine already in the database?
                mag_entry = myDB.select('SELECT * from magazines WHERE Title="%s"' % title)
                if not mag_entry:
                    # need to add a new magazine to the database
                    newValueDict = {
                        "Frequency": None,  # unused currently
                        "Regex": None,
                        "Status": "Active",
                        "MagazineAdded": None,
                        "LastAcquired": None,
                        "IssueDate": None,
                        "IssueStatus": "Skipped"
                    }
                    logger.debug("Adding magazine %s" % title)
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    lastacquired = None
                    magissuedate = None
                    magazineadded = None
                else:
                    maglastacquired = mag_entry[0]['LastAcquired']
                    magissuedate = mag_entry[0]['IssueDate']
                    magazineadded = mag_entry[0]['MagazineAdded']

                # is this issue already in the database?
                controlValueDict = {"Title": title, "IssueDate": issuedate}
                issue_id = create_id("%s %s" % (title, issuedate))
                iss_entry = myDB.select('SELECT * from issues WHERE Title="%s" and IssueDate="%s"' % (
                    title, issuedate))
                if not iss_entry:
                    newValueDict = {
                        "IssueAcquired": iss_acquired,
                        "IssueID": issue_id,
                        "IssueFile": issuefile
                    }
                    logger.debug("Adding issue %s %s" % (title, issuedate))
                else:
                    # don't really need to do this each time
                    newValueDict = {"IssueID": issue_id}
                myDB.upsert("Issues", newValueDict, controlValueDict)

                create_cover(issuefile)

                # see if this issues date values are useful
                # if its a new magazine, magazineadded,magissuedate,lastacquired are all None
                # if magazineadded is NOT None, but the others are, we've deleted one or more issues
                # so the most recent dates may be wrong and need to be updated.
                # Set magazine_issuedate to issuedate of most recent issue we have
                # Set magazine_added to acquired date of earliest issue we have
                # Set magazine_lastacquired to acquired date of most recent issue we have
                # acquired dates are read from magazine file timestamps
                if magazineadded is None:  # new magazine, this might be the only issue
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "MagazineAdded": iss_acquired,
                        "LastAcquired": iss_acquired,
                        "IssueDate": issuedate,
                        "IssueStatus": "Open"
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                else:
                    if iss_acquired < magazineadded:
                        controlValueDict = {"Title": title}
                        newValueDict = {"MagazineAdded": iss_acquired}
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    if maglastacquired is None or iss_acquired > maglastacquired:
                        controlValueDict = {"Title": title}
                        newValueDict = {"LastAcquired": iss_acquired}
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    if magissuedate is None or issuedate > magissuedate:
                        controlValueDict = {"Title": title}
                        newValueDict = {"IssueDate": issuedate}
                        myDB.upsert("magazines", newValueDict, controlValueDict)

    magcount = myDB.action("select count(*) from magazines").fetchone()
    isscount = myDB.action("select count(*) from issues").fetchone()

    logger.info("Magazine scan complete, found %s magazines, %s issues" % (magcount['count(*)'], isscount['count(*)']))

Example #43

Show file

File: resultlist.py Project: andyuc88/LazyLibrarian

def findBestResult(resultlist, book, searchtype, source):
    """ resultlist: collated results from search providers
        book:       the book we want to find
        searchtype: book, magazine, shortbook, audiobook etc.
        source:     nzb, tor, rss, direct
        return:     highest scoring match, or None if no match
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                    ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                    '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                    ':': '', '!': '', '-': ' ', '\s\s': ' '}

        dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
               ',': '', '*': '', ':': '.', ';': '', '\'': ''}

        if source == 'rss':
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'

        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'

        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        logger.debug('Searching %s %s results for best %s match' % (len(resultlist), source, auxinfo))

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ", resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz:
                logger.debug("%s author/book Match: %s/%s %s at %s" %
                             (source.upper(), Author_match, Book_match, resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                already_failed = myDB.match('SELECT * from wanted WHERE NZBurl=?', (url,))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" % (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and not url.startswith('http') and not url.startswith('magnet'):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" % (resultTitle, url))

            if not rejected:
                for word in reject_list:
                    if word in getList(resultTitle.lower()) and word not in getList(author.lower()) \
                            and word not in getList(title.lower()):
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" % (resultTitle, word))
                        break

            size_temp = check_int(res[prefix + 'size'], 1000)  # Need to cater for when this is NONE (Issue 35)
            size = round(float(size_temp) / 1048576, 2)

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % resultTitle)

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % resultTitle)

            if not rejected:
                bookid = book['bookid']
                # newTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
                # newTitle = resultTitle + ' LL.(' + book['bookid'] + ')'

                if source == 'nzb':
                    mode = res['nzbmode']  # nzb, torznab
                else:
                    mode = res['tor_type']  # torrent, magnet, nzb(from rss), direct

                controlValueDict = {"NZBurl": url}
                newValueDict = {
                    "NZBprov": res[prefix + 'prov'],
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": size,
                    "NZBtitle": resultTitle,
                    "NZBmode": mode,
                    "AuxInfo": auxinfo,
                    "Status": "Skipped"
                }

                score = (Book_match + Author_match) / 2  # as a percentage
                # lose a point for each unwanted word in the title so we get the closest match
                # but for RSS ignore anything at the end in square braces [keywords, genres etc]
                if source == 'rss':
                    wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
                else:
                    wordlist = getList(resultTitle.lower())
                words = [x for x in wordlist if x not in getList(author.lower())]
                words = [x for x in words if x not in getList(title.lower())]
                typelist = ''

                if newValueDict['AuxInfo'] == 'eBook':
                    words = [x for x in words if x not in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])]
                    typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                elif newValueDict['AuxInfo'] == 'AudioBook':
                    words = [x for x in words if x not in getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])]
                    typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                score -= len(words)
                # prioritise titles that include the ebook types we want
                # add more points for booktypes nearer the left in the list
                # eg if epub, mobi, pdf  add 3 points if epub found, 2 for mobi, 1 for pdf
                booktypes = [x for x in wordlist if x in typelist]
                if booktypes:
                    typelist = list(reversed(typelist))
                    for item in booktypes:
                        for i in [i for i, x in enumerate(typelist) if x == item]:
                            score += i + 1

                matches.append([score, newValueDict, controlValueDict, res['priority']])

        if matches:
            highest = max(matches, key=lambda s: (s[0], s[3]))
            score = highest[0]
            newValueDict = highest[1]
            # controlValueDict = highest[2]
            dlpriority = highest[3]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                logger.info('Nearest match (%s%%): %s using %s search for %s %s' %
                            (score, newValueDict['NZBtitle'], searchtype, book['authorName'], book['bookName']))
            else:
                logger.info('Best match (%s%%): %s using %s search, %s priority %s' %
                            (score, newValueDict['NZBtitle'], searchtype, newValueDict['NZBprov'], dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" % (source, book["searchterm"], searchtype))
        return None
    except Exception:
        logger.error('Unhandled exception in findBestResult: %s' % traceback.format_exc())

Example #44

Show file

File: grsync.py Project: nicecai/LazyLibrarian

def grsync(status, shelf):
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        GA = grauth()
        GR = None
        shelves = GA.get_shelf_list()
        found = False
        for item in shelves:  # type: dict
            if item['name'] == shelf:
                found = True
                break
        if not found:
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # Sync method for WANTED:
        # Get results of last_sync (if any)
        # For each book in last_sync
        #    if not in ll_list, new deletion, remove from gr_shelf
        #    if not in gr_shelf, new deletion, remove from ll_list, mark Skipped
        # For each book in ll_list
        #    if not in last_sync, new addition, add to gr_shelf
        # For each book in gr_shelf
        #    if not in last sync, new addition, add to ll_list, mark Wanted
        #
        # save ll WANTED as last_sync

        # For HAVE/OPEN method is the same, but only change status if HAVE, not OPEN

        cmd = 'select SyncList from sync where UserID="%s" and Label="%s"' % (
            "goodreads", shelf)
        res = myDB.match(cmd)
        last_sync = []
        shelf_changed = 0
        ll_changed = 0
        if res:
            last_sync = getList(res['SyncList'])

        added_to_shelf = list(set(gr_shelf) - set(last_sync))
        removed_from_shelf = list(set(last_sync) - set(gr_shelf))
        added_to_ll = list(set(ll_list) - set(last_sync))
        removed_from_ll = list(set(last_sync) - set(ll_list))

        logger.info("%s missing from lazylibrarian %s" %
                    (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            # first the deletions since last sync...
            try:
                res, content = GA.BookToList(book, shelf, action='remove')
            except Exception as e:
                logger.debug("Error removing %s from %s: %s %s" %
                             (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s removed from %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to remove %s from %s shelf: %s" %
                            (book, shelf, content))

        logger.info("%s missing from goodreads %s" %
                    (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            # deleted from goodreads
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if res['Status'] in ['Have', 'Wanted']:
                    myDB.action(
                        'UPDATE books SET Status="Skipped" WHERE BookID=?',
                        (book, ))
                    ll_changed += 1
                    logger.debug("%10s set to Skipped" % book)
                else:
                    logger.warn("Not removing %s, book is marked %s" %
                                (book, res['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            try:
                res, content = GA.BookToList(book, shelf, action='add')
            except Exception as e:
                logger.debug("Error adding %s to %s: %s %s" %
                             (book, shelf, type(e).__name__, str(e)))
                res = None
                content = ''
            if res:
                logger.debug("%10s added to %s shelf" % (book, shelf))
                shelf_changed += 1
            else:
                logger.warn("Failed to add %s to %s shelf: %s" %
                            (book, shelf, content))

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            cmd = 'select Status from books where BookID="%s"' % book
            res = myDB.match(cmd)
            if not res:
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                res = myDB.match(cmd)
            if not res:
                logger.warn('Book %s not found in database' % book)
            else:
                if status == 'Open':
                    if res['Status'] == 'Open':
                        logger.warn("Book %s is already marked Open" % book)
                    else:
                        myDB.action(
                            'UPDATE books SET Status="Have" WHERE BookID=?',
                            (book, ))
                        ll_changed += 1
                        logger.debug("%10s set to Have" % book)
                elif status == 'Wanted':
                    # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                    # (depending on user prefs, to-read and wanted might not be the same thing)
                    if lazylibrarian.CONFIG['GR_UNIQUE'] and res['Status'] in [
                            'Open', 'Have'
                    ]:
                        try:
                            res, content = GA.BookToList(book,
                                                         shelf,
                                                         action='remove')
                        except Exception as e:
                            logger.debug(
                                "Error removing %s from %s: %s %s" %
                                (book, shelf, type(e).__name__, str(e)))
                            res = None
                            content = ''
                        if res:
                            logger.debug("%10s removed from %s shelf" %
                                         (book, shelf))
                            shelf_changed += 1
                        else:
                            logger.warn(
                                "Failed to remove %s from %s shelf: %s" %
                                (book, shelf, content))
                    elif res['Status'] != 'Open':
                        myDB.action(
                            'UPDATE books SET Status="Wanted" WHERE BookID=?',
                            (book, ))
                        ll_changed += 1
                        logger.debug("%10s set to Wanted" % book)
                    else:
                        logger.warn(
                            "Not setting %s as Wanted, already marked Open" %
                            book)

        # get new definitive list from ll
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = []
        for terms in results:
            ll_list.append(terms['bookid'])

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {
            "Date": str(time.time()),
            "Synclist": ', '.join(ll_list)
        }
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' %
                     traceback.format_exc())
        return 0, 0

Example #45

Show file

File: searchmag.py Project: Code-Slave/LazyLibrarian

def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \
                                 IssueDate from magazines WHERE Status="Active"'
                                     )
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select(
                    'SELECT Title, Regex, LastAcquired, IssueDate from magazines \
                                          WHERE Title=? AND Status="Active"',
                    (magazine['bookid'], ))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' %
                    (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                dic = {
                    '...': '',
                    ' & ': ' ',
                    ' = ': ' ',
                    '?': '',
                    '$': 's',
                    ' + ': ' ',
                    '"': '',
                    ',': '',
                    '*': ''
                }
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)

                searchterm = re.sub('[.\-/]', ' ', searchterm)
                searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn(
                'There is nothing to search for.  Mark some magazines as active.'
            )

        for book in searchlist:

            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No nzb providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG,
                                 0) + 1200 < timenow:
                        logger.warn(
                            'No direct providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(
                    book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No tor providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate':
                            'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn(
                            'No rss providers are available. Check config and blocklist'
                        )
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item[
                                'tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace(
                        "'", "")  # suppress " in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(
                        nzbsize_temp, 1000
                    )  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match(
                        'SELECT * from magazines WHERE Title=?', (bookid, ))
                    if not results:
                        logger.debug(
                            'Magazine [%s] does not match search term [%s].' %
                            (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(
                            lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(
                                lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" %
                                             nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {
                                '.': ' ',
                                '-': ' ',
                                '/': ' ',
                                '+': ' ',
                                '_': ' ',
                                '(': '',
                                ')': ''
                            }
                            nzbtitle_formatted = replace_all(nzbtitle,
                                                             dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA)
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[
                                    0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(
                                nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(
                                ' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015

                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    wlist.append(unaccented(word).lower())
                                for word in bookid_exploded:
                                    if unaccented(word).lower() not in wlist:
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(
                                        u"Magazine title match failed " +
                                        bookid + " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(u"Magazine matched " +
                                                 bookid + " for " +
                                                 nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" %
                                             len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match(
                                'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                (nzburl, ))
                            if blocked:
                                logger.debug(
                                    "Rejecting %s, blacklisted at %s" %
                                    (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(
                                str(results['Reject']).lower())
                            reject_list += getList(
                                lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(
                                nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' %
                                                 str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" %
                                                 (nzbtitle_formatted, word))
                                    break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
                            newdatish = "none"
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos -
                                                                        1])
                                    if month:
                                        if pos - 1:
                                            day = check_int(
                                                nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (
                                            year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and (pos - 1):
                                        month = month2num(
                                            nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(
                                                nzbtitle_exploded[
                                                    pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(
                                            nzbtitle_exploded):
                                        month = check_int(
                                            nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(
                                                    nzbtitle_exploded):
                                                day = check_int(
                                                    nzbtitle_exploded[pos + 2],
                                                    1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(
                                                    year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (
                                                    year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in [
                                            "issue", "no", "nr", "vol"
                                    ]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(
                                                nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(
                                                    issue)  # 4 == 04 == 004
                                                if pos + 2 < len(
                                                        nzbtitle_exploded):
                                                    year = check_year(
                                                        nzbtitle_exploded[pos +
                                                                          2])
                                                    if year and year < int(
                                                            datetime.date.
                                                            today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(
                                            nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(
                                                issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today(
                                            ).year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(
                                            issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(
                                                datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug(
                                    'Magazine %s not in a recognised date format.'
                                    % nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" %
                                             (regex_pass, nzbtitle_formatted,
                                              newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            #  the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero

                                if str(newdatish).isdigit():
                                    logger.debug(
                                        'Magazine comparing issue numbers (%s)'
                                        % newdatish)
                                    control_date = 0
                                elif re.match('\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(
                                        lazylibrarian.CONFIG['MAG_AGE']
                                    ) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime(
                                        "%Y-%m-%d", time.localtime(start_time))
                                    logger.debug(
                                        'Magazine date comparing to %s' %
                                        control_date)
                                else:
                                    logger.debug(
                                        'Magazine unable to find comparison type [%s]'
                                        % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(
                                    newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(
                                    4)  # pad so we sort correctly
                            elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match('\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(
                                    newdatish, control_date)
                            else:
                                # invalid comparison of date and issue number
                                if re.match('\d+-\d\d-\d\d',
                                            str(control_date)):
                                    logger.debug(
                                        'Magazine %s failed: Expecting a date'
                                        % nzbtitle_formatted)
                                else:
                                    logger.debug(
                                        'Magazine %s failed: Expecting issue number'
                                        % nzbtitle_formatted)
                                bad_date += 1
                                newdatish = "1970-01-01"  # this is our fake date for ones we can't decipher
                                comp_date = 0

                            if comp_date > 0:
                                # keep track of what we're going to download so we don't download dupes
                                new_date += 1
                                issue = bookid + ',' + newdatish
                                if issue not in issues:
                                    maglist.append({
                                        'bookid': bookid,
                                        'nzbprov': nzbprov,
                                        'nzbtitle': nzbtitle,
                                        'nzburl': nzburl,
                                        'nzbmode': nzbmode
                                    })
                                    logger.debug(
                                        'This issue of %s is new, downloading'
                                        % nzbtitle_formatted)
                                    issues.append(issue)
                                    logger.debug('Magazine request number %s' %
                                                 len(issues))
                                    if lazylibrarian.LOGLEVEL > 2:
                                        logger.debug(str(issues))
                                    insert_table = "wanted"
                                    insert_status = "Wanted"
                                    nzbdate = now()  # when we asked for it
                                else:
                                    logger.debug(
                                        'This issue of %s is already flagged for download'
                                        % issue)
                            else:
                                if newdatish != "1970-01-01":  # this is our fake date for ones we can't decipher
                                    logger.debug(
                                        'This issue of %s is old; skipping.' %
                                        nzbtitle_formatted)
                                    old_date += 1

                            # store only the _new_ matching results
                            #  Don't add a new entry if this issue has been found on an earlier search
                            #  and status has been user-set ( we only delete the "Skipped" ones )
                            #  In "wanted" table it might be already snatched/downloading/processing

                            mag_entry = myDB.match(
                                'SELECT * from %s WHERE NZBtitle=? and NZBprov=?'
                                % insert_table, (nzbtitle, nzbprov))
                            if mag_entry:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug(
                                        '%s is already in %s marked %s' %
                                        (nzbtitle, insert_table,
                                         insert_status))
                            else:
                                controlValueDict = {
                                    "NZBtitle": nzbtitle,
                                    "NZBprov": nzbprov
                                }
                                newValueDict = {
                                    "NZBurl": nzburl,
                                    "BookID": bookid,
                                    "NZBdate": nzbdate,
                                    "AuxInfo": newdatish,
                                    "Status": insert_status,
                                    "NZBsize": nzbsize,
                                    "NZBmode": nzbmode
                                }
                                myDB.upsert(insert_table, newValueDict,
                                            controlValueDict)
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Added %s to %s marked %s' %
                                                 (nzbtitle, insert_table,
                                                  insert_status))

                msg = 'Found %i result%s for %s. %i new,' % (
                    total_nzbs, plural(total_nzbs), bookid, new_date)
                msg += ' %i old, %i fail date, %i fail name,' % (
                    old_date, bad_date, bad_name)
                msg += ' %i rejected: %i to download' % (rejects, len(maglist))
                logger.info(msg)

                for magazine in maglist:
                    if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]:
                        snatch = TORDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    else:
                        snatch = NZBDownloadMethod(magazine['bookid'],
                                                   magazine['nzbtitle'],
                                                   magazine['nzburl'],
                                                   'magazine')
                    if snatch:
                        logger.info(
                            'Downloading %s from %s' %
                            (magazine['nzbtitle'], magazine["nzbprov"]))
                        notify_snatch("Magazine %s from %s at %s" %
                                      (unaccented(magazine['nzbtitle']),
                                       magazine["nzbprov"], now()))
                        custom_notify_snatch(magazine['bookid'])
                        scheduleJob(action='Start', target='processDir')

        if reset:
            scheduleJob(action='Restart', target='search_magazines')

        logger.info("Search for magazines complete")

    except Exception:
        logger.error('Unhandled exception in search_magazines: %s' %
                     traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"

Example #46

Show file

File: downloadmethods.py Project: andyuc88/LazyLibrarian

def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None, library='eBook'):
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet:?'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    elif 'magnet:?' in tor_url:
        # discard any other parameters and just use the magnet link
        torrent = 'magnet:?' + tor_url.split('magnet:?')[1]
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &amp;
        #
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = makeUnicode(value)  # ensure unicode
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add extra parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
        proxies = proxyList()
        try:
            r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
            torrent = r.content
        except requests.exceptions.Timeout:
            logger.warn('Timeout fetching file from url: %s' % tor_url)
            return False
        except Exception as e:
            # some jackett providers redirect internally using http 301 to a magnet link
            # which requests can't handle, so throws an exception
            if "magnet:?" in str(e):
                torrent = 'magnet:?' + str(e).split('magnet:?')[1]. strip("'")
            else:
                if hasattr(e, 'reason'):
                    logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, e.reason))
                else:
                    logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, str(e)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                msg = ''
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, text_type):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm '
                    setperm(tor_path)
                    msg += 'Saved '
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.warn("Failed to write magnet to file: %s %s" % (type(e).__name__, str(e)))
                    logger.debug("Progress: %s" % msg)
                    logger.debug("Filename [%s]" % (repr(tor_path)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            msg = ''
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, text_type):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved '
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.warn("Failed to write torrent to file: %s %s" % (type(e).__name__, str(e)))
                logger.debug("Progress: %s" % msg)
                logger.debug("Filename [%s]" % (repr(tor_path)))
                return False

    hashid = CalcTorrentHash(torrent)
    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
            status = qbittorrent.addTorrent(torrent, hashid)
        elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
            status = qbittorrent.addTorrent(torrent, hashid)
        else:
            status = qbittorrent.addTorrent(tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug("TORRENT %s [%s] [%s]" % (len(torrent), torrent[:20], torrent[-20:]))
        Source = "TRANSMISSION"
        if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
            downloadID = transmission.addTorrent(torrent)  # returns id or False
        elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
            downloadID = transmission.addTorrent(torrent)
        elif torrent:
            downloadID = transmission.addTorrent(None, metainfo=b64encode(torrent))
        else:
            downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = hashid
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            if isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
                downloadID = deluge.addTorrent(torrent)
            elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
                downloadID = deluge.addTorrent(torrent)
            elif torrent:
                downloadID = deluge.addTorrent(tor_title, data=b64encode(torrent))
            else:
                downloadID = deluge.addTorrent(tor_url)  # can be link or magnet, returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                elif isinstance(torrent, binary_type) and torrent.startswith(b'magnet'):
                    downloadID = client.call('core.add_torrent_magnet', torrent, args)
                elif isinstance(torrent, text_type) and torrent.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', torrent, args)
                elif torrent:
                    downloadID = client.call('core.add_torrent_file', tor_title, b64encode(torrent), args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID, lazylibrarian.CONFIG['DELUGE_LABEL'].lower())
                    result = client.call('core.get_torrent_status', downloadID, {})
                    # for item in result:
                    #    logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']

            except Exception as e:
                logger.error('DelugeRPC failed %s %s' % (type(e).__name__, str(e)))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" % library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = True
                        logger.debug("Rejecting torrent name %s, contains %s" % (tor_title, word))
                        break
                if rejected:
                    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url,))
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?', (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, full_url))
        return True

    logger.error('Failed to download torrent from %s, %s' % (Source, tor_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url,))
    return False

Example #47

Show file

File: magazinescan.py Project: DobyTang/LazyLibrarian

def magazineScan(title=None):
    lazylibrarian.MAG_UPDATE = 1

    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        onetitle = title
        if onetitle:
            mag_path = lazylibrarian.CONFIG['MAG_DEST_FOLDER'].replace('$Title', onetitle)
        else:
            mag_path = os.path.dirname(lazylibrarian.CONFIG['MAG_DEST_FOLDER'])

        if lazylibrarian.CONFIG['MAG_RELATIVE']:
            mag_path = os.path.join(lazylibrarian.DIRECTORY('eBook'), mag_path)
        if PY2:
            mag_path = mag_path.encode(lazylibrarian.SYS_ENCODING)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            mags = myDB.select('select * from Issues')
            # check all the issues are still there, delete entry if not
            for mag in mags:
                title = mag['Title']
                issuedate = mag['IssueDate']
                issuefile = mag['IssueFile']

                if issuefile and not os.path.isfile(issuefile):
                    myDB.action('DELETE from Issues where issuefile=?', (issuefile,))
                    logger.info('Issue %s - %s deleted as not found on disk' % (title, issuedate))
                    controlValueDict = {"Title": title}
                    newValueDict = {
                        "LastAcquired": None,  # clear magazine dates
                        "IssueDate": None,  # we will fill them in again later
                        "LatestCover": None,
                        "IssueStatus": "Skipped"  # assume there are no issues now
                    }
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    logger.debug('Magazine %s details reset' % title)

            # now check the magazine titles and delete any with no issues
            if lazylibrarian.CONFIG['MAG_DELFOLDER']:
                mags = myDB.select('SELECT Title,count(Title) as counter from issues group by Title')
                for mag in mags:
                    title = mag['Title']
                    issues = mag['counter']
                    if not issues:
                        logger.debug('Magazine %s deleted as no issues found' % title)
                        myDB.action('DELETE from magazines WHERE Title=?', (title,))

        logger.info(' Checking [%s] for magazines' % mag_path)

        matchString = ''
        for char in lazylibrarian.CONFIG['MAG_DEST_FILE']:
            matchString = matchString + '\\' + char
        # massage the MAG_DEST_FILE config parameter into something we can use
        # with regular expression matching
        booktypes = ''
        count = -1
        booktype_list = getList(lazylibrarian.CONFIG['MAG_TYPE'])
        for book_type in booktype_list:
            count += 1
            if count == 0:
                booktypes = book_type
            else:
                booktypes = booktypes + '|' + book_type
        match = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "(?P<title>.*?)") + '\.[' + booktypes + ']'
        title_pattern = re.compile(match, re.VERBOSE)
        match = matchString.replace("\\$\\I\\s\\s\\u\\e\\D\\a\\t\\e", "(?P<issuedate>.*?)").replace(
            "\\$\\T\\i\\t\\l\\e", "") + '\.[' + booktypes + ']'
        date_pattern = re.compile(match, re.VERBOSE)

        # try to ensure startdir is str as os.walk can fail if it tries to convert a subdir or file
        # to utf-8 and fails (eg scandinavian characters in ascii 8bit)
        for rootdir, dirnames, filenames in os.walk(makeBytestr(mag_path)):
            rootdir = makeUnicode(rootdir)
            filenames = [makeUnicode(item) for item in filenames]
            for fname in filenames:
                # maybe not all magazines will be pdf?
                if is_valid_booktype(fname, booktype='mag'):
                    issuedate = ''
                    # noinspection PyBroadException
                    try:
                        match = title_pattern.match(fname)
                        if match:
                            title = match.group("title")
                            issuedate = match.group("issuedate")
                            if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                logger.debug("Title pattern [%s][%s]" % (title, issuedate))
                            match = True
                        else:
                            logger.debug("Title pattern match failed for [%s]" % fname)
                    except Exception:
                        match = False

                    if not match:
                        # noinspection PyBroadException
                        try:
                            match = date_pattern.match(fname)
                            if match:
                                issuedate = match.group("issuedate")
                                title = os.path.basename(rootdir)
                                if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                                    logger.debug("Date pattern [%s][%s]" % (title, issuedate))
                                match = True
                            else:
                                logger.debug("Date pattern match failed for [%s]" % fname)
                        except Exception:
                            match = False

                    if not match:
                        title = os.path.basename(rootdir)
                        issuedate = ''

                    dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ',
                           '#': '# '}
                    if issuedate:
                        exploded = replace_all(issuedate, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("Date regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        exploded = replace_all(fname, dic).split()
                        regex_pass, issuedate, year = lazylibrarian.searchmag.get_issue_date(exploded)
                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_magdates:
                            logger.debug("File regex [%s][%s][%s]" % (regex_pass, issuedate, year))
                        if not regex_pass:
                            issuedate = ''

                    if not issuedate:
                        logger.warn("Invalid name format for [%s]" % fname)
                        continue

                    issuefile = os.path.join(rootdir, fname)  # full path to issue.pdf
                    mtime = os.path.getmtime(issuefile)
                    iss_acquired = datetime.date.isoformat(datetime.date.fromtimestamp(mtime))

                    if lazylibrarian.CONFIG['MAG_RENAME']:
                        filedate = issuedate
                        if issuedate and issuedate.isdigit():
                            if len(issuedate) == 8:
                                if check_year(issuedate[:4]):
                                    filedate = 'Issue %d %s' % (int(issuedate[4:]), issuedate[:4])
                                else:
                                    filedate = 'Vol %d Iss %d' % (int(issuedate[:4]), int(issuedate[4:]))
                            elif len(issuedate) == 12:
                                filedate = 'Vol %d Iss %d %s' % (int(issuedate[4:8]), int(issuedate[8:]),
                                                                 issuedate[:4])
                            else:
                                filedate = str(issuedate).zfill(4)

                        extn = os.path.splitext(fname)[1]
                        newfname = lazylibrarian.CONFIG['MAG_DEST_FILE'].replace('$Title', title).replace(
                                                                                 '$IssueDate', filedate)
                        newfname = newfname + extn
                        if newfname and newfname != fname:
                            logger.debug("Rename %s -> %s" % (fname, newfname))
                            newissuefile = os.path.join(rootdir, newfname)
                            newissuefile = safe_move(issuefile, newissuefile)
                            if os.path.exists(issuefile.replace(extn, '.jpg')):
                                safe_move(issuefile.replace(extn, '.jpg'), newissuefile.replace(extn, '.jpg'))
                            if os.path.exists(issuefile.replace(extn, '.opf')):
                                safe_move(issuefile.replace(extn, '.opf'), newissuefile.replace(extn, '.opf'))
                            issuefile = newissuefile

                    logger.debug("Found %s Issue %s" % (title, issuedate))
                    controlValueDict = {"Title": title}

                    # is this magazine already in the database?
                    mag_entry = myDB.match(
                        'SELECT LastAcquired,IssueDate,MagazineAdded,CoverPage from magazines WHERE Title=?', (title,))
                    if not mag_entry:
                        # need to add a new magazine to the database
                        newValueDict = {
                            "Reject": None,
                            "Status": "Active",
                            "MagazineAdded": None,
                            "LastAcquired": None,
                            "LatestCover": None,
                            "IssueDate": None,
                            "IssueStatus": "Skipped",
                            "Regex": None,
                            "CoverPage": 1
                        }
                        logger.debug("Adding magazine %s" % title)
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                        magissuedate = None
                        magazineadded = None
                        maglastacquired = None
                        magcoverpage = 1
                    else:
                        maglastacquired = mag_entry['LastAcquired']
                        magissuedate = mag_entry['IssueDate']
                        magazineadded = mag_entry['MagazineAdded']
                        magissuedate = str(magissuedate).zfill(4)
                        magcoverpage = mag_entry['CoverPage']

                    issuedate = str(issuedate).zfill(4)  # for sorting issue numbers

                    # is this issue already in the database?
                    issue_id = create_id("%s %s" % (title, issuedate))
                    iss_entry = myDB.match('SELECT Title,IssueFile from issues WHERE Title=? and IssueDate=?',
                                           (title, issuedate))
                    new_entry = False
                    if not iss_entry or iss_entry['IssueFile'] != issuefile:
                        new_entry = True  # new entry or name changed
                        if not iss_entry:
                            logger.debug("Adding issue %s %s" % (title, issuedate))
                        else:
                            logger.debug("Updating issue %s %s" % (title, issuedate))
                        controlValueDict = {"Title": title, "IssueDate": issuedate}
                        newValueDict = {
                            "IssueAcquired": iss_acquired,
                            "IssueID": issue_id,
                            "IssueFile": issuefile
                        }
                        myDB.upsert("Issues", newValueDict, controlValueDict)

                    ignorefile = os.path.join(os.path.dirname(issuefile), '.ll_ignore')
                    with open(ignorefile, 'a'):
                        os.utime(ignorefile, None)

                    createMagCover(issuefile,  pagenum=magcoverpage, refresh=new_entry)
                    lazylibrarian.postprocess.processMAGOPF(issuefile, title, issuedate, issue_id, overwrite=new_entry)

                    # see if this issues date values are useful
                    controlValueDict = {"Title": title}
                    if not mag_entry:  # new magazine, this is the only issue
                        newValueDict = {
                            "MagazineAdded": iss_acquired,
                            "LastAcquired": iss_acquired,
                            "LatestCover": os.path.splitext(issuefile)[0] + '.jpg',
                            "IssueDate": issuedate,
                            "IssueStatus": "Open"
                        }
                        myDB.upsert("magazines", newValueDict, controlValueDict)
                    else:
                        # Set magazine_issuedate to issuedate of most recent issue we have
                        # Set latestcover to most recent issue cover
                        # Set magazine_added to acquired date of earliest issue we have
                        # Set magazine_lastacquired to acquired date of most recent issue we have
                        # acquired dates are read from magazine file timestamps
                        newValueDict = {"IssueStatus": "Open"}
                        if not magazineadded or iss_acquired < magazineadded:
                            newValueDict["MagazineAdded"] = iss_acquired
                        if not maglastacquired or iss_acquired > maglastacquired:
                            newValueDict["LastAcquired"] = iss_acquired
                        if not magissuedate or issuedate >= magissuedate:
                            newValueDict["IssueDate"] = issuedate
                            newValueDict["LatestCover"] = os.path.splitext(issuefile)[0] + '.jpg'
                        myDB.upsert("magazines", newValueDict, controlValueDict)

        if lazylibrarian.CONFIG['FULL_SCAN'] and not onetitle:
            magcount = myDB.match("select count(*) from magazines")
            isscount = myDB.match("select count(*) from issues")
            logger.info("Magazine scan complete, found %s magazine%s, %s issue%s" %
                        (magcount['count(*)'], plural(magcount['count(*)']),
                         isscount['count(*)'], plural(isscount['count(*)'])))
        else:
            logger.info("Magazine scan complete")
        lazylibrarian.MAG_UPDATE = 0

    except Exception:
        lazylibrarian.MAG_UPDATE = 0
        logger.error('Unhandled exception in magazineScan: %s' % traceback.format_exc())

Example #48

Show file

File: resultlist.py Project: andyuc88/LazyLibrarian

def findBestResult(resultlist, book, searchtype, source):
    """ resultlist: collated results from search providers
        book:       the book we want to find
        searchtype: book, magazine, shortbook, audiobook etc.
        source:     nzb, tor, rss, direct
        return:     highest scoring match, or None if no match
    """
    # noinspection PyBroadException
    try:
        myDB = database.DBConnection()
        dictrepl = {
            '...': '',
            '.': ' ',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': ' ',
            '*': '',
            '(': '',
            ')': '',
            '[': '',
            ']': '',
            '#': '',
            '0': '',
            '1': '',
            '2': '',
            '3': '',
            '4': '',
            '5': '',
            '6': '',
            '7': '',
            '8': '',
            '9': '',
            '\'': '',
            ':': '',
            '!': '',
            '-': ' ',
            '\s\s': ' '
        }

        dic = {
            '...': '',
            '.': ' ',
            ' & ': ' ',
            ' = ': ' ',
            '?': '',
            '$': 's',
            ' + ': ' ',
            '"': '',
            ',': '',
            '*': '',
            ':': '.',
            ';': '',
            '\'': ''
        }

        if source == 'rss':
            author, title = get_searchterm(book, searchtype)
        else:
            author = unaccented_str(replace_all(book['authorName'], dic))
            title = unaccented_str(replace_all(book['bookName'], dic))

        if book['library'] == 'AudioBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXAUDIO'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINAUDIO'], 0)
            auxinfo = 'AudioBook'

        else:  # elif book['library'] == 'eBook':
            reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
            maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAXSIZE'], 0)
            minsize = check_int(lazylibrarian.CONFIG['REJECT_MINSIZE'], 0)
            auxinfo = 'eBook'

        if source == 'nzb':
            prefix = 'nzb'
        else:  # rss and libgen return same names as torrents
            prefix = 'tor_'

        logger.debug('Searching %s %s results for best %s match' %
                     (len(resultlist), source, auxinfo))

        matches = []
        for res in resultlist:
            resultTitle = unaccented_str(
                replace_all(res[prefix + 'title'], dictrepl)).strip()
            resultTitle = re.sub(r"\s\s+", " ",
                                 resultTitle)  # remove extra whitespace
            Author_match = fuzz.token_set_ratio(author, resultTitle)
            Book_match = fuzz.token_set_ratio(title, resultTitle)
            if lazylibrarian.LOGLEVEL & lazylibrarian.log_fuzz:
                logger.debug("%s author/book Match: %s/%s %s at %s" %
                             (source.upper(), Author_match, Book_match,
                              resultTitle, res[prefix + 'prov']))

            rejected = False

            url = res[prefix + 'url']
            if url is None:
                rejected = True
                logger.debug("Rejecting %s, no URL found" % resultTitle)

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_FAILED']:
                already_failed = myDB.match(
                    'SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                    (url, ))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" %
                                 (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and lazylibrarian.CONFIG['BLACKLIST_PROCESSED']:
                already_failed = myDB.match(
                    'SELECT * from wanted WHERE NZBurl=?', (url, ))
                if already_failed:
                    logger.debug("Rejecting %s, blacklisted at %s" %
                                 (resultTitle, already_failed['NZBprov']))
                    rejected = True

            if not rejected and not url.startswith(
                    'http') and not url.startswith('magnet'):
                rejected = True
                logger.debug("Rejecting %s, invalid URL [%s]" %
                             (resultTitle, url))

            if not rejected:
                for word in reject_list:
                    if word in getList(resultTitle.lower()) and word not in getList(author.lower()) \
                            and word not in getList(title.lower()):
                        rejected = True
                        logger.debug("Rejecting %s, contains %s" %
                                     (resultTitle, word))
                        break

            size_temp = check_int(
                res[prefix + 'size'],
                1000)  # Need to cater for when this is NONE (Issue 35)
            size = round(float(size_temp) / 1048576, 2)

            if not rejected and maxsize and size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % resultTitle)

            if not rejected and minsize and size < minsize:
                rejected = True
                logger.debug("Rejecting %s, too small" % resultTitle)

            if not rejected:
                bookid = book['bookid']
                # newTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
                # newTitle = resultTitle + ' LL.(' + book['bookid'] + ')'

                if source == 'nzb':
                    mode = res['nzbmode']  # nzb, torznab
                else:
                    mode = res[
                        'tor_type']  # torrent, magnet, nzb(from rss), direct

                controlValueDict = {"NZBurl": url}
                newValueDict = {
                    "NZBprov": res[prefix + 'prov'],
                    "BookID": bookid,
                    "NZBdate": now(),  # when we asked for it
                    "NZBsize": size,
                    "NZBtitle": resultTitle,
                    "NZBmode": mode,
                    "AuxInfo": auxinfo,
                    "Status": "Skipped"
                }

                score = (Book_match + Author_match) / 2  # as a percentage
                # lose a point for each unwanted word in the title so we get the closest match
                # but for RSS ignore anything at the end in square braces [keywords, genres etc]
                if source == 'rss':
                    wordlist = getList(resultTitle.rsplit('[', 1)[0].lower())
                else:
                    wordlist = getList(resultTitle.lower())
                words = [
                    x for x in wordlist if x not in getList(author.lower())
                ]
                words = [x for x in words if x not in getList(title.lower())]
                typelist = ''

                if newValueDict['AuxInfo'] == 'eBook':
                    words = [
                        x for x in words
                        if x not in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                    ]
                    typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                elif newValueDict['AuxInfo'] == 'AudioBook':
                    words = [
                        x for x in words if x not in getList(
                            lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                    ]
                    typelist = getList(lazylibrarian.CONFIG['AUDIOBOOK_TYPE'])
                score -= len(words)
                # prioritise titles that include the ebook types we want
                # add more points for booktypes nearer the left in the list
                # eg if epub, mobi, pdf  add 3 points if epub found, 2 for mobi, 1 for pdf
                booktypes = [x for x in wordlist if x in typelist]
                if booktypes:
                    typelist = list(reversed(typelist))
                    for item in booktypes:
                        for i in [
                                i for i, x in enumerate(typelist) if x == item
                        ]:
                            score += i + 1

                matches.append(
                    [score, newValueDict, controlValueDict, res['priority']])

        if matches:
            highest = max(matches, key=lambda s: (s[0], s[3]))
            score = highest[0]
            newValueDict = highest[1]
            # controlValueDict = highest[2]
            dlpriority = highest[3]

            if score < int(lazylibrarian.CONFIG['MATCH_RATIO']):
                logger.info(
                    'Nearest match (%s%%): %s using %s search for %s %s' %
                    (score, newValueDict['NZBtitle'], searchtype,
                     book['authorName'], book['bookName']))
            else:
                logger.info(
                    'Best match (%s%%): %s using %s search, %s priority %s' %
                    (score, newValueDict['NZBtitle'], searchtype,
                     newValueDict['NZBprov'], dlpriority))
            return highest
        else:
            logger.debug("No %s found for [%s] using searchtype %s" %
                         (source, book["searchterm"], searchtype))
        return None
    except Exception:
        logger.error('Unhandled exception in findBestResult: %s' %
                     traceback.format_exc())

Example #49

Show file

    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')
            else:
                title = ''
                authorname = ''

            fullterm = searchterm.replace(' <ll> ', '')
            logger.debug('Now searching Google Books API with searchterm: %s' %
                         fullterm)

            for api_value in api_strings:
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + urllib.quote(
                        api_value +
                        searchterm.encode(lazylibrarian.SYS_ENCODING))
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        title = title.split(' (')[
                            0]  # with out any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace(
                        '"', '')  # and no quotes
                    searchterm = searchterm.strip()
                    set_url = set_url + \
                              urllib.quote(api_value + '"' + searchterm.encode(lazylibrarian.SYS_ENCODING) + '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    set_url = set_url + \
                              urllib.quote(api_value + '"' + searchterm.encode(lazylibrarian.SYS_ENCODING) + '"')
                    searchterm = searchterm.strip()

                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urllib.urlencode(self.params)

                        try:
                            jsonresults, in_cache = get_json_request(URL)
                            if not jsonresults:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn(
                                    'Found no results for %s with value: %s' %
                                    (api_value, searchterm))
                                break
                            else:
                                pass
                        except HTTPError as err:
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while'
                                % err.reason)
                            break

                        startindex += 40

                        for item in jsonresults['items']:

                            total_count += 1

                            # skip if no author, no author is no book.
                            try:
                                Author = item['volumeInfo']['authors'][0]
                            except KeyError:
                                logger.debug(
                                    'Skipped a result without authorfield.')
                                no_author_count += 1
                                continue
                            try:
                                bookname = item['volumeInfo']['title']
                            except KeyError:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(
                                lazylibrarian.CONFIG['IMP_PREFLANG'])
                            booklang = ''
                            if "All" not in valid_langs:  # don't care about languages, accept all
                                try:
                                    # skip if language is not in valid list -
                                    booklang = item['volumeInfo']['language']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' %
                                            (bookname, booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug(
                                        'Skipped %s where no language is found'
                                        % bookname)
                                    continue

                            try:
                                bookpub = item['volumeInfo']['publisher']
                            except KeyError:
                                bookpub = ""

                            try:
                                booksub = item['volumeInfo']['subtitle']
                            except KeyError:
                                booksub = ""

                            try:
                                bookdate = item['volumeInfo']['publishedDate']
                            except KeyError:
                                bookdate = '0000-00-00'
                            bookdate = bookdate[:4]

                            try:
                                bookimg = item['volumeInfo']['imageLinks'][
                                    'thumbnail']
                            except KeyError:
                                bookimg = 'images/nocover.png'

                            try:
                                bookrate = item['volumeInfo']['averageRating']
                            except KeyError:
                                bookrate = 0

                            try:
                                bookpages = item['volumeInfo']['pageCount']
                            except KeyError:
                                bookpages = '0'

                            try:
                                bookgenre = item['volumeInfo']['categories'][0]
                            except KeyError:
                                bookgenre = ""

                            try:
                                bookdesc = item['volumeInfo']['description']
                            except KeyError:
                                bookdesc = 'Not available'

                            try:
                                num_reviews = item['volumeInfo'][
                                    'ratingsCount']
                            except KeyError:
                                num_reviews = 0

                            try:
                                if item['volumeInfo']['industryIdentifiers'][
                                        0]['type'] == 'ISBN_10':
                                    bookisbn = item['volumeInfo'][
                                        'industryIdentifiers'][0]['identifier']
                                else:
                                    bookisbn = 0
                            except KeyError:
                                bookisbn = 0

                            if authorname:
                                author_fuzz = fuzz.ratio(Author, authorname)
                            else:
                                author_fuzz = fuzz.ratio(Author, fullterm)

                            if title:
                                book_fuzz = fuzz.ratio(bookname, title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(bookname))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.ratio(bookname, fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2,
                                               isbn_fuzz)

                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(bookname, dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace
                            bookid = item['id']

                            author = myDB.select(
                                'SELECT AuthorID FROM authors WHERE AuthorName = "%s"'
                                % Author.replace('"', '""'))
                            if author:
                                AuthorID = author[0]['authorid']
                            else:
                                AuthorID = ''

                            resultlist.append({
                                'authorname':
                                Author,
                                'authorid':
                                AuthorID,
                                'bookid':
                                bookid,
                                'bookname':
                                bookname,
                                'booksub':
                                booksub,
                                'bookisbn':
                                bookisbn,
                                'bookpub':
                                bookpub,
                                'bookdate':
                                bookdate,
                                'booklang':
                                booklang,
                                'booklink':
                                item['volumeInfo']['canonicalVolumeLink'],
                                'bookrate':
                                float(bookrate),
                                'bookimg':
                                bookimg,
                                'bookpages':
                                bookpages,
                                'bookgenre':
                                bookgenre,
                                'bookdesc':
                                bookdesc,
                                'author_fuzz':
                                author_fuzz,
                                'book_fuzz':
                                book_fuzz,
                                'isbn_fuzz':
                                isbn_fuzz,
                                'highest_fuzz':
                                highest_fuzz,
                                'num_reviews':
                                num_reviews
                            })

                            resultcount += 1

                except KeyError:
                    break

                logger.debug(
                    "Returning %s result%s for (%s) with keyword: %s" %
                    (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" %
                         (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" %
                         (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" %
                         (no_author_count, plural(no_author_count)))
            logger.debug(
                'The Google Books API was hit %s time%s for searchterm: %s' %
                (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' %
                         traceback.format_exc())

Example #50

Show file

def TORDownloadMethod(bookid=None,
                      tor_title=None,
                      tor_url=None,
                      library='eBook'):
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    torrent = ''

    full_url = tor_url  # keep the url as stored in "wanted" table
    if 'magnet:?' in tor_url:
        # discard any other parameters and just use the magnet link
        tor_url = 'magnet:?' + tor_url.split('magnet:?')[1]
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &amp;
        #
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC',
                                          value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = makeUnicode(value)  # ensure unicode
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add extra parameters
        if not tor_url.endswith('.torrent') and '.torrent' in tor_url:
            tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': getUserAgent()}
        proxies = proxyList()

        try:
            logger.debug("Fetching %s" % tor_url)
            r = requests.get(tor_url,
                             headers=headers,
                             timeout=90,
                             proxies=proxies)
            if str(r.status_code).startswith('2'):
                torrent = r.content
                if not len(torrent):
                    res = "Got empty response for %s" % tor_url
                    logger.warn(res)
                    return False, res
                elif len(torrent) < 100:
                    res = "Only got %s bytes for %s" % (len(torrent), tor_url)
                    logger.warn(res)
                    return False, res
                else:
                    logger.debug("Got %s bytes for %s" %
                                 (len(torrent), tor_url))
            else:
                res = "Got a %s response for %s" % (r.status_code, tor_url)
                logger.warn(res)
                return False, res

        except requests.exceptions.Timeout:
            res = 'Timeout fetching file from url: %s' % tor_url
            logger.warn(res)
            return False, res
        except Exception as e:
            # some jackett providers redirect internally using http 301 to a magnet link
            # which requests can't handle, so throws an exception
            logger.debug("Requests exception: %s" % str(e))
            if "magnet:?" in str(e):
                tor_url = 'magnet:?' + str(e).split('magnet:?')[1].strip("'")
                logger.debug("Redirecting to %s" % tor_url)
            else:
                if hasattr(e, 'reason'):
                    res = '%s fetching file from url: %s, %s' % (
                        type(e).__name__, tor_url, e.reason)
                else:
                    res = '%s fetching file from url: %s, %s' % (
                        type(e).__name__, tor_url, str(e))
                logger.warn(res)
                return False, res

    if not torrent and not tor_url.startswith('magnet:?'):
        res = "No magnet or data, cannot continue"
        logger.warn(res)
        return False, res

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = calculate_torrent_hash(tor_url)
                if not hashid:
                    hashid = tor_name
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                        tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                        tor_name)
                msg = ''
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, text_type):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm '
                    setperm(tor_path)
                    msg += 'Saved '
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    res = "Failed to write magnet to file: %s %s" % (
                        type(e).__name__, str(e))
                    logger.warn(res)
                    logger.debug("Progress: %s Filename [%s]" %
                                 (msg, repr(tor_path)))
                    return False, res
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                    tor_name)
            msg = ''
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, text_type):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved '
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                res = "Failed to write torrent to file: %s %s" % (
                    type(e).__name__, str(e))
                logger.warn(res)
                logger.debug("Progress: %s Filename [%s]" %
                             (msg, repr(tor_path)))
                return False, res

    hashid = calculate_torrent_hash(tor_url, torrent)
    if not hashid:
        res = "Unable to calculate torrent hash from url/data"
        logger.error(res)
        logger.debug("url: %s" % tor_url)
        logger.debug("data: %s" % makeUnicode(str(torrent[:50])))
        return False, res

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG[
            'UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        downloadID, res = utorrent.addTorrent(tor_url,
                                              hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG[
            'RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        if torrent:
            logger.debug("Sending %s data to rTorrent" % tor_title)
            downloadID, res = rtorrent.addTorrent(tor_title,
                                                  hashid,
                                                  data=torrent)
        else:
            logger.debug("Sending %s url to rTorrent" % tor_title)
            downloadID, res = rtorrent.addTorrent(
                tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG[
            'TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG[
                'QBITTORRENT_HOST']:
        Source = "QBITTORRENT"
        if torrent:
            logger.debug("Sending %s data to qBittorrent" % tor_title)
            status, res = qbittorrent.addFile(torrent, hashid, tor_title)
        else:
            logger.debug("Sending %s url to qBittorrent" % tor_title)
            status, res = qbittorrent.addTorrent(
                tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG[
            'TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG[
                'TRANSMISSION_HOST']:
        Source = "TRANSMISSION"
        if torrent:
            logger.debug("Sending %s data to Transmission" % tor_title)
            # transmission needs b64encoded metainfo to be unicode, not bytes
            downloadID, res = transmission.addTorrent(None,
                                                      metainfo=makeUnicode(
                                                          b64encode(torrent)))
        else:
            logger.debug("Sending %s url to Transmission" % tor_title)
            downloadID, res = transmission.addTorrent(
                tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = hashid
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s url to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID, res = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG[
            'DELUGE_HOST']:
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            if torrent:
                logger.debug("Sending %s data to Deluge" % tor_title)
                downloadID, res = deluge.addTorrent(tor_title,
                                                    data=b64encode(torrent))
            else:
                logger.debug("Sending %s url to Deluge" % tor_title)
                downloadID, res = deluge.addTorrent(
                    tor_url)  # can be link or magnet, returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
            else:
                return False, res
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    res = "Sending %s magnet to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_magnet',
                                             tor_url, args)
                elif torrent:
                    res = "Sending %s data to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_file',
                                             tor_title, b64encode(torrent),
                                             args)
                else:
                    res = "Sending %s url to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_url', tor_url,
                                             args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call(
                            'label.set_torrent', downloadID,
                            lazylibrarian.CONFIG['DELUGE_LABEL'].lower())
                    result = client.call('core.get_torrent_status', downloadID,
                                         {})
                    if 'name' in result:
                        tor_title = result['name']
                else:
                    res += ' failed'
                    logger.error(res)
                    return False, res

            except Exception as e:
                res = 'DelugeRPC failed %s %s' % (type(e).__name__, str(e))
                logger.error(res)
                return False, res

    if not Source:
        res = 'No torrent download method is enabled, check config.'
        logger.warn(res)
        return False, res

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn(
                    '%s: name contains hash, probably unresolved magnet' %
                    Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'Magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'],
                                          ',')
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'],
                                          ',')
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'],
                                          ',')
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" %
                                 library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = "Rejecting torrent name %s, contains %s" % (
                            tor_title, word)
                        logger.debug(rejected)
                        break
                if not rejected:
                    rejected = check_contents(Source, downloadID, library,
                                              tor_title)
                if rejected:
                    myDB.action(
                        'UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                        (rejected, full_url))
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' %
                                 (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?',
                                (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?',
                        (bookid, ))
        elif library == 'AudioBook':
            myDB.action(
                'UPDATE books SET audiostatus="Snatched" WHERE BookID=?',
                (bookid, ))
        myDB.action(
            'UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
            (Source, downloadID, full_url))
        return True, ''

    res = 'Failed to send torrent to %s' % Source
    logger.error(res)
    return False, res

Example #51

Show file

    def find_book(bookid=None, queue=None):
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, in_cache = get_json_request(URL)

        if not jsonresults:
            logger.debug('No results found for %s' % bookid)
            return

        bookname = jsonresults['volumeInfo']['title']
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        try:
            authorname = jsonresults['volumeInfo']['authors'][0]
        except KeyError:
            logger.debug('Book %s does not contain author field, skipping' %
                         bookname)
            return
        try:
            # warn if language is in ignore list, but user said they wanted this book
            booklang = jsonresults['volumeInfo']['language']
            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            if booklang not in valid_langs and 'All' not in valid_langs:
                logger.debug(
                    'Book %s googlebooks language does not match preference, %s'
                    % (bookname, booklang))
        except KeyError:
            logger.debug('Book does not have language field')
            booklang = "Unknown"

        try:
            bookpub = jsonresults['volumeInfo']['publisher']
        except KeyError:
            bookpub = ""

        series = ""
        seriesNum = ""
        try:
            booksub = jsonresults['volumeInfo']['subtitle']
            try:
                series = booksub.split('(')[1].split(' Series ')[0]
            except IndexError:
                series = ""
            try:
                seriesNum = booksub.split('(')[1].split(' Series ')[1].split(
                    ')')[0]
                if seriesNum[0] == '#':
                    seriesNum = seriesNum[1:]
            except IndexError:
                seriesNum = ""
        except KeyError:
            booksub = ""

        try:
            bookdate = jsonresults['volumeInfo']['publishedDate']
        except KeyError:
            bookdate = '0000-00-00'

        try:
            bookimg = jsonresults['volumeInfo']['imageLinks']['thumbnail']
        except KeyError:
            bookimg = 'images/nocover.png'

        try:
            bookrate = jsonresults['volumeInfo']['averageRating']
        except KeyError:
            bookrate = 0

        try:
            bookpages = jsonresults['volumeInfo']['pageCount']
        except KeyError:
            bookpages = 0

        try:
            bookgenre = jsonresults['volumeInfo']['categories'][0]
        except KeyError:
            bookgenre = ""

        try:
            bookdesc = jsonresults['volumeInfo']['description']
        except KeyError:
            bookdesc = ""

        try:
            if jsonresults['volumeInfo']['industryIdentifiers'][0][
                    'type'] == 'ISBN_10':
                bookisbn = jsonresults['volumeInfo']['industryIdentifiers'][0][
                    'identifier']
            else:
                bookisbn = ""
        except KeyError:
            bookisbn = ""

        booklink = jsonresults['volumeInfo']['canonicalVolumeLink']
        bookrate = float(bookrate)

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            match = myDB.match(
                'SELECT AuthorID from authors WHERE AuthorID="%s"' % AuthorID)
            if not match:
                match = myDB.match(
                    'SELECT AuthorID from authors WHERE AuthorName="%s"' %
                    author['authorname'])
                if match:
                    logger.debug(
                        '%s: Changing authorid from %s to %s' %
                        (author['authorname'], AuthorID, match['AuthorID']))
                    AuthorID = match[
                        'AuthorID']  # we have a different authorid for that authorname
                else:  # no author but request to add book, add author as "ignored"
                    # User hit "add book" button from a search
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": "Ignored"
                    }
                    myDB.upsert("authors", newValueDict, controlValueDict)
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" %
                        (authorname, bookname))
            return

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' %
                             (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

            elif bookimg and bookimg.startswith('http'):
                link, success = cache_img("book", bookid, bookimg)
                if success:
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"BookImg": link}
                    myDB.upsert("books", newValueDict, controlValueDict)
                else:
                    logger.debug('Failed to cache image for %s' % bookimg)

        if lazylibrarian.CONFIG['ADD_SERIES']:
            # prefer series info from librarything
            seriesdict = getWorkSeries(bookid)
            if seriesdict:
                logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
            else:
                if series:
                    seriesdict = {cleanName(unaccented(series)): seriesNum}
            setSeries(seriesdict, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)

Example #52

Show file

def DirectDownloadMethod(bookid=None,
                         dl_title=None,
                         dl_url=None,
                         library='eBook'):
    myDB = database.DBConnection()
    downloadID = False
    Source = "DIRECT"

    logger.debug("Starting Direct Download for [%s]" % dl_title)
    proxies = proxyList()
    headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
    try:
        r = requests.get(dl_url, headers=headers, timeout=90, proxies=proxies)
    except requests.exceptions.Timeout:
        logger.warn('Timeout fetching file from url: %s' % dl_url)
        return False
    except Exception as e:
        if hasattr(e, 'reason'):
            logger.warn('%s fetching file from url: %s, %s' %
                        (type(e).__name__, dl_url, e.reason))
        else:
            logger.warn('%s fetching file from url: %s, %s' %
                        (type(e).__name__, dl_url, str(e)))
        return False

    if not str(r.status_code).startswith('2'):
        logger.debug("Got a %s response for %s" % (r.status_code, dl_url))
    elif len(r.content) < 1000:
        logger.debug("Only got %s bytes for %s, rejecting" %
                     (len(r.content), dl_title))
    else:
        extn = ''
        basename = ''
        if ' ' in dl_title:
            basename, extn = dl_title.rsplit(
                ' ', 1)  # last word is often the extension - but not always...
        if extn and extn in getList(lazylibrarian.CONFIG['EBOOK_TYPE']):
            dl_title = '.'.join(dl_title.rsplit(' ', 1))
        elif magic:
            mtype = magic.from_buffer(r.content)
            if 'EPUB' in mtype:
                extn = '.epub'
            elif 'Mobipocket' in mtype:  # also true for azw and azw3, does it matter?
                extn = '.mobi'
            elif 'PDF' in mtype:
                extn = '.pdf'
            else:
                logger.debug("magic reports %s" % mtype)
            basename = dl_title
        else:
            logger.warn("Don't know the filetype for %s" % dl_title)
            basename = dl_title

        logger.debug("File download got %s bytes for %s" %
                     (len(r.content), dl_title))
        destdir = os.path.join(lazylibrarian.DIRECTORY('Download'), basename)
        # destdir = os.path.join(lazylibrarian.DIRECTORY('Download'), '%s LL.(%s)' % (basename, bookid))
        if not os.path.isdir(destdir):
            _ = mymakedirs(destdir)

        try:
            hashid = dl_url.split("md5=")[1].split("&")[0]
        except IndexError:
            hashid = sha1(encode(dl_url)).hexdigest()

        destfile = os.path.join(destdir, basename + extn)
        try:
            with open(destfile, 'wb') as bookfile:
                bookfile.write(r.content)
            setperm(destfile)
            downloadID = hashid
        except Exception as e:
            logger.error("%s writing book to %s, %s" %
                         (type(e).__name__, destfile, e))

    if downloadID:
        logger.debug('File %s has been downloaded from %s' %
                     (dl_title, dl_url))
        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?',
                        (bookid, ))
        elif library == 'AudioBook':
            myDB.action(
                'UPDATE books SET audiostatus="Snatched" WHERE BookID=?',
                (bookid, ))
        myDB.action(
            'UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
            (Source, downloadID, dl_url))
        return True
    else:
        logger.error('Failed to download file @ <a href="%s">%s</a>' %
                     (dl_url, dl_url))
        myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?',
                    (dl_url, ))
        return False

Example #53

Show file

File: email_notify.py Project: andyuc88/LazyLibrarian

    def notify_download(self, title, bookid=None, force=False):
        # suppress notifications if the notifier is disabled but the notify options are checked
        if not lazylibrarian.CONFIG['USE_EMAIL'] and not force:
            return False

        if lazylibrarian.CONFIG['EMAIL_NOTIFY_ONDOWNLOAD']:
            files = None
            event = notifyStrings[NOTIFY_DOWNLOAD]
            logger.debug('Email send attachment is %s' % lazylibrarian.CONFIG['EMAIL_SENDFILE_ONDOWNLOAD'])
            if lazylibrarian.CONFIG['EMAIL_SENDFILE_ONDOWNLOAD']:
                if not bookid:
                    logger.debug('Email request to attach book, but no bookid')
                else:
                    filename = None
                    preftype = None
                    typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])

                    if lazylibrarian.CONFIG['HTTP_LOOK'] == 'legacy' or not lazylibrarian.CONFIG['USER_ACCOUNTS']:
                        preftype = typelist[0]
                        logger.debug('Preferred filetype = %s' % preftype)
                    else:
                        myDB = database.DBConnection()
                        cookie = cherrypy.request.cookie
                        if cookie and 'll_uid' in list(cookie.keys()):
                            res = myDB.match('SELECT BookType from users where UserID=?', (cookie['ll_uid'].value,))
                            if res and res['BookType']:
                                preftype = res['BookType']
                                logger.debug('User preferred filetype = %s' % preftype)
                        if not preftype:
                            logger.debug('Default preferred filetype = %s' % preftype)
                            preftype = typelist[0]

                    myDB = database.DBConnection()
                    data = myDB.match('SELECT BookFile,BookName from books where BookID=?', (bookid,))
                    if data:
                        bookfile = data['BookFile']
                        types = []
                        if bookfile and os.path.isfile(bookfile):
                            basename, extn = os.path.splitext(bookfile)
                            for item in typelist:
                                target = basename + '.' + item
                                if os.path.isfile(target):
                                    types.append(item)

                            logger.debug('Available filetypes: %s' % str(types))

                            if preftype in types:
                                filename = basename + '.' + preftype
                            else:
                                filename = basename + '.' + types[0]

                        title = data['BookName']
                        logger.debug('Found %s for bookid %s' % (filename, bookid))
                    else:
                        logger.debug('[%s] is not a valid bookid' % bookid)
                        data = myDB.match('SELECT IssueFile,Title,IssueDate from issues where IssueID=?', (bookid,))
                        if data:
                            filename = data['IssueFile']
                            title = "%s - %s" % (data['Title'], data['IssueDate'])
                            logger.debug('Found %s for issueid %s' % (filename, bookid))
                        else:
                            logger.debug('[%s] is not a valid issueid' % bookid)
                            filename = ''
                    if filename:
                        files = [filename]  # could add cover_image, opf
                        event = "LazyLibrarian Download"
            return self._notify(message=title, event=event, force=force, files=files)
        return False

Example #54

Show file

File: searchrss.py Project: Finch106/LazyLibrarian

def processResultList(resultlist, authorname, bookname, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {
        '...': '',
        '.': ' ',
        ' & ': ' ',
        ' = ': ' ',
        '?': '',
        '$': 's',
        ' + ': ' ',
        '"': '',
        ',': ' ',
        '*': '',
        '(': '',
        ')': '',
        '[': '',
        ']': '',
        '#': '',
        '0': '',
        '1': '',
        '2': '',
        '3': '',
        '4': '',
        '5': '',
        '6': '',
        '7': '',
        '8': '',
        '9': '',
        '\'': '',
        ':': '',
        '!': '',
        '-': ' ',
        '\s\s': ' '
    }

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = getList(lazylibrarian.REJECT_WORDS)

    matches = []

    # bit of a misnomer now, rss can search both tor and nzb rss feeds
    for tor in resultlist:
        torTitle = unaccented_str(replace_all(tor['tor_title'],
                                              dictrepl)).strip()
        torTitle = re.sub(r"\s\s+", " ", torTitle)  # remove extra whitespace

        tor_Author_match = fuzz.token_set_ratio(authorname, torTitle)
        tor_Title_match = fuzz.token_set_ratio(bookname, torTitle)
        logger.debug("RSS Author/Title Match: %s/%s for %s" %
                     (tor_Author_match, tor_Title_match, torTitle))
        tor_url = tor['tor_url']

        rejected = False

        already_failed = myDB.match(
            'SELECT * from wanted WHERE NZBurl="%s" and Status="Failed"' %
            tor_url)
        if already_failed:
            logger.debug("Rejecting %s, blacklisted at %s" %
                         (torTitle, already_failed['NZBprov']))
            rejected = True

        if not rejected:
            for word in reject_list:
                if word in torTitle.lower() and word not in authorname.lower(
                ) and word not in bookname.lower():
                    rejected = True
                    logger.debug("Rejecting %s, contains %s" %
                                 (torTitle, word))
                    break

        tor_size_temp = tor[
            'tor_size']  # Need to cater for when this is NONE (Issue 35)
        if tor_size_temp is None:
            tor_size_temp = 1000
        tor_size = round(float(tor_size_temp) / 1048576, 2)
        maxsize = check_int(lazylibrarian.REJECT_MAXSIZE, 0)

        if not rejected:
            if maxsize and tor_size > maxsize:
                rejected = True
                logger.debug("Rejecting %s, too large" % torTitle)

        if not rejected:
            bookid = book['bookid']
            tor_Title = (book["authorName"] + ' - ' + book['bookName'] +
                         ' LL.(' + book['bookid'] + ')').strip()
            tor_prov = tor['tor_prov']
            tor_feed = tor['tor_feed']

            controlValueDict = {"NZBurl": tor_url}
            newValueDict = {
                "NZBprov": tor_prov,
                "BookID": bookid,
                "NZBdate": now(),  # when we asked for it
                "NZBsize": tor_size,
                "NZBtitle": tor_Title,
                "NZBmode": "torrent",
                "Status": "Skipped"
            }

            score = (tor_Title_match + tor_Author_match) / 2  # as a percentage
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(torTitle))
            words -= len(getList(authorname))
            words -= len(getList(bookname))
            score -= abs(words)
            matches.append([score, torTitle, newValueDict, controlValueDict])

    if matches:
        highest = max(matches, key=lambda x: x[0])
        score = highest[0]
        nzb_Title = highest[1]
        newValueDict = highest[2]
        controlValueDict = highest[3]

        if score < match_ratio:
            logger.debug(
                u'Nearest RSS match (%s%%): %s using %s search for %s %s' %
                (score, nzb_Title, searchtype, authorname, bookname))
            return False

        logger.info(u'Best RSS match (%s%%): %s using %s search' %
                    (score, nzb_Title, searchtype))

        snatchedbooks = myDB.match(
            'SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
            newValueDict["BookID"])

        if snatchedbooks:  # check if one of the other downloaders got there first
            logger.info('%s already marked snatched' % nzb_Title)
            return True
        else:
            myDB.upsert("wanted", newValueDict, controlValueDict)
            tor_url = controlValueDict["NZBurl"]
            if '.nzb' in tor_url:
                snatch = NZBDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"],
                                           controlValueDict["NZBurl"])
            else:
                """
                #  http://baconbits.org/torrents.php?action=download&authkey=<authkey>&torrent_pass=<password.hashed>&id=185398
                if not tor_url.startswith('magnet'):  # magnets don't use auth
                    pwd = lazylibrarian.RSS_PROV[tor_feed]['PASS']
                    auth = lazylibrarian.RSS_PROV[tor_feed]['AUTH']
                    # don't know what form of password hash is required, try sha1
                    tor_url = tor_url.replace('<authkey>', auth).replace('<password.hashed>', sha1(pwd))
                """
                snatch = TORDownloadMethod(newValueDict["BookID"],
                                           newValueDict["NZBprov"],
                                           newValueDict["NZBtitle"], tor_url)

            if snatch:
                logger.info(
                    'Downloading %s from %s' %
                    (newValueDict["NZBtitle"], newValueDict["NZBprov"]))
                notify_snatch(
                    "%s from %s at %s" %
                    (newValueDict["NZBtitle"], newValueDict["NZBprov"], now()))
                scheduleJob(action='Start', target='processDir')
                return True + True  # we found it
    else:
        logger.debug("No RSS found for " +
                     (book["authorName"] + ' ' + book['bookName']).strip())
    return False

Example #55

Show file

File: manualbook.py Project: kuuratsanik/LazyLibrarian

def searchItem(item=None, bookid=None):
    """
    Call all active search providers asking for a "general" search for item
    return a list of results, each entry in list containing percentage_match, title, provider, size, url
    """
    results = []

    if not item:
        return results

    if not internet():
        logger.debug('Search Item: No internet connection')
        return results

    book = {}
    searchterm = unaccented_str(item)

    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    nproviders = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR(
    ) + lazylibrarian.USE_RSS()
    logger.debug('Searching %s providers for %s' % (nproviders, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nproviders = IterateOverNewzNabSites(book, 'general')
        if nproviders:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nproviders = IterateOverTorrentSites(book, 'general')
        if nproviders:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nproviders = IterateOverRSSSites()
        if nproviders:
            results += resultlist

    # reprocess to get consistent results
    searchresults = []
    for item in results:
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        if 'nzbtitle' in item:
            title = item['nzbtitle']
        if 'nzburl' in item:
            url = item['nzburl']
        if 'nzbprov' in item:
            provider = item['nzbprov']
        if 'nzbsize' in item:
            size = item['nzbsize']
        if 'nzbdate' in item:
            date = item['nzbdate']
        if 'nzbmode' in item:
            mode = item['nzbmode']
        if 'tor_title' in item:
            title = item['tor_title']
        if 'tor_url' in item:
            url = item['tor_url']
        if 'tor_prov' in item:
            provider = item['tor_prov']
        if 'tor_size' in item:
            size = item['tor_size']
        if 'tor_date' in item:
            date = item['tor_date']
        if 'tor_type' in item:
            mode = item['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'

            # calculate match percentage
            score = fuzz.token_set_ratio(searchterm, title)
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
            if score >= 40:  # ignore wildly wrong results?
                url = url.split('?')[0]
                result = {
                    'score': score,
                    'title': title,
                    'provider': provider,
                    'size': size,
                    'date': date,
                    'url': urllib.quote_plus(url),
                    'mode': mode
                }

                searchresults.append(result)

            # from operator import itemgetter
            # searchresults = sorted(searchresults, key=itemgetter('score'), reverse=True)

    logger.debug('Found %s results for %s' % (len(searchresults), searchterm))
    return searchresults

Example #56

Show file

File: searchnzb.py Project: NVRemoteDev/LazyLibrarian

def processResultList(resultlist, book, searchtype):
    myDB = database.DBConnection()
    dictrepl = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': ' ', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '', '1': '',
                '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '', '9': '', '\'': '',
                ':': '', '!': '', '-': ' ', '\s\s': ' '}
                # ' the ': ' ', ' a ': ' ', ' and ': ' ',
                # ' to ': ' ', ' of ': ' ', ' for ': ' ', ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    dic = {'...': '', '.': ' ', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}

    match_ratio = int(lazylibrarian.MATCH_RATIO)
    reject_list = formatter.getList(lazylibrarian.REJECT_WORDS)

    for nzb in resultlist:
        nzbTitle = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip()
        nzbTitle = re.sub(r"\s\s+", " ", nzbTitle)  # remove extra whitespace

        author = formatter.latinToAscii(formatter.replace_all(book['authorName'], dic))
        title = formatter.latinToAscii(formatter.replace_all(book['bookName'], dic))
        # nzbTitle_match = fuzz.token_set_ratio(book['searchterm'], nzbTitle)
        # logger.debug(u"NZB Title sort Match %: " + str(nzbTitle_match) + " for " + nzbTitle)
        nzbAuthor_match = fuzz.token_set_ratio(author, nzbTitle)
        nzbBook_match = fuzz.token_set_ratio(title, nzbTitle)
        logger.debug(u"NZB author/book Match: %s/%s for %s" % (nzbAuthor_match, nzbBook_match, nzbTitle))

        rejected = False
        for word in reject_list:
            if word in nzbTitle.lower() and not word in author.lower() and not word in title.lower():
                rejected = True
                logger.debug("Rejecting %s, contains %s" % (nzbTitle, word))
                break

        if (nzbAuthor_match >= match_ratio and nzbBook_match >= match_ratio and not rejected):
            logger.debug(u'Found NZB: %s using %s search' % (nzb['nzbtitle'], searchtype))
            bookid = book['bookid']
            nzbTitle = (author + ' - ' + title + ' LL.(' + book['bookid'] + ')').strip()
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']
            nzbdate_temp = nzb['nzbdate']
            nzbsize_temp = nzb['nzbsize']  # Need to cater for when this is NONE (Issue 35)
            if nzbsize_temp is None:
                nzbsize_temp = 1000
            nzbsize = str(round(float(nzbsize_temp) / 1048576, 2)) + ' MB'
            nzbdate = formatter.nzbdate2format(nzbdate_temp)
            nzbmode = nzb['nzbmode']
            controlValueDict = {"NZBurl": nzburl}
            newValueDict = {
                "NZBprov": nzbprov,
                "BookID": bookid,
                "NZBdate": formatter.now(),  # when we asked for it
                "NZBsize": nzbsize,
                "NZBtitle": nzbTitle,
                "NZBmode": nzbmode,
                "Status": "Skipped"
            }
            myDB.upsert("wanted", newValueDict, controlValueDict)

            snatchedbooks = myDB.action('SELECT * from books WHERE BookID="%s" and Status="Snatched"' %
                                        bookid).fetchone()
            if not snatchedbooks:
                if nzbmode == "torznab":
                    snatch = TORDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                else:
                    snatch = NZBDownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                if snatch:
                    notifiers.notify_snatch(nzbTitle + ' at ' + formatter.now())
                    common.schedule_job(action='Start', target='processDir')
                    return True

    logger.debug("No nzb's found for " + (book["authorName"] + ' ' + book['bookName']).strip() +
                 " using searchtype " + searchtype)
    return False

Example #57

Show file

File: email_notify.py Project: DarkSir23/LazyLibrarian

    def notify_download(self, title, bookid=None, force=False):
        # suppress notifications if the notifier is disabled but the notify options are checked
        if not lazylibrarian.CONFIG['USE_EMAIL'] and not force:
            return False

        if lazylibrarian.CONFIG['EMAIL_NOTIFY_ONDOWNLOAD'] or force:
            files = None
            event = notifyStrings[NOTIFY_DOWNLOAD]
            logger.debug('Email send attachment is %s' %
                         lazylibrarian.CONFIG['EMAIL_SENDFILE_ONDOWNLOAD'])
            if lazylibrarian.CONFIG['EMAIL_SENDFILE_ONDOWNLOAD']:
                if not bookid:
                    logger.debug('Email request to attach book, but no bookid')
                else:
                    filename = None
                    preftype = None
                    typelist = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])

                    if lazylibrarian.CONFIG[
                            'HTTP_LOOK'] == 'legacy' or not lazylibrarian.CONFIG[
                                'USER_ACCOUNTS']:
                        preftype = typelist[0]
                        logger.debug('Preferred filetype = %s' % preftype)
                    else:
                        myDB = database.DBConnection()
                        cookie = cherrypy.request.cookie
                        if cookie and 'll_uid' in list(cookie.keys()):
                            res = myDB.match(
                                'SELECT BookType from users where UserID=?',
                                (cookie['ll_uid'].value, ))
                            if res and res['BookType']:
                                preftype = res['BookType']
                                logger.debug('User preferred filetype = %s' %
                                             preftype)
                        if not preftype:
                            logger.debug('Default preferred filetype = %s' %
                                         preftype)
                            preftype = typelist[0]

                    myDB = database.DBConnection()
                    data = myDB.match(
                        'SELECT BookFile,BookName from books where BookID=?',
                        (bookid, ))
                    if data:
                        bookfile = data['BookFile']
                        types = []
                        if bookfile and os.path.isfile(bookfile):
                            basename, extn = os.path.splitext(bookfile)
                            for item in typelist:
                                target = basename + '.' + item
                                if os.path.isfile(target):
                                    types.append(item)

                            logger.debug('Available filetypes: %s' %
                                         str(types))

                            if preftype in types:
                                filename = basename + '.' + preftype
                            else:
                                filename = basename + '.' + types[0]

                        title = data['BookName']
                        logger.debug('Found %s for bookid %s' %
                                     (filename, bookid))
                    else:
                        logger.debug('[%s] is not a valid bookid' % bookid)
                        data = myDB.match(
                            'SELECT IssueFile,Title,IssueDate from issues where IssueID=?',
                            (bookid, ))
                        if data:
                            filename = data['IssueFile']
                            title = "%s - %s" % (data['Title'],
                                                 data['IssueDate'])
                            logger.debug('Found %s for issueid %s' %
                                         (filename, bookid))
                        else:
                            logger.debug('[%s] is not a valid issueid' %
                                         bookid)
                            filename = ''
                    if filename:
                        files = [filename]  # could add cover_image, opf
                        event = "LazyLibrarian Download"
            return self._notify(message=title,
                                event=event,
                                force=force,
                                files=files)
        return False