Exemple #1
0
def get_searchterm(book, searchType):
    """ Build the (authorname, bookname) pair used as provider search terms.

    book is a mapping with 'authorName', 'bookName' and 'bookSub' entries.
    Both names are run through cleanName (with "'" as its second argument).
    For searchType 'book', 'audio' or anything containing 'short', the title
    is de-duplicated against the author name and the author name is reduced
    to its significant words. Any other searchType gets the cleaned names back
    unchanged.
    """
    authorname = cleanName(book['authorName'], "'")
    bookname = cleanName(book['bookName'], "'")

    if not (searchType in ['book', 'audio'] or 'short' in searchType):
        return authorname, bookname

    # Title identical to the author, eg "Spike Milligan: Man of Letters" where
    # the title/subtitle were split on ':' - search for the subtitle instead
    if bookname == authorname and book['bookSub']:
        bookname = cleanName(book['bookSub'])

    # Title starting with the author, eg "Spike Milligan In his own words".
    # Drop the author prefix and the character following it so we don't look
    # for "Spike Milligan Spike Milligan In his own words"
    prefix_len = len(authorname)
    if len(bookname) > prefix_len and bookname.startswith(authorname):
        bookname = bookname[prefix_len + 1:]
    bookname = bookname.strip()

    # No initials or extensions after the surname:
    #   L. E. Modesitt Jr.      -> Modesitt
    #   Charles H. Elliott, Phd -> Charles Elliott
    #   Tom Holt                -> Tom Holt
    # Calibre directories may have trailing '.' replaced by '_'  eg Jr_
    if ' ' in authorname:
        postfix = getList(lazylibrarian.CONFIG['NAME_POSTFIX'])
        kept = []
        for part in authorname.split(' '):
            part = part.strip('.').strip('_')
            if len(part) > 1 and part.lower() not in postfix:
                kept.append(part)
        authorname = ' '.join(kept)

    # short searches ignore anything from the first '(' onwards
    if 'short' in searchType and '(' in bookname:
        bookname = bookname.partition('(')[0].strip()

    return authorname, bookname
Exemple #2
0
def get_searchterm(book, searchType):
    """ Return the (authorname, bookname) pair to use as provider search terms.

    book is a mapping with 'authorName', 'bookName' and 'bookSub' entries.
    For searchType "book" or "shortbook" leading initials are stripped from
    the author name, both names are run through cleanName and the title is
    de-duplicated against the author name. "shortbook" additionally drops
    everything from the first '(' in the title.
    Any other searchType returns the two values from book untouched.
    """
    authorname = book['authorName']
    bookname = book['bookName']
    if searchType not in ("book", "shortbook"):
        # any other searchType
        return authorname, bookname

    # Strip any leading initials (and the whitespace after them). The length
    # check stops the loop instead of raising IndexError when the name is,
    # or has been reduced to, fewer than two characters (eg "A." or "J R").
    while len(authorname) > 1 and authorname[1] in '. ':
        authorname = authorname[2:].strip()
    # middle initials can't have a dot
    authorname = authorname.replace('. ', ' ')
    authorname = cleanName(authorname)
    bookname = cleanName(bookname)

    if bookname == authorname and book['bookSub']:
        # books like "Spike Milligan: Man of Letters"
        # where we split the title/subtitle on ':'
        bookname = cleanName(book['bookSub'])
    if bookname.startswith(authorname) and len(bookname) > len(authorname):
        # books like "Spike Milligan In his own words"
        # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
        bookname = bookname[len(authorname) + 1:]
    bookname = bookname.strip()

    if searchType == "shortbook" and '(' in bookname:
        bookname = bookname.split('(')[0].strip()

    return authorname, bookname
Exemple #3
0
def get_searchterm(book, searchType):
    """ Work out the author and title strings to send to a search provider.

    book is a mapping with 'authorName', 'bookName' and 'bookSub' entries.
    Only searchType "book" and "shortbook" are processed: leading initials
    are removed from the author, both names go through cleanName, and a title
    that repeats the author name has that repetition removed. For "shortbook"
    the title is also cut at its first '('.
    All other search types get book['authorName'] and book['bookName'] back
    exactly as supplied.

    Returns a tuple (authorname, bookname).
    """
    authorname = book['authorName']
    bookname = book['bookName']

    if searchType == "book" or searchType == "shortbook":
        # Strip any leading initials and the whitespace that follows them.
        # Guard on the length: indexing [1] on an empty or one-character
        # name (possible once the initials have been removed, eg "A.")
        # would otherwise raise IndexError.
        while len(authorname) > 1 and authorname[1] in '. ':
            authorname = authorname[2:].strip()
        # middle initials can't have a dot
        authorname = cleanName(authorname.replace('. ', ' '))
        bookname = cleanName(bookname)

        if bookname == authorname and book['bookSub']:
            # books like "Spike Milligan: Man of Letters"
            # where we split the title/subtitle on ':'
            bookname = cleanName(book['bookSub'])
        if bookname.startswith(authorname) and len(bookname) > len(authorname):
            # books like "Spike Milligan In his own words"
            # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
            bookname = bookname[len(authorname) + 1:]
        bookname = bookname.strip()

        if searchType == "shortbook" and '(' in bookname:
            bookname = bookname.split('(')[0].strip()

    return authorname, bookname
Exemple #4
0
def get_searchterm(book, searchType):
    """ Return cleaned (authorname, bookname) search terms for a book.

    book is a mapping with 'authorName', 'bookName' and 'bookSub' entries.
    The names are always passed through cleanName (with "'" as the second
    argument). When searchType is 'book' or 'audio', or contains 'short',
    the title has a leading copy of the author name removed and the author
    name loses single-letter words and configured NAME_POSTFIX words.
    Search types containing 'short' also have the title cut at its first '('.
    """
    authorname = cleanName(book['authorName'], "'")
    bookname = cleanName(book['bookName'], "'")
    is_short = 'short' in searchType

    if is_short or searchType in ['book', 'audio']:
        # books like "Spike Milligan: Man of Letters"
        # where we split the title/subtitle on ':'
        title_is_author = bookname == authorname
        if title_is_author and book['bookSub']:
            bookname = cleanName(book['bookSub'])

        # books like "Spike Milligan In his own words"
        # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
        author_len = len(authorname)
        if bookname.startswith(authorname) and len(bookname) > author_len:
            bookname = bookname[author_len + 1:]
        bookname = bookname.strip()

        # no initials or extensions after surname eg L. E. Modesitt Jr. -> Modesitt
        # and Charles H. Elliott, Phd -> Charles Elliott
        # but Tom Holt -> Tom Holt
        # Calibre directories may have trailing '.' replaced by '_'  eg Jr_
        if ' ' in authorname:
            postfix = getList(lazylibrarian.CONFIG['NAME_POSTFIX'])
            words = (raw.strip('.').strip('_') for raw in authorname.split(' '))
            authorname = ' '.join(
                word for word in words
                if len(word) > 1 and word.lower() not in postfix)

        if is_short and '(' in bookname:
            bookname = bookname[:bookname.index('(')].strip()

    return authorname, bookname
Exemple #5
0
def getWorkSeries(bookID=None):
    """ Extract the series a book belongs to from the page returned by getBookWork.

    Returns a dictionary mapping each cleaned series name to the book's number
    in that series ('' when the link text has no "(...)" part). The dictionary
    is empty when bookID is missing, getBookWork returns nothing, or the page
    has no "Series:" heading.
    """
    seriesdict = {}
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return seriesdict

    work = getBookWork(bookID, "Series")
    if not work:
        return seriesdict

    try:
        after_heading = work.split('<h3><b>Series:')[1]
    except IndexError:
        # the page has no series heading
        return seriesdict

    # everything up to the closing </h3>, one chunk per series link
    links = after_heading.split('</h3>')[0].split('<a href="/series/')
    for link in links[1:]:
        try:
            label = link.split('">')[1].split('</a>')[0]
            if label and '(' in label:
                # link text looks like "Series Name (3)"
                position = label.split('(')[1].split(')')[0].strip()
                name = label.split(' (')[0].strip()
            else:
                position = ''
                name = label.strip()
            name = cleanName(unaccented(name), '&/')
            position = cleanName(unaccented(position))
            seriesdict[name] = position
        except IndexError:
            # malformed link, skip it and carry on with the rest
            pass

    return seriesdict
Exemple #6
0
def getWorkSeries(bookID=None):
    """ Return the series names and numbers in series for the given id as a list of tuples
        For goodreads the id is a WorkID, for librarything it's a BookID

        Each tuple is (seriesid, seriesnum, seriesname). seriesid is '' for
        entries scraped from the work page (the non-goodreads branch).
        In the goodreads branch every series found is also inserted into the
        series table, or has its SeriesID updated there if the stored one differs.
        Returns an empty list if bookID is missing or nothing was found. """
    serieslist = []
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return serieslist

    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        # only this branch touches the database
        myDB = database.DBConnection()
        # str() so a numeric id doesn't break the string concatenation
        seriesurl = "https://www.goodreads.com/work/" + str(bookID) + \
                    "/series?format=xml&key=" + lazylibrarian.CONFIG['GR_API']

        rootxml, _ = gr_xml_request(seriesurl)
        if rootxml is None:
            logger.warn('Error getting XML for %s' % seriesurl)
        else:
            # Element.getiterator() was removed in Python 3.9, iter() is the
            # equivalent call and is available on older versions too
            for item in rootxml.iter('series_work'):
                try:
                    seriesname = item.find('./series/title').text
                    seriesname = seriesname.strip()
                    seriesid = item.find('./series/id').text
                    seriesnum = item.find('./user_position').text
                except (KeyError, AttributeError):
                    # a missing element (find() returned None) or empty title
                    continue
                if seriesname and seriesid:
                    seriesname = cleanName(unaccented(seriesname), '&/')
                    seriesnum = cleanName(unaccented(seriesnum))
                    serieslist.append((seriesid, seriesnum, seriesname))
                    match = myDB.match('SELECT SeriesID from series WHERE SeriesName=?', (seriesname,))
                    if not match:
                        myDB.action('INSERT INTO series VALUES (?, ?, ?, ?, ?)',
                                    (seriesid, seriesname, "Active", 0, 0))
                    elif match['SeriesID'] != seriesid:
                        myDB.action('UPDATE series SET SeriesID=? WHERE SeriesName=?', (seriesid, seriesname))
    else:
        work = getBookWork(bookID, "Series")
        if work:
            try:
                slist = work.split('<h3><b>Series:')[1].split('</h3>')[0].split('<a href="/series/')
                for item in slist[1:]:
                    try:
                        series = item.split('">')[1].split('</a>')[0]
                        if series and '(' in series:
                            # link text looks like "Series Name (3)"
                            seriesnum = series.split('(')[1].split(')')[0].strip()
                            series = series.split(' (')[0].strip()
                        else:
                            seriesnum = ''
                            series = series.strip()
                        seriesname = cleanName(unaccented(series), '&/')
                        seriesnum = cleanName(unaccented(seriesnum))
                        serieslist.append(('', seriesnum, seriesname))
                    except IndexError:
                        # malformed link, skip it
                        pass
            except IndexError:
                # the page has no series heading
                pass

    return serieslist
Exemple #7
0
def get_book_desc(isbn=None, author=None, title=None):
    """ GoodReads does not always have a book description in its api results
        due to restrictive TOS from some of its providers.
        Try to get missing descriptions from googlebooks
        Return description, empty string if not found, None if error

        Nothing is looked up unless both author and title are given and
        BOOK_API is 'GoodReads'. If an isbn is supplied an isbn query is tried
        first, then an author/title query. A result is accepted when its title
        scores above 98 and its first author above 80 on fuzz.token_set_ratio. """
    if not author or not title:
        return ''

    author = cleanName(author)
    title = cleanName(title)
    if lazylibrarian.CONFIG['BOOK_API'] != 'GoodReads':
        return ''

    baseurl = 'https://www.googleapis.com/books/v1/volumes?q='
    urls = [baseurl + quote_plus('inauthor:%s intitle:%s' % (author, title))]
    if isbn:
        # the isbn query goes first
        urls.insert(0, baseurl + quote_plus('isbn:' + isbn))

    api_key = lazylibrarian.CONFIG['GB_API']
    country = lazylibrarian.CONFIG['GB_COUNTRY']
    for url in urls:
        if api_key:
            url += '&key=' + api_key
        # only pass a two character country code.
        # This used to be len(country == 2), which takes len() of a bool and
        # raises TypeError whenever a country is configured
        if country and len(country) == 2:
            url += '&country=' + country
        results, cached = gb_json_request(url)
        if results is None:  # there was an error
            return None
        if results and not cached:
            # presumably to avoid hammering the api on uncached requests
            time.sleep(1)
        if results and 'items' in results:
            for item in results['items']:
                # noinspection PyBroadException
                try:
                    auth = item['volumeInfo']['authors'][0]
                    book = item['volumeInfo']['title']
                    desc = item['volumeInfo']['description']
                    book_fuzz = fuzz.token_set_ratio(book, title)
                    auth_fuzz = fuzz.token_set_ratio(auth, author)
                    if book_fuzz > 98 and auth_fuzz > 80:
                        return desc
                except Exception:
                    # best effort: an item without the fields we need is skipped
                    continue
    return ''
Exemple #8
0
def get_book_desc(isbn=None, author=None, title=None):
    """ GoodReads does not always have a book description in its api results
        due to restrictive TOS from some of its providers.
        Try to get missing descriptions from googlebooks
        Return description, empty string if not found, None if error

        Both author and title are required, and googlebooks is only queried
        when BOOK_API is 'GoodReads'. The isbn query (when isbn is given) is
        tried before the author/title query. The first item whose title ratio
        is above 98 and whose first-author ratio is above 80 supplies the
        description. """
    if not author or not title:
        return ''

    author = cleanName(author)
    title = cleanName(title)
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        baseurl = 'https://www.googleapis.com/books/v1/volumes?q='

        urls = [baseurl + quote_plus('inauthor:%s intitle:%s' % (author, title))]
        if isbn:
            urls.insert(0, baseurl + quote_plus('isbn:' + isbn))

        for url in urls:
            if lazylibrarian.CONFIG['GB_API']:
                url += '&key=' + lazylibrarian.CONFIG['GB_API']
            # The closing bracket of len() was misplaced: len(x == 2) is
            # len() of a bool and raised TypeError for any configured country.
            # Only a two character code is sent.
            if lazylibrarian.CONFIG['GB_COUNTRY'] and len(lazylibrarian.CONFIG['GB_COUNTRY']) == 2:
                url += '&country=' + lazylibrarian.CONFIG['GB_COUNTRY']
            results, cached = gb_json_request(url)
            if results is None:  # there was an error
                return None
            if results and not cached:
                # pause after a request that did not come from the cache
                time.sleep(1)
            if results and 'items' in results:
                for item in results['items']:
                    # noinspection PyBroadException
                    try:
                        info = item['volumeInfo']
                        auth = info['authors'][0]
                        book = info['title']
                        desc = info['description']
                        book_fuzz = fuzz.token_set_ratio(book, title)
                        auth_fuzz = fuzz.token_set_ratio(auth, author)
                        if book_fuzz > 98 and auth_fuzz > 80:
                            return desc
                    except Exception:
                        # best effort: ignore items missing any of the fields
                        continue
    return ''
Exemple #9
0
def ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode):
    """ Assemble the query parameters for one provider and one search type.

    provider is a mapping holding the provider's search function names
    (BOOKSEARCH, MAGSEARCH, GENERALSEARCH), categories (BOOKCAT, MAGCAT) and
    EXTENDED setting. searchMode is only used in the debug log line.

    Returns a dict of parameters, or None when the provider has no usable
    combination of search function and category for this searchType.
    """
    params = None

    if searchType in ("book", "shortbook"):
        authorname, bookname = get_searchterm(book, searchType)
        if provider['BOOKSEARCH'] and provider['BOOKCAT']:
            # if specific booksearch, use it
            params = {
                "t": provider['BOOKSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['BOOKCAT']
            }
        elif provider['GENERALSEARCH'] and provider['BOOKCAT']:
            # if not, try general search
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['BOOKCAT']
            }
    elif searchType == "mag":
        # if specific magsearch, use it, otherwise fall back to general search
        if provider['MAGSEARCH'] and provider['MAGCAT']:
            search_function = provider['MAGSEARCH']
        elif provider['GENERALSEARCH'] and provider['MAGCAT']:
            search_function = provider['GENERALSEARCH']
        else:
            search_function = None

        if search_function:
            params = {
                "t": search_function,
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": cleanName(book['searchterm']),
                "extended": provider['EXTENDED'],
            }
    elif provider['GENERALSEARCH']:
        # this is a general search
        params = {
            "t": provider['GENERALSEARCH'],
            "apikey": api_key,
            "q": cleanName(book['searchterm']),
            "extended": provider['EXTENDED'],
        }

    if not params:
        logger.debug('[NewzNabPlus] - %s No matching search parameters' % searchMode)
    else:
        logger.debug('[NewzNabPlus] - %s Search parameters set to %s' % (searchMode, str(params)))

    return params
Exemple #10
0
    def find_book(self, bookid=None, bookstatus="None"):
        """ Fetch one volume from the Google Books API and add it to the books table.

        bookid is the googlebooks volume id, it is appended to the volumes url.
        bookstatus is stored as the book's Status; a falsy value is replaced by
        CONFIG['NEWBOOK_STATUS'].
        NOTE(review): the default is the string "None", which is truthy, so the
        NEWBOOK_STATUS fallback is only used when a caller passes a falsy
        value explicitly - confirm this is intended.

        The author is looked up through GoodReads(...).find_author_id() and is
        added to the authors table if not already there (by id or by name).
        After the book is stored, the cover, series and work page are updated.

        Returns None in every case. Returns early, without adding anything,
        when the request gives no results, the result has no author, or no
        author id can be found.
        """
        myDB = database.DBConnection()
        # a missing api key is only warned about, the request is still attempted
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, in_cache = gb_json_request(URL)

        if jsonresults is None:
            logger.debug('No results found for %s' % bookid)
            return

        if not bookstatus:
            bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

        book = bookdict(jsonresults)
        # colons become dots, quotes are removed from the title
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(book['name'], dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        if not book['author']:
            logger.debug('Book %s does not contain author field, skipping' %
                         bookname)
            return
        # The language, missing-date and future-date checks below only log,
        # the book is still added because the user asked for it explicitly
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if book['lang'] not in valid_langs and 'All' not in valid_langs:
            logger.debug(
                'Book %s googlebooks language does not match preference, %s' %
                (bookname, book['lang']))

        if lazylibrarian.CONFIG['NO_PUBDATE']:
            if not book['date'] or book['date'] == '0000':
                logger.warn(
                    'Book %s Publication date does not match preference, %s' %
                    (bookname, book['date']))

        if lazylibrarian.CONFIG['NO_FUTURE']:
            # string comparison of the date against the current year
            # NOTE(review): if book['date'] can be None this comparison raises
            # TypeError on python 3 - confirm what bookdict returns
            if book['date'] > today()[:4]:
                logger.warn(
                    'Book %s Future publication date does not match preference, %s'
                    % (bookname, book['date']))

        authorname = book['author']
        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?',
                               (AuthorID, ))
            if not match:
                # not known by that id, see if we know the author by name
                match = myDB.match(
                    'SELECT AuthorID from authors WHERE AuthorName=?',
                    (author['authorname'], ))
                if match:
                    logger.debug(
                        '%s: Changing authorid from %s to %s' %
                        (author['authorname'], AuthorID, match['AuthorID']))
                    AuthorID = match[
                        'AuthorID']  # we have a different authorid for that authorname
                else:  # no author but request to add book, add author with newauthor status
                    # User hit "add book" button from a search or a wishlist import
                    # new authors are 'Active' unless NEWAUTHOR_STATUS is
                    # Skipped or Ignored, in which case they are 'Paused'
                    newauthor_status = 'Active'
                    if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in [
                            'Skipped', 'Ignored'
                    ]:
                        newauthor_status = 'Paused'
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": newauthor_status
                    }
                    # from here on use the author name as found by find_author_id
                    authorname = author['authorname']
                    myDB.upsert("authors", newValueDict, controlValueDict)
                    if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                        self.get_author_books(AuthorID,
                                              entrystatus=lazylibrarian.
                                              CONFIG['NEWAUTHOR_STATUS'])
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" %
                        (book['author'], bookname))
            return

        # add (or update) the book itself
        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": book['sub'],
            "BookDesc": book['desc'],
            "BookIsbn": book['isbn'],
            "BookPub": book['pub'],
            "BookGenre": book['genre'],
            "BookImg": book['img'],
            "BookLink": book['link'],
            "BookRate": float(book['rate']),
            "BookPages": book['pages'],
            "BookDate": book['date'],
            "BookLang": book['lang'],
            "Status": bookstatus,
            "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s by %s added to the books database" %
                    (bookname, authorname))

        if 'nocover' in book['img'] or 'nophoto' in book['img']:
            # try to get a cover from another source
            workcover, source = getBookCover(bookid)
            if workcover:
                logger.debug('Updated cover for %s using %s' %
                             (bookname, source))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

            # NOTE(review): this elif belongs to "if workcover", so an image is
            # only cached when it is a nocover/nophoto placeholder and no other
            # cover was found. It may have been meant for the outer "if" -
            # confirm before changing
            elif book['img'] and book['img'].startswith('http'):
                link, success, _ = cache_img("book", bookid, book['img'])
                if success:
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"BookImg": link}
                    myDB.upsert("books", newValueDict, controlValueDict)
                else:
                    logger.debug('Failed to cache image for %s' % book['img'])

        # start with the series from the googlebooks result, if any;
        # with ADD_SERIES set, a non-empty getWorkSeries result replaces it
        serieslist = []
        if book['series']:
            serieslist = [('', book['seriesNum'],
                           cleanName(unaccented(book['series']), '&/'))]
        if lazylibrarian.CONFIG['ADD_SERIES']:
            newserieslist = getWorkSeries(bookid)
            if newserieslist:
                serieslist = newserieslist
                logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
        setSeries(serieslist, bookid)

        # store the work page link when one is found
        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Exemple #11
0
    def get_author_books(self,
                         authorid=None,
                         authorname=None,
                         bookstatus="Skipped",
                         entrystatus='Active',
                         refresh=False):
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' %
                         authorname)
            # google doesnt like accents in author names
            set_url = self.url + quote(
                'inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Artist is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(
                            URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while'
                            % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' %
                                     (number_results, plural(number_results),
                                      authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip if no author, no author is no book.
                        if not book['author']:
                            logger.debug(
                                'Skipped a result without authorfield.')
                            continue

                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match(
                                        'SELECT lang FROM languages where isbn=?',
                                        (isbnhead, ))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug(
                                            "Found cached language [%s] for [%s]"
                                            % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(
                                                    book['isbn']
                                            ) == 13 and book[
                                                    'isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(
                                                            lang):
                                                        booklang = lazylibrarian.isbn_979_dict[
                                                            lang]
                                                        logger.debug(
                                                            "ISBN979 returned %s for %s"
                                                            % (booklang,
                                                               isbnhead))
                                                        match = True
                                                        break
                                            elif (len(book['isbn']) == 10) or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(
                                                            lang):
                                                        booklang = lazylibrarian.isbn_978_dict[
                                                            lang]
                                                        logger.debug(
                                                            "ISBN979 returned %s for %s"
                                                            % (booklang,
                                                               isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action(
                                                    'insert into languages values (?, ?)',
                                                    (isbnhead, booklang))

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action(
                                                'insert into languages values (?, ?)',
                                                (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in [
                                                "en-US", "en-GB", "eng"
                                        ]:
                                            # these are all english, may need to expand this list
                                            logger.debug(
                                                "%s Google thinks [%s], we think [%s]"
                                                % (book['name'], googlelang,
                                                   booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' %
                                             (book['name'], booklang))
                                ignored += 1
                                continue

                        rejected = 0
                        check_status = False
                        book_status = bookstatus  # new_book status, or new_author status
                        audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                        added = today()
                        locked = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug(
                                'Rejecting bookid %s for %s, no bookname' %
                                (bookid, authorname))
                            removedResults += 1
                            rejected = 1
                        else:
                            bookname = replace_all(unaccented(bookname), {
                                ':': '.',
                                '"': '',
                                '\'': ''
                            }).strip()
                            # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                            # editions) and sometimes uses the same bookid if the book is the same but the title is
                            # slightly different. Not sure if googlebooks does too, but we only want one...
                            cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                            existing_book = myDB.match(cmd, (bookid, ))
                            if existing_book:
                                book_status = existing_book['Status']
                                audio_status = existing_book['AudioStatus']
                                locked = existing_book['Manual']
                                added = existing_book['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                if rejected in [3, 4, 5]:
                                    book_status = 'Ignored'
                                    audio_status = 'Ignored'
                                else:
                                    book_status = bookstatus  # new_book status, or new_author status
                                    audio_status = lazylibrarian.CONFIG[
                                        'NEWAUDIO_STATUS']
                                added = today()
                                locked = False

                        if not rejected and re.match(
                                '[^\w-]', bookname
                        ):  # remove books with bad characters in title
                            logger.debug(
                                "[%s] removed book for bad characters" %
                                bookname)
                            removedResults += 1
                            rejected = 2

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug(
                                    'Rejecting %s, future publication date %s'
                                    % (bookname, book['date']))
                                removedResults += 1
                                rejected = 3

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug(
                                    'Rejecting %s, no publication date' %
                                    bookname)
                                removedResults += 1
                                rejected = 4

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' %
                                             bookname)
                                removedResults += 1
                                rejected = 5

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname.replace(
                                '"', '""'), authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got %s'
                                        % (match['BookID'], authorname,
                                           bookname, bookid))
                                    rejected = 6
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid, ))
                            if match:  # we have a book with this bookid already
                                if bookname != match[
                                        'BookName'] or authorname != match[
                                            'AuthorName']:
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]'
                                        % (bookid, authorname, bookname,
                                           match['AuthorName'],
                                           match['BookName']))
                                else:
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got this book in database'
                                        % (bookid, authorname, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = 7

                        if check_status or not rejected or (
                                lazylibrarian.CONFIG['IMP_IGNORE']
                                and rejected in [3, 4, 5]):  # dates, isbn
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                                logger.debug("Book found: " + bookname + " " +
                                             book['date'])
                                updated = False
                                if 'nocover' in book[
                                        'img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug(
                                            'Updated cover for %s using %s' %
                                            (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)
                                        updated = True

                                elif book['img'] and book['img'].startswith(
                                        'http'):
                                    link, success, _ = cache_img(
                                        "book",
                                        bookid,
                                        book['img'],
                                        refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug(
                                            'Failed to cache image for %s' %
                                            book['img'])

                                serieslist = []
                                if book['series']:
                                    serieslist = [
                                        ('', book['seriesNum'],
                                         cleanName(unaccented(book['series']),
                                                   '&/'))
                                    ]
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug(
                                            'Updated series: %s [%s]' %
                                            (bookid, serieslist))
                                        updated = True
                                setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist,
                                                       bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)

                                if not existing_book:
                                    logger.debug(
                                        "[%s] Added book: %s [%s] status %s" %
                                        (authorname, bookname, booklang,
                                         book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug(
                                        "[%s] Updated book: %s [%s] status %s"
                                        % (authorname, bookname, booklang,
                                           book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug(
                '[%s] The Google Books API was hit %s time%s to populate book list'
                % (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid, ))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" %
                         (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" %
                         (ignored, plural(ignored)))
            logger.debug(
                "Removed %s bad character or no-name result%s for author" %
                (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" %
                         (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" %
                         (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" %
                         (resultcount, plural(resultcount)))

            myDB.action(
                'insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                (authorname.replace('"', '""'), api_hits, gr_lang_hits,
                 lt_lang_hits, gb_lang_change, cache_hits, ignored,
                 removedResults, not_cached, duplicates))

            if refresh:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s / Updated %s book%s"
                    % (authorname, added_count, plural(added_count),
                       updated_count, plural(updated_count)))
            else:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s to the database"
                    % (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' %
                         traceback.format_exc())
Exemple #12
0
def getWorkSeries(bookID=None):
    """ Return the series names and numbers in series for the given id.

        For goodreads the id is a WorkID, for librarything it's a BookID.

        When CONFIG['BOOK_API'] is 'GoodReads' the series are read from the
        goodreads work/series XML, and the local "series" table is kept in step:
        unknown series are inserted as "Active" and a changed SeriesID for a
        known SeriesName is updated.
        Otherwise the series are scraped from the "Series:" heading of the text
        returned by getBookWork(bookID, "Series"); that text carries no series
        id, so the id element of each tuple is ''.

        :param bookID: WorkID or BookID, concatenated into the goodreads url,
                       so presumably a string - confirm against callers
        :return: list of (seriesid, seriesnum, seriesname) tuples,
                 empty if there is no bookID or nothing was found """
    myDB = database.DBConnection()
    serieslist = []
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return serieslist

    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        URL = "https://www.goodreads.com/work/"
        seriesurl = URL + bookID + "/series?format=xml&key=" + lazylibrarian.CONFIG['GR_API']

        rootxml, _ = gr_xml_request(seriesurl)
        if rootxml is None:
            logger.warn('Error getting XML for %s' % seriesurl)
            return serieslist

        # iter() replaces getiterator(), which was deprecated and then removed
        # from ElementTree in Python 3.9; iter() is available from 2.7 onwards
        for item in rootxml.iter('series_work'):
            try:
                # a missing element (find() returns None) or an empty one
                # (text is None) raises AttributeError and skips this entry
                seriesname = item.find('./series/title').text.strip()
                seriesid = item.find('./series/id').text
                seriesnum = item.find('./user_position').text
            except (KeyError, AttributeError):
                continue

            if not (seriesname and seriesid):
                continue
            seriesname = cleanName(unaccented(seriesname), '&/')
            if not seriesname:
                # nothing left of the name after cleaning
                continue

            seriesnum = cleanName(unaccented(seriesnum))
            serieslist.append((seriesid, seriesnum, seriesname))

            match = myDB.match('SELECT SeriesID from series WHERE SeriesName=?', (seriesname, ))
            if match:
                # series name already known, make sure we hold the current id
                if match['SeriesID'] != seriesid:
                    myDB.action('UPDATE series SET SeriesID=? WHERE SeriesName=?',
                                (seriesid, seriesname))
                continue

            match = myDB.match('SELECT SeriesName from series WHERE SeriesID=?', (seriesid, ))
            if match:
                # same id stored under a different name, leave the table alone
                logger.warn("Name mismatch for series %s, [%s][%s]" %
                            (seriesid, seriesname, match['SeriesName']))
            else:
                myDB.action('INSERT INTO series VALUES (?, ?, ?, ?, ?)',
                            (seriesid, seriesname, "Active", 0, 0))
        return serieslist

    work = getBookWork(bookID, "Series")
    if not work:
        return serieslist

    try:
        # isolate the "Series:" heading, then split it into one chunk per link
        slist = work.split('<h3><b>Series:')[1].split('</h3>')[0].split('<a href="/series/')
    except IndexError:
        # no series heading in this text
        return serieslist

    for item in slist[1:]:
        try:
            series = item.split('">')[1].split('</a>')[0]
            if series and '(' in series:
                # link text looks like "Name (number)"
                seriesnum = series.split('(')[1].split(')')[0].strip()
                series = series.split(' (')[0].strip()
            else:
                seriesnum = ''
                series = series.strip()
            seriesname = cleanName(unaccented(series), '&/')
            seriesnum = cleanName(unaccented(seriesnum))
            if seriesname:
                serieslist.append(('', seriesnum, seriesname))
        except IndexError:
            # malformed link, ignore it and carry on with the next one
            pass

    return serieslist
Exemple #13
0
def TORDownloadMethod(bookid=None, tor_prov=None, tor_title=None, tor_url=None):
    """ Hand a torrent or magnet link to every enabled torrent downloader
        and record the outcome in the "books" and "wanted" tables.

        For a non-magnet url the .torrent data is fetched first (it is needed
        for the blackhole file and to calculate the torrent hash).  Each
        downloader that is switched on in the lazylibrarian settings is then
        tried in turn; the Source and downloadID of the last one tried are the
        ones that get stored.

        NOTE: this block is Python 2 code (urllib2, byte-string decode).

        :param bookid: BookID to mark as "Snatched" in the books table
        :param tor_prov: not used by this function
        :param tor_title: title of the torrent, used for logging and for the
                          blackhole file name
        :param tor_url: url of the .torrent file, or a magnet link
        :return: True if a downloader accepted the torrent, False otherwise """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table

    if not tor_url:
        # every step below needs a url; without this guard a missing url
        # raised TypeError on the "&file=" test instead of failing cleanly
        logger.warn('No torrent url given for %s' % tor_title)
        return False

    # the rewrites of tor_url below only apply to non-magnet urls and never
    # change the start of the url, so this can be evaluated once
    is_magnet = tor_url.startswith('magnet')

    if is_magnet:
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            parts = tor_url.split('&file=')
            url = parts[0]
            value = parts[1]
            if isinstance(value, str):
                value = value.decode('utf-8')  # make unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent') and '.torrent' in tor_url:
            tor_url = tor_url.split('.torrent')[0] + '.torrent'

        # u'%s' gives the same string as the old ur'%s' on Python 2,
        # but the "ur" prefix is a syntax error on Python 3
        request = urllib2.Request(u'%s' % tor_url)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('Accept-encoding', 'gzip')
        request.add_header('User-Agent', USER_AGENT)

        try:
            response = urllib2.urlopen(request, timeout=90)
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read())
                f = gzip.GzipFile(fileobj=buf)
                torrent = f.read()
            else:
                torrent = response.read()

        except socket.timeout:
            logger.warn('Timeout fetching torrent from url: %s' % tor_url)
            return False
        except urllib2.URLError as e:
            logger.warn('Error fetching torrent from url: %s, %s' % (tor_url, e.reason))
            return False

    if lazylibrarian.TOR_DOWNLOADER_BLACKHOLE:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        # cleared so the NZBtitle update at the end is skipped
        # unless one of the clients below reports a name
        tor_title = None
        if is_magnet:
            if lazylibrarian.TOR_CONVERT_MAGNET:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name = tor_name + '.magnet'
                tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name)
                with open(tor_path, 'wb') as torrent_file:
                    torrent_file.write(torrent)
                logger.debug('Magnet file saved: %s' % tor_path)
                setperm(tor_path)
                downloadID = Source
        else:
            tor_name = tor_name + '.torrent'
            tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name)
            with open(tor_path, 'wb') as torrent_file:
                torrent_file.write(torrent)
            setperm(tor_path)
            logger.debug('Torrent file saved: %s' % tor_name)
            downloadID = Source

    if lazylibrarian.TOR_DOWNLOADER_UTORRENT and lazylibrarian.UTORRENT_HOST:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.TOR_DOWNLOADER_RTORRENT and lazylibrarian.RTORRENT_HOST:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.TOR_DOWNLOADER_QBITTORRENT and lazylibrarian.QBITTORRENT_HOST:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url)  # returns hash or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)
        else:
            # report what qbittorrent gave back; the http "response" used to
            # fetch the torrent is unrelated and is not even bound for magnets
            logger.debug("qbittorrent returned: %s" % str(status))

    if lazylibrarian.TOR_DOWNLOADER_TRANSMISSION and lazylibrarian.TRANSMISSION_HOST:
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.TOR_DOWNLOADER_SYNOLOGY and lazylibrarian.USE_SYNOLOGY and lazylibrarian.SYNOLOGY_HOST:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.TOR_DOWNLOADER_DELUGE and lazylibrarian.DELUGE_HOST:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.DELUGE_USER:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.DELUGE_HOST,
                                     int(lazylibrarian.DELUGE_PORT),
                                     lazylibrarian.DELUGE_USER,
                                     lazylibrarian.DELUGE_PASS)
            try:
                client.connect()
                args = {"name": tor_title}
                if is_magnet:
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.DELUGE_LABEL:
                        client.call('label.set_torrent', downloadID, lazylibrarian.DELUGE_LABEL)
                    result = client.call('core.get_torrent_status', downloadID, {})
                    if 'name' in result:
                        tor_title = result['name']

            except Exception as e:
                logger.debug('DelugeRPC failed %s' % str(e))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    # All values go to the database as bound parameters: titles and urls come
    # from external providers and may contain quote characters, which broke
    # (and could subvert) the previous string-formatted statements.
    if not downloadID:
        logger.error(u'Failed to download torrent from %s, %s' % (Source, tor_url))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl=?', (full_url, ))
        return False

    myDB.action('UPDATE books SET status = "Snatched" WHERE BookID=?', (bookid, ))
    myDB.action('UPDATE wanted SET status = "Snatched", Source = ?, DownloadID = ? WHERE NZBurl=?',
                (Source, downloadID, full_url))
    if tor_title:
        if downloadID.upper() in tor_title.upper():
            logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
        else:
            tor_title = unaccented_str(tor_title)
            logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
            myDB.action('UPDATE wanted SET NZBtitle = ? WHERE NZBurl=?', (tor_title, full_url))
    return True
Exemple #14
0
def _goodreads_author_search(query, bookname, dic, label):
    """ Search goodreads for query and return the authorid of the first work
        whose (quote-stripped) title fuzzy-matches bookname at 98% or better.
        dic is the replacement table applied to result titles before comparing,
        label ('Author' or 'Title') only identifies the search in the debug log.
        Return "" (or None) if nothing suitable was found """
    searchname = cleanName(unaccented(query))
    if PY2:
        searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
    set_url = 'https://www.goodreads.com/search.xml?q=' + quote_plus(searchname) + \
              '&' + urlencode({"key": lazylibrarian.CONFIG['GR_API']})
    rootxml, _ = gr_xml_request(set_url)
    if rootxml is None:
        logger.warn('Error getting XML for %s' % searchname)
        return ""

    # Element.getiterator() was removed in python 3.9, iter() is the replacement
    for item in rootxml.iter('work'):
        try:
            booktitle = item.find('./best_book/title').text
            booktitle = replace_all(booktitle, dic)
        except (KeyError, AttributeError):
            # a work without a usable title just can't match, keep looking
            booktitle = ""
        if fuzz.token_set_ratio(booktitle, bookname) < 98:
            continue
        try:
            author = item.find('./best_book/author/name').text
        except (KeyError, AttributeError):
            author = ""
        try:
            authorid = item.find('./best_book/author/id').text
        except (KeyError, AttributeError):
            authorid = ""
        logger.debug("%s Search found %s %s, authorid %s" %
                     (label, author, booktitle, authorid))
        # only the first close match is considered, even if it has no authorid
        return authorid
    return ""


def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Members that come without an authorid are looked up on goodreads,
        first by "title author" and then by title only.
        Return how many authors you added (0 if the series is not in the database) """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?',
                        (seriesid, ))
    if not result:
        # nothing to do for an unknown series, and no name to report
        logger.warn('Series %s not found in database' % seriesid)
        return 0
    seriesname = result['SeriesName']
    # the api hit count that comes back with the members is not used here
    members, _ = getSeriesMembers(seriesid, seriesname)
    # strip straight and curly quotes so titles compare cleanly
    dic = {
        u'\u2018': "",
        u'\u2019': "",
        u'\u201c': '',
        u'\u201d': '',
        "'": "",
        '"': ''
    }

    if members:
        for member in members:
            # member layout: order, bookname, authorname, workid, authorid, pubyear
            bookname = replace_all(member[1], dic)
            authorname = member[2]
            authorid = member[4]
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                try:
                    authorid = _goodreads_author_search(
                        bookname + ' ' + authorname, bookname, dic, 'Author')
                    if not authorid:  # try again with title only
                        authorid = _goodreads_author_search(
                            bookname, bookname, dic, 'Title')
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" %
                                    (authorname, bookname))
                except Exception as e:
                    # a failed lookup only skips this member, carry on with the rest
                    logger.error("Error finding goodreads results: %s %s" %
                                 (type(e).__name__, str(e)))

            if authorid:
                lazylibrarian.importer.addAuthorToDB(refresh=False,
                                                     authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" %
                (newauth, plural(newauth), seriesname))
    return newauth
Exemple #15
0
    def get_author_books(self,
                         authorid=None,
                         authorname=None,
                         bookstatus="Skipped",
                         refresh=False):
        """Query the Google Books API for an author's books and store them.

        Pages through the 'inauthor:' search results, filters each volume
        (missing author, unwanted language, bad or empty title, future
        publication date, duplicates) and upserts the survivors into the
        books table, then updates the author's "last book" details and
        writes a row of counters to the stats table.

        authorid   -- AuthorID the books are stored under
        authorname -- name used for the search query, duplicate checks and logs
        bookstatus -- status given to books not already in the database
        refresh    -- when True bypass the json cache, re-cache cover images
                      and report updated as well as added books

        Returns None. Any exception is logged, nothing is raised to the caller.
        """
        try:
            logger.debug('[%s] Now processing books with Google Books API' %
                         authorname)
            # google doesnt like accents in author names
            set_url = self.url + urllib.quote(
                'inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1  # placeholder so the paging loop runs at least once

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Author is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urllib.urlencode(self.params)

                    try:
                        jsonresults, in_cache = get_json_request(
                            URL, useCache=not refresh)
                        if not jsonresults:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except HTTPError as err:
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while'
                            % err.reason)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' %
                                     (number_results, plural(number_results),
                                      authorname))

                    # results are requested in pages of 40
                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1

                        # skip if no author, no author is no book.
                        # (an empty authors list raises IndexError, not KeyError)
                        try:
                            _ = item['volumeInfo']['authors'][0]
                        except (KeyError, IndexError):
                            logger.debug(
                                'Skipped a result without authorfield.')
                            continue

                        # only the first identifier is looked at, and only if it's an ISBN_10
                        try:
                            identifier = item['volumeInfo']['industryIdentifiers'][0]
                            if identifier['type'] == 'ISBN_10':
                                bookisbn = identifier['identifier']
                            else:
                                bookisbn = ""
                        except (KeyError, IndexError):
                            bookisbn = ""

                        # the leading digits of the isbn are the key for language lookups
                        isbnhead = ""
                        if len(bookisbn) == 10:
                            isbnhead = bookisbn[0:3]
                        elif len(bookisbn) == 13:
                            isbnhead = bookisbn[3:6]

                        try:
                            booklang = item['volumeInfo']['language']
                        except KeyError:
                            booklang = "Unknown"

                        # do we care about language?
                        if "All" not in valid_langs:
                            if bookisbn != "":
                                # seems google lies to us, sometimes tells us books
                                # are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match(
                                        'SELECT lang FROM languages where isbn=?',
                                        (isbnhead, ))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug(
                                            "Found cached language [%s] for [%s]"
                                            % (booklang, isbnhead))
                                        match = True
                                    if not match:
                                        # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(bookisbn) == 13 and bookisbn.startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug(
                                                            "ISBN979 returned %s for %s"
                                                            % (booklang, isbnhead))
                                                        match = True
                                                        break

                                            elif (len(bookisbn) == 10) or \
                                                    (len(bookisbn) == 13 and bookisbn.startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        logger.debug(
                                                            "ISBN978 returned %s for %s"
                                                            % (booklang, isbnhead))
                                                        match = True
                                                        break

                                            if match:
                                                # remember the answer for next time,
                                                # doubling any embedded quotes
                                                myDB.action(
                                                    'insert into languages values ("%s", "%s")'
                                                    % (isbnhead.replace('"', '""'),
                                                       booklang.replace('"', '""')))
                                                logger.debug(u"GB language: " +
                                                             booklang)

                                    if not match:
                                        # try searching librarything for a language code using the isbn
                                        # if no language found, librarything return value is "invalid" or "unknown"
                                        # librarything returns plain text, not xml
                                        BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + bookisbn
                                        try:
                                            librarything_wait()
                                            resp = urllib2.urlopen(
                                                BOOK_URL, timeout=30).read()
                                            lt_lang_hits += 1
                                            logger.debug(
                                                "LibraryThing reports language [%s] for %s"
                                                % (resp, isbnhead))

                                            if resp != 'invalid' and resp != 'unknown':
                                                booklang = resp  # found a language code
                                                match = True
                                                # resp is text from a remote server,
                                                # double any embedded quotes
                                                myDB.action(
                                                    'insert into languages values ("%s", "%s")'
                                                    % (isbnhead.replace('"', '""'),
                                                       booklang.replace('"', '""')))
                                                logger.debug(u"LT language: " +
                                                             booklang)
                                        except Exception as e:
                                            booklang = ""
                                            logger.error(
                                                "Error finding language: %s" %
                                                str(e))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in [
                                                "en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            booknamealt = item['volumeInfo']['title']
                                            logger.debug(
                                                "%s Google thinks [%s], we think [%s]"
                                                % (booknamealt, googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                booknamealt = item['volumeInfo']['title']
                                logger.debug('Skipped [%s] with language %s' %
                                             (booknamealt, booklang))
                                ignored += 1
                                continue

                        try:
                            bookpub = item['volumeInfo']['publisher']
                        except KeyError:
                            bookpub = ""

                        try:
                            booksub = item['volumeInfo']['subtitle']
                        except KeyError:
                            booksub = ""

                        # look for series info in a subtitle like "(Name Series #3)"
                        if not booksub:
                            series = ""
                            seriesNum = ""
                        else:
                            try:
                                series = booksub.split('(')[1].split(
                                    ' Series ')[0]
                            except IndexError:
                                series = ""
                            if series.endswith(')'):
                                series = series[:-1]
                            try:
                                seriesNum = booksub.split('(')[1].split(
                                    ' Series ')[1].split(')')[0]
                                # an empty seriesNum raises IndexError here too
                                if seriesNum[0] == '#':
                                    seriesNum = seriesNum[1:]
                            except IndexError:
                                seriesNum = ""

                            if not seriesNum and '#' in series:
                                words = series.rsplit('#', 1)
                                series = words[0].strip()
                                seriesNum = words[1].strip()
                            if not seriesNum and ' ' in series:
                                words = series.rsplit(' ', 1)
                                # has to be unicode for isnumeric()
                                if (u"%s" % words[1]).isnumeric():
                                    series = words[0]
                                    seriesNum = words[1]

                        try:
                            bookdate = item['volumeInfo']['publishedDate']
                        except KeyError:
                            bookdate = '0000-00-00'

                        try:
                            bookimg = item['volumeInfo']['imageLinks'][
                                'thumbnail']
                        except KeyError:
                            bookimg = 'images/nocover.png'

                        try:
                            bookrate = item['volumeInfo']['averageRating']
                        except KeyError:
                            bookrate = 0

                        try:
                            bookpages = item['volumeInfo']['pageCount']
                        except KeyError:
                            bookpages = 0

                        try:
                            bookgenre = item['volumeInfo']['categories'][0]
                        except (KeyError, IndexError):
                            bookgenre = ""

                        try:
                            bookdesc = item['volumeInfo']['description']
                        except KeyError:
                            bookdesc = ""

                        bookname = item['volumeInfo']['title']
                        bookname = unaccented(bookname)
                        dic = {':': '.', '"': '', '\'': ''}
                        bookname = replace_all(bookname, dic)
                        bookname = bookname.strip()  # strip whitespace

                        booklink = item['volumeInfo']['canonicalVolumeLink']
                        bookrate = float(bookrate)
                        bookid = item['id']

                        # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                        # and sometimes uses the same bookid if the book is the same but the title is slightly different
                        #
                        # Not sure if googlebooks does too, but we only want one...
                        existing_book = myDB.match(
                            'SELECT Status,Manual FROM books WHERE BookID=?',
                            (bookid, ))
                        if existing_book:
                            book_status = existing_book['Status']
                            locked = existing_book['Manual']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                locked = bool(int(locked))
                        else:
                            book_status = bookstatus  # new_book status, or new_author status
                            locked = False

                        rejected = False
                        check_status = False

                        # re.match only tests the first character of the title
                        if re.match(r'[^\w-]', bookname):
                            # remove books with bad characters in title
                            logger.debug(
                                "[%s] removed book for bad characters" %
                                bookname)
                            removedResults += 1
                            rejected = True

                        if not rejected and not bookname:
                            logger.debug(
                                'Rejecting bookid %s for %s, no bookname' %
                                (bookid, authorname))
                            removedResults += 1
                            rejected = True

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if bookdate > today()[:len(bookdate)]:
                                logger.debug(
                                    'Rejecting %s, future publication date %s'
                                    % (bookname, bookdate))
                                removedResults += 1
                                rejected = True

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname, authorname))
                            if match:
                                if match['BookID'] != bookid:
                                    # we have a different book with this author/title already
                                    # so it's the new bookid we reject, keeping the stored one
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got %s'
                                        % (bookid, authorname, bookname,
                                           match['BookID']))
                                    rejected = True
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid, ))
                            if match:
                                # we have a book with this bookid already
                                if bookname != match['BookName'] or authorname != match['AuthorName']:
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]'
                                        % (bookid, authorname, bookname,
                                           match['AuthorName'],
                                           match['BookName']))
                                else:
                                    logger.debug(
                                        'Rejecting bookid %s for [%s][%s] already got this book in database'
                                        % (bookid, authorname, bookname))
                                    # same book, so still refresh its details below
                                    check_status = True
                                duplicates += 1
                                rejected = True

                        if check_status or not rejected:
                            if book_status != "Ignored" and not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": bookgenre,
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": bookdate,
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "BookAdded": today()
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict,
                                            controlValueDict)
                                logger.debug(u"Book found: " + bookname + " " +
                                             bookdate)
                                updated = False
                                if 'nocover' in bookimg or 'nophoto' in bookimg:
                                    # try to get a cover from librarything
                                    workcover = getBookCover(bookid)
                                    if workcover:
                                        logger.debug(
                                            u'Updated cover for %s to %s' %
                                            (bookname, workcover))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)
                                        updated = True

                                elif bookimg and bookimg.startswith('http'):
                                    link, success = cache_img("book",
                                                              bookid,
                                                              bookimg,
                                                              refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict,
                                                    controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug(
                                            'Failed to cache image for %s' %
                                            bookimg)

                                seriesdict = {}
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    # prefer series info from librarything
                                    seriesdict = getWorkSeries(bookid)
                                    if seriesdict:
                                        logger.debug(
                                            u'Updated series: %s [%s]' %
                                            (bookid, seriesdict))
                                        updated = True
                                    else:
                                        # librarything doesn't have series info. Any in the title?
                                        if series:
                                            seriesdict = {
                                                cleanName(unaccented(series)):
                                                seriesNum
                                            }
                                    setSeries(seriesdict, bookid)

                                new_status = setStatus(bookid, seriesdict,
                                                       bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)

                                if not existing_book:
                                    logger.debug(
                                        "[%s] Added book: %s [%s] status %s" %
                                        (authorname, bookname, booklang,
                                         book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug(
                                        "[%s] Updated book: %s [%s] status %s"
                                        % (authorname, bookname, booklang,
                                           book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError:
                # NOTE(review): any missing key in the results (eg a page without
                # 'items', or a volume without 'title') silently ends the paging
                # loop here - presumably deliberate best-effort, confirm
                pass

            deleteEmptySeries()
            logger.debug(
                '[%s] The Google Books API was hit %s time%s to populate book list'
                % (authorname, api_hits, plural(api_hits)))

            # most recent non-ignored book becomes the author's "last book"
            lastbook = myDB.match(
                'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID="%s" '
                'AND Status != "Ignored" order by BookDate DESC'
                % authorid)

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": "Active",
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" %
                         (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" %
                         (ignored, plural(ignored)))
            logger.debug(
                "Removed %s bad character or no-name result%s for author" %
                (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" %
                         (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" %
                         (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" %
                         (resultcount, plural(resultcount)))

            myDB.action(
                'insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)'
                % (authorname.replace('"', '""'), api_hits, gr_lang_hits,
                   lt_lang_hits, gb_lang_change, cache_hits, ignored,
                   removedResults, not_cached, duplicates))

            if refresh:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s / Updated %s book%s"
                    % (authorname, added_count, plural(added_count),
                       updated_count, plural(updated_count)))
            else:
                logger.info(
                    "[%s] Book processing complete: Added %s book%s to the database"
                    % (authorname, added_count, plural(added_count)))

        except Exception:
            # NOTE(review): on failure the author row is left with Status "Loading"
            logger.error('Unhandled exception in GB.get_author_books: %s' %
                         traceback.format_exc())
Exemple #16
0
def ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode):
    """Build the request parameters for a NewzNab style provider search.

    provider   -- dict of provider settings; the keys read here are BOOKSEARCH,
                  BOOKCAT, MAGSEARCH, MAGCAT, GENERALSEARCH and EXTENDED
    api_key    -- sent to the provider as "apikey"
    book       -- dict holding authorName/bookName/bookSub for book searches,
                  or searchterm for magazine and general searches
    searchType -- "book", "shortbook" or "mag"; anything else is treated as a
                  general search
    searchMode -- only used to label the debug log line

    Returns the params dict, or None when the provider settings do not give a
    usable search type/category for this searchType.
    """
    params = None
    if searchType in ("book", "shortbook"):
        authorname = book['authorName']
        # strip any leading initials and leading whitespace. The length check
        # stops an IndexError once fewer than two characters are left
        # (eg "J. K." consists of initials only)
        while len(authorname) > 1 and authorname[1] in '. ':
            authorname = authorname[2:].strip()
        # middle initials can't have a dot
        authorname = authorname.replace('. ', ' ')
        authorname = cleanName(authorname)
        bookname = cleanName(book['bookName'])
        if bookname == authorname and book['bookSub']:
            # books like "Spike Milligan: Man of Letters"
            # where we split the title/subtitle on ':'
            bookname = cleanName(book['bookSub'])
        if bookname.startswith(authorname) and len(bookname) > len(authorname):
            # books like "Spike Milligan In his own words"
            # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
            bookname = bookname[len(authorname) + 1:]
        if searchType == "shortbook" and '(' in bookname:
            # the short search drops everything from the first bracket onwards
            bookname = bookname.split('(')[0].strip()

        if provider['BOOKSEARCH'] and provider['BOOKCAT']:
            # if specific booksearch, use it
            params = {
                "t": provider['BOOKSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['BOOKCAT']
            }
        elif provider['GENERALSEARCH'] and provider['BOOKCAT']:
            # if not, try general search
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['BOOKCAT']
            }
    elif searchType == "mag":
        if provider['MAGSEARCH'] and provider['MAGCAT']:
            # if specific magsearch, use it
            params = {
                "t": provider['MAGSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
        elif provider['GENERALSEARCH'] and provider['MAGCAT']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
    else:
        if provider['GENERALSEARCH']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                # this is a general search
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
    if params:
        logger.debug('[NewzNabPlus] - %s Search parameters set to %s' %
                     (searchMode, str(params)))
    else:
        logger.debug('[NewzNabPlus] - %s No matching search parameters' %
                     searchMode)

    return params
Exemple #17
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                         entrystatus='Active', refresh=False):
        """Import the books Google Books lists for an author into the database.

        Requests results for ``inauthor:"<authorname>"``, advancing the start
        index by 40 per request, then filters each result on language, title,
        publication date, isbn and duplicates. Accepted books are upserted into
        the books table along with cover, series and work page. Finally the
        author row gets its last-book details and entrystatus, and a row of
        counters is written to the stats table.

        :param authorid: AuthorID of the author row to update
        :param authorname: name used for the search, for duplicate checks and in log lines
        :param bookstatus: Status given to books that are new to the database
        :param entrystatus: Status written to the author once processing has finished
        :param refresh: when True the request/image caches are refreshed and the
                        final log line also reports the updated count
        :return: None. Unhandled exceptions are logged, not raised.
        """
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' % authorname)
            # google doesnt like accents in author names
            set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # Author is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip if no author, no author is no book.
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            continue

                        # isbnhead is the slice of the isbn used as key for the language lookups
                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif (len(book['isbn']) == 10) or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        # this is the 978 table, label the log line accordingly
                                                        logger.debug("ISBN978 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (book['name'], googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                                ignored += 1
                                continue

                        # rejected holds the reason code: 1 no name, 2 bad characters, 3 future date,
                        # 4 no date, 5 no isbn, 6 same author/title under another id, 7 bookid already known
                        rejected = 0
                        check_status = False
                        book_status = bookstatus  # new_book status, or new_author status
                        audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                        added = today()
                        locked = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                            removedResults += 1
                            rejected = 1
                        else:
                            bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                            # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                            # editions) and sometimes uses the same bookid if the book is the same but the title is
                            # slightly different. Not sure if googlebooks does too, but we only want one...
                            cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                            existing_book = myDB.match(cmd, (bookid,))
                            if existing_book:
                                book_status = existing_book['Status']
                                audio_status = existing_book['AudioStatus']
                                locked = existing_book['Manual']
                                added = existing_book['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                # NOTE(review): rejected is still 0 at this point because the date/isbn checks
                                # that set 3, 4 or 5 only run further down, so the 'Ignored' branch looks
                                # unreachable - confirm the intended order before changing it
                                if rejected in [3, 4, 5]:
                                    book_status = 'Ignored'
                                    audio_status = 'Ignored'
                                else:
                                    book_status = bookstatus  # new_book status, or new_author status
                                    audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                                added = today()
                                locked = False

                        # remove books with bad characters in title (raw string keeps \w a regex escape)
                        if not rejected and re.match(r'[^\w-]', bookname):
                            logger.debug("[%s] removed book for bad characters" % bookname)
                            removedResults += 1
                            rejected = 2

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                                removedResults += 1
                                rejected = 3

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug('Rejecting %s, no publication date' % bookname)
                                removedResults += 1
                                rejected = 4

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' % bookname)
                                removedResults += 1
                                rejected = 5

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            # NOTE(review): the values are passed as query parameters, so doubling the quotes
                            # looks like a leftover from string-built SQL - confirm before removing
                            match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorname, bookname, bookid))
                                    rejected = 6
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid,))
                            if match:  # we have a book with this bookid already
                                if bookname != match['BookName'] or authorname != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorname, bookname))
                                    # same book as before, so still refresh its details below
                                    check_status = True
                                duplicates += 1
                                rejected = 7

                        if check_status or not rejected or (
                                lazylibrarian.CONFIG['IMP_IGNORE'] and rejected in [3, 4, 5]):  # dates, isbn
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug("Book found: " + bookname + " " + book['date'])
                                updated = False
                                if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug('Updated cover for %s using %s' % (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif book['img'] and book['img'].startswith('http'):
                                    link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % book['img'])

                                serieslist = []
                                if book['series']:
                                    serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                        updated = True
                                setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist, bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug("[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug("[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError as err:
                # a missing key (presumably a page without 'items') ends the paging;
                # leave a trace in the log instead of dropping it silently
                logger.debug('[%s] Stopped processing Google Books results, missing key %s' % (authorname, err))

            deleteEmptySeries()
            logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                         (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid,))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
            logger.debug("Removed %s bad character or no-name result%s for author" %
                         (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

            # NOTE(review): same quote doubling on a bound parameter as in the duplicate check above
            # in this method - kept so new stats rows stay consistent with existing ones
            myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                        (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
Exemple #18
0
    def find_book(self, bookid=None, bookstatus="None"):
        """Add a single Google Books volume to the books table.

        Fetches the volume by bookid, looks the author up through GoodReads
        (adding the author to the database when neither the id nor the name is
        known), upserts the book, then fills in cover, series and work page.

        :param bookid: Google Books volume id
        :param bookstatus: Status stored for the book; a falsy value is replaced
                           by CONFIG['NEWBOOK_STATUS']
        :return: None. Returns early, after logging, when the request gives no
                 results, the volume has no author, or no AuthorID can be found.
        """
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, _ = gb_json_request(URL)

        if jsonresults is None:
            logger.debug('No results found for %s' % bookid)
            return

        # NOTE(review): the default is the string "None", which is truthy, so this
        # fallback only applies when a caller passes a falsy value - confirm intent
        if not bookstatus:
            bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

        book = bookdict(jsonresults)
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(book['name'], dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        if not book['author']:
            logger.debug('Book %s does not contain author field, skipping' % bookname)
            return
        # warn if language is in ignore list, but user said they wanted this book
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if book['lang'] not in valid_langs and 'All' not in valid_langs:
            logger.debug('Book %s googlebooks language does not match preference, %s' % (bookname, book['lang']))

        if lazylibrarian.CONFIG['NO_PUBDATE']:
            if not book['date'] or book['date'] == '0000':
                logger.warn('Book %s Publication date does not match preference, %s' % (bookname, book['date']))

        if lazylibrarian.CONFIG['NO_FUTURE']:
            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd, so compare
            # against today cut to the same length (a fixed [:4] flags any yyyy-mm in the current year)
            if book['date'] > today()[:len(book['date'])]:
                logger.warn('Book %s Future publication date does not match preference, %s' % (bookname, book['date']))

        authorname = book['author']
        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
            if not match:
                match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
                if match:
                    logger.debug('%s: Changing authorid from %s to %s' %
                                 (author['authorname'], AuthorID, match['AuthorID']))
                    AuthorID = match['AuthorID']  # we have a different authorid for that authorname
                else:  # no author but request to add book, add author with newauthor status
                    # User hit "add book" button from a search or a wishlist import
                    newauthor_status = 'Active'
                    if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                        newauthor_status = 'Paused'
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": newauthor_status
                    }
                    authorname = author['authorname']
                    myDB.upsert("authors", newValueDict, controlValueDict)
                    if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                        # get_author_books builds its search from authorname, so it has to be passed on
                        self.get_author_books(AuthorID, authorname,
                                              entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
            return

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": book['sub'],
            "BookDesc": book['desc'],
            "BookIsbn": book['isbn'],
            "BookPub": book['pub'],
            "BookGenre": book['genre'],
            "BookImg": book['img'],
            "BookLink": book['link'],
            "BookRate": float(book['rate']),
            "BookPages": book['pages'],
            "BookDate": book['date'],
            "BookLang": book['lang'],
            "Status": bookstatus,
            "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s by %s added to the books database" % (bookname, authorname))

        if 'nocover' in book['img'] or 'nophoto' in book['img']:
            # try to get a cover from another source
            workcover, source = getBookCover(bookid)
            if workcover:
                logger.debug('Updated cover for %s using %s' % (bookname, source))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        # belongs to the nocover test, not to "if workcover": a real cover url gets cached,
        # the same way get_author_books does it
        elif book['img'] and book['img'].startswith('http'):
            link, success, _ = cache_img("book", bookid, book['img'])
            if success:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('Failed to cache image for %s' % book['img'])

        serieslist = []
        if book['series']:
            serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
        if lazylibrarian.CONFIG['ADD_SERIES']:
            newserieslist = getWorkSeries(bookid)
            if newserieslist:
                serieslist = newserieslist
                logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
        setSeries(serieslist, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Exemple #19
0
def _series_author_from_xml(rootxml, bookname, dic, label):
    """ Scan goodreads search results for a work whose title closely matches bookname
        dic is the character replacement table applied to each candidate title
        label only tags the debug message ("Author" or "Title" search)
        Return the authorid of the first matching work, or "" if nothing matched """
    # Element.iter() replaces getiterator(), which was removed in python 3.9
    for item in rootxml.iter('work'):
        try:
            booktitle = item.find('./best_book/title').text
            booktitle = replace_all(booktitle, dic)
        except (KeyError, AttributeError):
            # a work without a usable title can't match, but must not abort the scan
            booktitle = ""
        if fuzz.token_set_ratio(booktitle, bookname) < 98:
            continue
        try:
            author = item.find('./best_book/author/name').text
        except (KeyError, AttributeError):
            author = ""
        try:
            authorid = item.find('./best_book/author/id').text
        except (KeyError, AttributeError):
            authorid = ""
        logger.debug("%s Search found %s %s, authorid %s" % (label, author, booktitle, authorid))
        # only the first close match is considered, even if it has no authorid
        return authorid
    return ""


def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Members that come without an authorid are looked up on goodreads,
        first by "title author" and then by title only
        Return how many authors you added """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?', (seriesid,))
    seriesname = result['SeriesName']
    members = getSeriesMembers(seriesid)
    # strip straight and curly quotes so titles compare cleanly
    dic = {u'\u2018': "", u'\u2019': "", u'\u201c': '', u'\u201d': '', "'": "", '"': ''}

    if members:
        for member in members:
            # member is (order, bookname, authorname, workid, authorid)
            bookname = replace_all(member[1], dic)
            authorname = member[2]
            authorid = member[4]
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                try:
                    searches = (("Author", bookname + ' ' + authorname),
                                ("Title", bookname))  # try again with title only
                    for label, searchname in searches:
                        searchname = cleanName(unaccented(searchname))
                        if PY2:
                            searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                        set_url = base_url + quote_plus(searchname) + '&' + urlencode(params)
                        rootxml, _ = gr_xml_request(set_url)
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' % searchname)
                        else:
                            authorid = _series_author_from_xml(rootxml, bookname, dic, label)
                        if authorid:
                            break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" % (type(e).__name__, str(e)))

            if authorid:
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)

    # the difference in author count before and after is the number we added
    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None, library='eBook'):
    """ Fetch a torrent (or magnet link) and pass it to every enabled torrent downloader
        bookid:    BookID whose status is set to "Snatched" on success
        tor_title: name of the torrent, replaced by the name the download client reports
        tor_url:   url of the torrent file, or a magnet link, as stored in the "wanted" table
        library:   'eBook', 'AudioBook' or 'magazine', selects the reject list and status column
        Downloaders are tried in a fixed order and each enabled one overwrites Source,
        so the "wanted" table records the last one that was used
        Return True if the torrent was accepted, False on any failure or rejection """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if not tor_url:
        # tor_url defaults to None, and the string tests below can't cope with that
        logger.warn('No torrent url supplied for %s' % tor_title)
        return False

    if tor_url.startswith('magnet:?'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    elif 'magnet:?' in tor_url:
        # discard any other parameters and just use the magnet link
        torrent = 'magnet:?' + tor_url.split('magnet:?')[1]
    else:
        # HTMLParser unescape is probably overkill, we only seem to get &amp;
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = makeUnicode(value)  # ensure unicode
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add extra parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
        proxies = proxyList()
        try:
            r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
            torrent = r.content
        except requests.exceptions.Timeout:
            logger.warn('Timeout fetching file from url: %s' % tor_url)
            return False
        except Exception as e:
            # some jackett providers redirect internally using http 301 to a magnet link
            # which requests can't handle, so throws an exception
            if "magnet:?" in str(e):
                torrent = 'magnet:?' + str(e).split('magnet:?')[1].strip("'")
            else:
                if hasattr(e, 'reason'):
                    logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, e.reason))
                else:
                    logger.warn('%s fetching file from url: %s, %s' % (type(e).__name__, tor_url, str(e)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                # msg records how far we got, for the debug log if the write fails
                msg = ''
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, text_type):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm '
                    setperm(tor_path)
                    msg += 'Saved '
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.warn("Failed to write magnet to file: %s %s" % (type(e).__name__, str(e)))
                    logger.debug("Progress: %s" % msg)
                    logger.debug("Filename [%s]" % (repr(tor_path)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            # msg records how far we got, for the debug log if the write fails
            msg = ''
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, text_type):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved '
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.warn("Failed to write torrent to file: %s %s" % (type(e).__name__, str(e)))
                logger.debug("Progress: %s" % msg)
                logger.debug("Filename [%s]" % (repr(tor_path)))
                return False

    hashid = CalcTorrentHash(torrent)
    # torrent may hold bytes or text by now, a magnet link is recognised in either form
    is_magnet = (isinstance(torrent, binary_type) and torrent.startswith(b'magnet')) or \
                (isinstance(torrent, text_type) and torrent.startswith('magnet'))

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        if is_magnet:
            status = qbittorrent.addTorrent(torrent, hashid)
        else:
            status = qbittorrent.addTorrent(tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
            logger.debug("TORRENT %s [%s] [%s]" % (len(torrent), torrent[:20], torrent[-20:]))
        Source = "TRANSMISSION"
        if is_magnet:
            downloadID = transmission.addTorrent(torrent)  # returns id or False
        elif torrent:
            downloadID = transmission.addTorrent(None, metainfo=b64encode(torrent))
        else:
            downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = hashid
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            if is_magnet:
                downloadID = deluge.addTorrent(torrent)
            elif torrent:
                downloadID = deluge.addTorrent(tor_title, data=b64encode(torrent))
            else:
                downloadID = deluge.addTorrent(tor_url)  # can be link or magnet, returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                elif is_magnet:
                    downloadID = client.call('core.add_torrent_magnet', torrent, args)
                elif torrent:
                    downloadID = client.call('core.add_torrent_file', tor_title, b64encode(torrent), args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID, lazylibrarian.CONFIG['DELUGE_LABEL'].lower())
                    result = client.call('core.get_torrent_status', downloadID, {})
                    if 'name' in result:
                        tor_title = result['name']

            except Exception as e:
                logger.error('DelugeRPC failed %s %s' % (type(e).__name__, str(e)))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" % library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = True
                        logger.debug("Rejecting torrent name %s, contains %s" % (tor_title, word))
                        break
                if rejected:
                    # mark as failed and ask the download client to drop the task again
                    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url,))
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?', (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, full_url))
        return True

    logger.error('Failed to download torrent from %s, %s' % (Source, tor_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (full_url,))
    return False
def TORDownloadMethod(bookid=None,
                      tor_prov=None,
                      tor_title=None,
                      tor_url=None):
    """ Fetch a torrent (or magnet link) and pass it to every enabled torrent downloader
        bookid:    BookID whose status is set to "Snatched" on success
        tor_prov:  provider name, not used here
        tor_title: name of the torrent, used for logging and the blackhole filename
        tor_url:   url of the torrent file, or a magnet link, as stored in the "wanted" table
        Return True if the torrent was accepted, False otherwise """
    myDB = database.DBConnection()
    download = False
    full_url = tor_url  # keep the url as stored in "wanted" table
    if (lazylibrarian.TOR_DOWNLOADER_DELUGE
            or lazylibrarian.TOR_DOWNLOADER_UTORRENT
            or lazylibrarian.TOR_DOWNLOADER_QBITTORRENT
            or lazylibrarian.TOR_DOWNLOADER_BLACKHOLE
            or lazylibrarian.TOR_DOWNLOADER_TRANSMISSION):

        if not tor_url:
            # tor_url defaults to None, and the string tests below can't cope with that
            logger.warn('No torrent url supplied for %s' % tor_title)
            return False

        if tor_url.startswith('magnet'):
            torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent
        else:
            if '&file=' in tor_url:
                # torznab results need to be re-encoded
                # had a problem with torznab utf-8 encoded strings not matching
                # our utf-8 strings because of long/short form differences
                url = tor_url.split('&file=')[0]
                value = tor_url.split('&file=')[1]
                if isinstance(value, str):
                    value = value.decode('utf-8')  # make unicode
                value = unicodedata.normalize('NFC',
                                              value)  # normalize to short form
                value = value.encode(
                    'unicode-escape')  # then escape the result
                value = value.replace(' ', '%20')  # and encode any spaces
                tor_url = url + '&file=' + value

            # strip url back to the .torrent as some sites add parameters
            if not tor_url.endswith('.torrent'):
                if '.torrent' in tor_url:
                    tor_url = tor_url.split('.torrent')[0] + '.torrent'

            # u'%s' is the same literal as the former ur'%s' (no backslashes in it)
            request = urllib2.Request(u'%s' % tor_url)
            if lazylibrarian.PROXY_HOST:
                request.set_proxy(lazylibrarian.PROXY_HOST,
                                  lazylibrarian.PROXY_TYPE)
            request.add_header('Accept-encoding', 'gzip')
            request.add_header('User-Agent', USER_AGENT)
            # PAB: no Referer header is sent for KAT,
            # KAT serves us html instead of torrent if this header is sent

            try:
                response = urllib2.urlopen(request, timeout=90)
                if response.info().get('Content-Encoding') == 'gzip':
                    buf = StringIO(response.read())
                    f = gzip.GzipFile(fileobj=buf)
                    torrent = f.read()
                else:
                    torrent = response.read()

            except (urllib2.URLError, socket.timeout) as e:
                logger.warn('Error fetching torrent from url: %s, %s' %
                            (tor_url, e))
                return False

        if lazylibrarian.TOR_DOWNLOADER_BLACKHOLE:
            tor_title = cleanName(tor_title)
            logger.debug("Sending %s to blackhole" % tor_title)
            tor_name = str.replace(str(tor_title), ' ', '_')
            if tor_url and tor_url.startswith('magnet'):
                tor_name = tor_name + '.magnet'
            else:
                tor_name = tor_name + '.torrent'
            tor_path = os.path.join(lazylibrarian.TORRENT_DIR, tor_name)
            with open(tor_path, 'wb') as torrent_file:
                torrent_file.write(torrent)
            logger.debug('Torrent file saved: %s' % tor_title)
            download = True

        if (lazylibrarian.TOR_DOWNLOADER_UTORRENT
                and lazylibrarian.UTORRENT_HOST):
            logger.debug("Sending %s to Utorrent" % tor_title)
            torrent_hash = CalcTorrentHash(torrent)
            download = utorrent.addTorrent(tor_url, torrent_hash)

        if (lazylibrarian.TOR_DOWNLOADER_QBITTORRENT
                and lazylibrarian.QBITTORRENT_HOST):
            logger.debug("Sending %s to qbittorrent" % tor_title)
            download = qbittorrent.addTorrent(tor_url)

        if (lazylibrarian.TOR_DOWNLOADER_TRANSMISSION
                and lazylibrarian.TRANSMISSION_HOST):
            logger.debug("Sending %s to Transmission" % tor_title)
            download = transmission.addTorrent(tor_url)

        if (lazylibrarian.TOR_DOWNLOADER_DELUGE and lazylibrarian.DELUGE_HOST):
            logger.debug("Sending %s to Deluge" % tor_title)
            if not lazylibrarian.DELUGE_USER:
                # no username, talk to the webui
                download = deluge.addTorrent(tor_url)
            else:
                # have username, talk to the daemon
                client = DelugeRPCClient(lazylibrarian.DELUGE_HOST,
                                         int(lazylibrarian.DELUGE_PORT),
                                         lazylibrarian.DELUGE_USER,
                                         lazylibrarian.DELUGE_PASS)
                client.connect()
                args = {"name": tor_title}
                download = client.call('core.add_torrent_url', tor_url, args)
                logger.debug('Deluge torrent_id: %s' % download)
                if download and lazylibrarian.DELUGE_LABEL:
                    labelled = client.call('label.set_torrent', download,
                                           lazylibrarian.DELUGE_LABEL)
                    logger.debug('Deluge label returned: %s' % labelled)
    else:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    # urls come from external providers, so pass them as parameters
    # rather than pasting them into the sql statement
    if download:
        logger.debug(u'Torrent id %s has been downloaded from %s' %
                     (download, tor_url))
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID=?',
                    (bookid,))
        myDB.action('UPDATE wanted SET status = "Snatched" WHERE NZBurl=?',
                    (full_url,))
        return True
    else:
        logger.error(u'Failed to download torrent @ <a href="%s">%s</a>' %
                     (full_url, tor_url))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl=?',
                    (full_url,))
        return False
Exemple #22
0
    def find_book(bookid=None, queue=None):
        """ Look up one volume on GoogleBooks by its id and add it to the books table as "Wanted"
            The author is resolved through GoodReads; if the author is not yet in the
            authors table they are added with status "Ignored"
            bookid: GoogleBooks volume id
            queue:  not used here
            Returns nothing; gives up early if the volume, its author field
            or a GoodReads author id can't be found """
        myDB = database.DBConnection()
        if not lazylibrarian.CONFIG['GB_API']:
            logger.warn('No GoogleBooks API key, check config')
        URL = 'https://www.googleapis.com/books/v1/volumes/' + \
              str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
        jsonresults, _ = get_json_request(URL)

        if not jsonresults:
            logger.debug('No results found for %s' % bookid)
            return

        info = jsonresults['volumeInfo']
        bookname = info['title']
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic)

        bookname = unaccented(bookname)
        bookname = bookname.strip()  # strip whitespace

        try:
            authorname = info['authors'][0]
        except (KeyError, IndexError):
            # field missing, or present but empty
            logger.debug('Book %s does not contain author field, skipping' %
                         bookname)
            return
        try:
            # warn if language is in ignore list, but user said they wanted this book
            booklang = info['language']
            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            if booklang not in valid_langs and 'All' not in valid_langs:
                logger.debug(
                    'Book %s googlebooks language does not match preference, %s'
                    % (bookname, booklang))
        except KeyError:
            logger.debug('Book does not have language field')
            booklang = "Unknown"

        bookpub = info.get('publisher', "")

        # subtitles like "Something (Name Series #3)" carry the series name and number
        series = ""
        seriesNum = ""
        booksub = info.get('subtitle', "")
        bracketed = booksub.split('(')
        if len(bracketed) > 1:
            series_parts = bracketed[1].split(' Series ')
            series = series_parts[0]
            if len(series_parts) > 1:
                seriesNum = series_parts[1].split(')')[0]
                if seriesNum.startswith('#'):
                    seriesNum = seriesNum[1:]

        bookdate = info.get('publishedDate', '0000-00-00')
        bookimg = info.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
        bookrate = float(info.get('averageRating', 0))
        bookpages = info.get('pageCount', 0)
        bookdesc = info.get('description', "")

        try:
            bookgenre = info['categories'][0]
        except (KeyError, IndexError):
            bookgenre = ""

        # only the first identifier is looked at, and only an ISBN_10 is kept
        bookisbn = ""
        try:
            identifier = info['industryIdentifiers'][0]
            if identifier['type'] == 'ISBN_10':
                bookisbn = identifier['identifier']
        except (KeyError, IndexError):
            bookisbn = ""

        booklink = info['canonicalVolumeLink']

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            # values are passed as parameters so quotes in a name can't break the query
            match = myDB.match(
                'SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
            if not match:
                match = myDB.match(
                    'SELECT AuthorID from authors WHERE AuthorName=?',
                    (author['authorname'],))
                if match:
                    logger.debug(
                        '%s: Changing authorid from %s to %s' %
                        (author['authorname'], AuthorID, match['AuthorID']))
                    # we have a different authorid for that authorname
                    AuthorID = match['AuthorID']
                else:  # no author but request to add book, add author as "ignored"
                    # User hit "add book" button from a search
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": "Ignored"
                    }
                    myDB.upsert("authors", newValueDict, controlValueDict)
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" %
                        (authorname, bookname))
            return

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' %
                             (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

            elif bookimg and bookimg.startswith('http'):
                link, success = cache_img("book", bookid, bookimg)
                if success:
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"BookImg": link}
                    myDB.upsert("books", newValueDict, controlValueDict)
                else:
                    logger.debug('Failed to cache image for %s' % bookimg)

        if lazylibrarian.CONFIG['ADD_SERIES']:
            # prefer series info from librarything
            seriesdict = getWorkSeries(bookid)
            if seriesdict:
                logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
            elif series:
                # fall back to whatever we parsed out of the subtitle
                seriesdict = {cleanName(unaccented(series)): seriesNum}
            setSeries(seriesdict, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Exemple #23
0
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", refresh=False):
        """Import every book Goodreads lists for an author into the books table.

        Walks the paged ``author/list`` Goodreads feed, optionally filters each
        book by language (ISBN prefix tables, cached ``languages`` rows,
        LibraryThing, then the Goodreads book page), rejects unwanted or
        duplicate entries, and upserts the rest.  Afterwards the author row is
        marked "Active" with its latest book, and a row of counters is written
        to the ``stats`` table.

        :param authorid: Goodreads author id; used to build the request URL and
            as the AuthorID for every database row written.
        :param authorname: only used in log messages and in the stats row.
        :param bookstatus: status given to books not already in the database,
            also passed to ``setStatus``.
        :param refresh: when True the author list is requested with the cache
            disabled, and the flag is forwarded to ``cache_img``.
        :return: None. Any exception is caught and logged, never propagated.
        """
        try:
            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

            # Author is loading
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            # NOTE(review): both early returns below leave the author row in
            # "Loading" status - confirm whether callers reset it.
            try:
                rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
            except Exception as e:
                logger.error("Error fetching author books: %s" % str(e))
                return
            if rootxml is None:
                logger.debug("Error requesting author books")
                return
            if not in_cache:
                api_hits += 1
            resultxml = rootxml.getiterator('book')

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])

            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0

            if not len(resultxml):
                logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
            else:
                logger.debug("[%s] Now processing books with GoodReads API" % authorname)
                logger.debug(u"url " + URL)

                authorNameResult = rootxml.find('./author/name').text
                # Goodreads sometimes puts extra whitespace in the author names!
                authorNameResult = ' '.join(authorNameResult.split())
                logger.debug(u"GoodReads author name [%s]" % authorNameResult)
                loopCount = 1

                # one pass of the while loop per page of results
                while resultxml:
                    for book in resultxml:
                        total_count += 1

                        if book.find('publication_year').text is None:
                            pubyear = "0000"
                        else:
                            pubyear = book.find('publication_year').text

                        try:
                            bookimg = book.find('image_url').text
                            if 'nocover' in bookimg:
                                bookimg = 'images/nocover.png'
                        except (KeyError, AttributeError):
                            bookimg = 'images/nocover.png'

                        bookLanguage = "Unknown"
                        find_field = "id"
                        isbn = ""
                        isbnhead = ""
                        if "All" not in valid_langs:  # do we care about language
                            if book.find('isbn').text:
                                find_field = "isbn"
                                isbn = book.find('isbn').text
                                isbnhead = isbn[0:3]
                            else:
                                if book.find('isbn13').text:
                                    find_field = "isbn13"
                                    isbn = book.find('isbn13').text
                                    # skip the 3 digit 978/979 prefix
                                    isbnhead = isbn[3:6]
                            # Try to use shortcut of ISBN identifier codes described here...
                            # https://en.wikipedia.org/wiki/List_of_ISBN_identifier_groups
                            if isbnhead:
                                if find_field == "isbn13" and isbn.startswith('979'):
                                    for item in lazylibrarian.isbn_979_dict:
                                        if isbnhead.startswith(item):
                                            bookLanguage = lazylibrarian.isbn_979_dict[item]
                                            break
                                    if bookLanguage != "Unknown":
                                        logger.debug("ISBN979 returned %s for %s" % (bookLanguage, isbnhead))
                                elif (find_field == "isbn") or (find_field == "isbn13" and isbn.startswith('978')):
                                    for item in lazylibrarian.isbn_978_dict:
                                        if isbnhead.startswith(item):
                                            bookLanguage = lazylibrarian.isbn_978_dict[item]
                                            break
                                    if bookLanguage != "Unknown":
                                        logger.debug("ISBN978 returned %s for %s" % (bookLanguage, isbnhead))

                            if bookLanguage == "Unknown" and isbnhead:
                                # Nothing in the isbn dictionary, try any cached results
                                # isbnhead comes from the remote feed, so double any embedded quotes
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' %
                                                   isbnhead.replace('"', '""'))
                                if match:
                                    bookLanguage = match['lang']
                                    cache_hits += 1
                                    logger.debug("Found cached language [%s] for %s [%s]" %
                                                 (bookLanguage, find_field, isbnhead))
                                else:
                                    # no match in cache, try searching librarything for a language code using the isbn
                                    # if no language found, librarything return value is "invalid" or "unknown"
                                    # returns plain text, not xml
                                    BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                    try:
                                        librarything_wait()
                                        resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                        lt_lang_hits += 1
                                        logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                        if 'invalid' in resp or 'Unknown' in resp:
                                            bookLanguage = "Unknown"
                                        else:
                                            bookLanguage = resp  # found a language code
                                            # resp is raw text from the web, escape it before building SQL
                                            myDB.action('insert into languages values ("%s", "%s")' %
                                                        (isbnhead.replace('"', '""'),
                                                         bookLanguage.replace('"', '""')))
                                            logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                                    except Exception as e:
                                        logger.error("Error finding LT language result for [%s], %s" % (isbn, str(e)))

                            if bookLanguage == "Unknown":
                                # still  no earlier match, we'll have to search the goodreads api
                                try:
                                    if book.find(find_field).text:
                                        BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                                   book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                        logger.debug(u"Book URL: " + BOOK_URL)

                                        # crude rate limit: wait if we already hit goodreads this second
                                        time_now = int(time.time())
                                        if time_now <= lazylibrarian.LAST_GOODREADS:
                                            time.sleep(1)

                                        bookLanguage = ""
                                        # Separate flag for this request. Defaults to "no api hit" so a
                                        # request that raises is not counted from a stale earlier value.
                                        lang_in_cache = True
                                        try:
                                            BOOK_rootxml, lang_in_cache = get_xml_request(BOOK_URL)
                                            if BOOK_rootxml is None:
                                                logger.debug('Error requesting book language code')
                                            else:
                                                if not lang_in_cache:
                                                    # only update last_goodreads if the result wasn't found in the cache
                                                    lazylibrarian.LAST_GOODREADS = time_now
                                                try:
                                                    bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                                except Exception as e:
                                                    logger.debug("Error finding language_code in book xml: %s" % str(e))
                                        except Exception as e:
                                            logger.debug("Error getting book xml: %s" % str(e))

                                        if not lang_in_cache:
                                            gr_lang_hits += 1
                                        if not bookLanguage:
                                            bookLanguage = "Unknown"
                                            # At this point, give up?
                                            # WhatWork on author/title doesn't give us a language.
                                            # It might give us the "original language" of the book (but not always)
                                            # and our copy might not be in the original language anyway
                                            # eg "The Girl With the Dragon Tattoo" original language Swedish
                                            # If we have an isbn, try WhatISBN to get alternatives
                                            # in case any of them give us a language, but it seems if thinglang doesn't
                                            # have a language for the first isbn code, it doesn't for any of the
                                            # alternatives either
                                            # Goodreads search results don't include the language. Although sometimes
                                            # it's in the html page, it's not in the xml results

                                        if isbnhead != "":
                                            # if GR didn't give an isbn we can't cache it, just use language for this book
                                            myDB.action('insert into languages values ("%s", "%s")' %
                                                        (isbnhead.replace('"', '""'),
                                                         bookLanguage.replace('"', '""')))
                                            logger.debug("GoodReads reports language [%s] for %s" %
                                                         (bookLanguage, isbnhead))
                                        else:
                                            not_cached += 1

                                        logger.debug(u"GR language: " + bookLanguage)
                                    else:
                                        logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                        # continue

                                except Exception as e:
                                    logger.debug(u"Goodreads language search failed: %s" % str(e))

                            if bookLanguage not in valid_langs:
                                logger.debug('Skipped %s with language %s' % (book.find('title').text, bookLanguage))
                                ignored += 1
                                continue

                        bookname = book.find('title').text
                        bookid = book.find('id').text
                        bookdesc = book.find('description').text
                        bookisbn = book.find('isbn').text
                        bookpub = book.find('publisher').text
                        booklink = book.find('link').text
                        bookrate = float(book.find('average_rating').text)
                        bookpages = book.find('num_pages').text
                        bookname = unaccented(bookname)

                        bookname, booksub = split_title(authorNameResult, bookname)

                        dic = {':': '.', '"': ''}  # do we need to strip apostrophes , '\'': ''}
                        bookname = replace_all(bookname, dic)
                        bookname = bookname.strip()  # strip whitespace
                        booksub = replace_all(booksub, dic)
                        booksub = booksub.strip()  # strip whitespace
                        if booksub:
                            series, seriesNum = bookSeries(booksub)
                        else:
                            series, seriesNum = bookSeries(bookname)

                        rejected = False
                        check_status = False

                        # re.match only tests the first character of the title
                        if re.match(r'[^\w-]', bookname):  # reject books with bad characters in title
                            logger.debug(u"removed result [" + bookname + "] for bad characters")
                            removedResults += 1
                            rejected = True

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # both sides are 4 character year strings so string comparison works
                            if pubyear > today()[:4]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, pubyear))
                                removedResults += 1
                                rejected = True

                        if not rejected and not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' %
                                         (bookid, authorNameResult))
                            removedResults += 1
                            rejected = True

                        if not rejected:
                            # bookname has already had any double quotes removed above
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName = "%s" COLLATE NOCASE and AuthorName = "%s" COLLATE NOCASE' % \
                                   (bookname, authorNameResult.replace('"', '""'))
                            match = myDB.match(cmd)
                            if match:
                                if match['BookID'] != bookid:
                                    # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (bookid, authorNameResult, bookname, match['BookID']))
                                    duplicates += 1
                                    rejected = True

                        if not rejected:
                            # quote the id so it is compared as text, same as the Status lookup below
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID="%s"' % \
                                   bookid.replace('"', '""')
                            match = myDB.match(cmd)
                            if match:
                                # we have a book with this bookid already
                                if bookname != match['BookName'] or authorNameResult != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorNameResult, bookname,
                                                  match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorNameResult, bookname))
                                    # same book as we already hold, so still refresh its details below
                                    check_status = True
                                duplicates += 1
                                rejected = True

                        if check_status or not rejected:
                            existing_book = myDB.match('SELECT Status,Manual FROM books WHERE BookID = "%s"' % bookid)
                            if existing_book:
                                book_status = existing_book['Status']
                                locked = existing_book['Manual']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                locked = False

                            # Is the book already in the database?
                            # Leave alone if locked or status "ignore"
                            if not locked and book_status != "Ignored":
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": booksub,
                                    "BookDesc": bookdesc,
                                    "BookIsbn": bookisbn,
                                    "BookPub": bookpub,
                                    "BookGenre": "",
                                    "BookImg": bookimg,
                                    "BookLink": booklink,
                                    "BookRate": bookrate,
                                    "BookPages": bookpages,
                                    "BookDate": pubyear,
                                    "BookLang": bookLanguage,
                                    "Status": book_status,
                                    "BookAdded": today()
                                }

                                updated = False

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                                if 'nocover' in bookimg or 'nophoto' in bookimg:
                                    # try to get a cover from librarything
                                    workcover = getBookCover(bookid)
                                    if workcover:
                                        logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif bookimg and bookimg.startswith('http'):
                                    link, success = cache_img("book", bookid, bookimg, refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % bookimg)

                                seriesdict = {}
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    # prefer series info from librarything
                                    seriesdict = getWorkSeries(bookid)
                                    if seriesdict:
                                        logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
                                        updated = True
                                    else:
                                        if series:
                                            seriesdict = {cleanName(unaccented(series)): seriesNum}
                                    setSeries(seriesdict, bookid)

                                new_status = setStatus(bookid, seriesdict, bookstatus)

                                if not new_status == book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug(u"[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, bookLanguage, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug(u"[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, bookLanguage, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1

                    # fetch the next page; resultxml stays None (ending the loop) on any failure
                    loopCount += 1
                    URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                          urllib.urlencode(self.params) + '&page=' + str(loopCount)
                    resultxml = None
                    try:
                        rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                        if rootxml is None:
                            logger.debug('Error requesting next page of results')
                        else:
                            resultxml = rootxml.getiterator('book')
                            if not in_cache:
                                api_hits += 1
                    except Exception as e:
                        resultxml = None
                        logger.error("Error finding next page of results: %s" % str(e))

                    if resultxml:
                        if all(False for _ in resultxml):  # returns True if iterator is empty
                            resultxml = None

            deleteEmptySeries()
            lastbook = myDB.match('SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID="%s" '
                                  'AND Status != "Ignored" order by BookDate DESC' % authorid)
            if lastbook:
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": "Active",
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }
            myDB.upsert("authors", newValueDict, controlValueDict)

            # This is here because GoodReads sometimes has several entries with the same BookID!
            modified_count = added_count + updated_count

            logger.debug("Found %s result%s" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug(
                "Removed %s bad character or no-name result%s" %
                (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s" % (modified_count, plural(modified_count)))

            myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' %
                        (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            # top level boundary: log the traceback rather than let the caller see the failure
            logger.error('Unhandled exception in GR.get_author_books: %s' % traceback.format_exc())
def dbupgrade(db_current_version):
    try:
        myDB = database.DBConnection()
        db_version = 0
        result = myDB.match('PRAGMA user_version')
        if result and result[0]:
            value = str(result[0])
            if value.isdigit():
                db_version = int(value)

        check = myDB.match('PRAGMA integrity_check')
        if check and check[0]:
            result = check[0]
            if result == 'ok':
                logger.debug('Database integrity check: %s' % result)
            else:
                logger.error('Database integrity check: %s' % result)
                # should probably abort now

        if db_version < db_current_version:
            myDB = database.DBConnection()

            if db_version < 1:
                if not has_column(myDB, "authors", "AuthorID"):
                    # it's a new database. Create tables but no need for any upgrading
                    db_version = db_current_version
                    lazylibrarian.UPDATE_MSG = 'Creating new database, version %s' % db_version
                else:
                    lazylibrarian.UPDATE_MSG = 'Updating database to version %s, current version is %s' % (
                        db_current_version, db_version)
                logger.info(lazylibrarian.UPDATE_MSG)
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS authors (AuthorID TEXT UNIQUE, AuthorName TEXT UNIQUE, \
                AuthorImg TEXT, AuthorLink TEXT, DateAdded TEXT, Status TEXT, LastBook TEXT, LastBookImg TEXT, \
                LastLink Text, LastDate TEXT,  HaveBooks INTEGER, TotalBooks INTEGER, AuthorBorn TEXT, \
                AuthorDeath TEXT, UnignoredBooks INTEGER, Manual TEXT)')
                myDB.action('CREATE TABLE IF NOT EXISTS books (AuthorID TEXT, \
                BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \
                BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, BookFile TEXT, \
                BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, WorkPage TEXT, Manual TEXT)'
                            )
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS wanted (BookID TEXT, NZBurl TEXT, NZBtitle TEXT, NZBdate TEXT, \
                NZBprov TEXT, Status TEXT, NZBsize TEXT, AuxInfo TEXT, NZBmode TEXT, Source TEXT, DownloadID TEXT)'
                )
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS pastissues AS SELECT * FROM wanted WHERE 0'
                )  # same columns
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS magazines (Title TEXT UNIQUE, Regex TEXT, Status TEXT, \
                MagazineAdded TEXT, LastAcquired TEXT, IssueDate TEXT, IssueStatus TEXT, Reject TEXT, \
                LatestCover TEXT)')
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS languages (isbn TEXT, lang TEXT)'
                )
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS issues (Title TEXT, IssueID TEXT UNIQUE, IssueAcquired TEXT, \
                IssueDate TEXT, IssueFile TEXT)')
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS stats (authorname text, GR_book_hits int, GR_lang_hits int, \
                LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int, \
                duplicates int)')
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS series (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \
                Status TEXT)')
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS member (SeriesID INTEGER, BookID TEXT, SeriesNum TEXT)'
                )
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS seriesauthors (SeriesID INTEGER, AuthorID TEXT)'
                )

            # These are the incremental changes before database versioning was introduced.
            # Old database tables might already have these incorporated depending on version, so we need to check...
            if db_version < 1:

                if not has_column(myDB, "books", "BookSub"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold book subtitles.'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN BookSub TEXT')

                if not has_column(myDB, "books", "BookSub"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold book publisher'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN BookPub TEXT')

                if not has_column(myDB, "books", "BookGenre"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold bookgenre'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN BookGenre TEXT')

                if not has_column(myDB, "books", "BookFile"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold book filename'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN BookFile TEXT')

                if not has_column(myDB, "wanted", "AuxInfo"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold AuxInfo'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE wanted ADD COLUMN AuxInfo TEXT')

                if not has_column(myDB, "wanted", "NZBsize"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold NZBsize'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE wanted ADD COLUMN NZBsize TEXT')

                if not has_column(myDB, "wanted", "NZBmode"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold NZBmode'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE wanted ADD COLUMN NZBmode TEXT')

                if not has_column(myDB, "authors", "UnignoredBooks"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold UnignoredBooks'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'ALTER TABLE authors ADD COLUMN UnignoredBooks INTEGER'
                    )

                if not has_column(myDB, "magazines", "IssueStatus"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold IssueStatus'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'ALTER TABLE magazines ADD COLUMN IssueStatus TEXT')

                addedWorkPage = False
                if not has_column(myDB, "books", "WorkPage"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold WorkPage'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN WorkPage TEXT')
                    addedWorkPage = True

                addedSeries = False
                if not has_column(myDB, "series",
                                  "SeriesID") and not has_column(
                                      myDB, "books", "Series"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold Series'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN Series TEXT')
                    addedSeries = True

                # SeriesOrder shouldn't be an integer: some later-written books
                # and novellas logically go in between books of the main series,
                # so their SeriesOrder is not an integer, e.g. 1.5,
                # and we need to update SeriesOrder to store as text.
                # Because sqlite can't drop columns we create a new column SeriesNum,
                # copy the old column values into it, and use SeriesNum instead
                if not has_column(myDB, "books", "SeriesNum") and has_column(
                        myDB, "books", "SeriesOrder"):
                    # no SeriesNum column, so create one
                    lazylibrarian.UPDATE_MSG = 'Updating books to hold SeriesNum'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN SeriesNum TEXT')
                    myDB.action('UPDATE books SET SeriesNum = SeriesOrder')
                    myDB.action('UPDATE books SET SeriesOrder = Null')

                addedIssues = False
                if not has_column(myDB, "issues", "Title"):
                    lazylibrarian.UPDATE_MSG = 'Updating database to hold Issues table'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'CREATE TABLE issues (Title TEXT, IssueID TEXT, IssueAcquired TEXT, IssueDate TEXT, IssueFile TEXT)'
                    )
                    addedIssues = True

                if not has_column(myDB, "issues", "IssueID"):
                    lazylibrarian.UPDATE_MSG = 'Updating Issues table to hold IssueID'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE issues ADD COLUMN IssueID TEXT')
                    addedIssues = True

                myDB.action('DROP TABLE if exists capabilities')

                if addedIssues:
                    try:
                        magazinescan.magazineScan()
                    except Exception as e:
                        logger.debug("Failed to scan magazines, %s" % str(e))

                if addedWorkPage:
                    try:
                        lazylibrarian.UPDATE_MSG = 'Adding WorkPage to existing books'
                        logger.debug(lazylibrarian.UPDATE_MSG)
                        threading.Thread(target=bookwork.setWorkPages,
                                         name="ADDWORKPAGE",
                                         args=[]).start()
                    except Exception as e:
                        logger.debug("Failed to update WorkPages, %s" % str(e))

                if addedSeries:
                    try:
                        books = myDB.select(
                            'SELECT BookID, BookName FROM books')
                        if books:
                            lazylibrarian.UPDATE_MSG = 'Adding series to existing books'
                            logger.debug(lazylibrarian.UPDATE_MSG)
                            tot = len(books)
                            cnt = 0
                            for book in books:
                                cnt += 1
                                lazylibrarian.UPDATE_MSG = 'Adding series to existing books: %s of %s' % (
                                    cnt, tot)
                                series, seriesNum = bookSeries(
                                    book["BookName"])
                                if series:
                                    controlValueDict = {
                                        "BookID": book["BookID"]
                                    }
                                    newValueDict = {
                                        "series": series,
                                        "seriesNum": seriesNum
                                    }
                                    myDB.upsert("books", newValueDict,
                                                controlValueDict)
                    except Exception as e:
                        logger.error('Error: ' + str(e))

            if db_version < 2:
                try:
                    results = myDB.select(
                        'SELECT BookID,NZBsize FROM wanted WHERE NZBsize LIKE "% MB"'
                    )
                    if results:
                        lazylibrarian.UPDATE_MSG = 'Removing units from wanted table'
                        logger.debug(lazylibrarian.UPDATE_MSG)
                        tot = len(results)
                        cnt = 0
                        for units in results:
                            cnt += 1
                            lazylibrarian.UPDATE_MSG = 'Removing units from wanted table: %s of %s' % (
                                cnt, tot)
                            nzbsize = units["NZBsize"]
                            nzbsize = nzbsize.split(' ')[0]
                            myDB.action(
                                'UPDATE wanted SET NZBsize = "%s" WHERE BookID = "%s"'
                                % (nzbsize, units["BookID"]))

                except Exception as e:
                    logger.error('Error: ' + str(e))

            if db_version < 3:
                if has_column(myDB, "books", "SeriesOrder"):
                    lazylibrarian.UPDATE_MSG = 'Removing SeriesOrder from books table'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'CREATE TABLE IF NOT EXISTS temp_books (AuthorID TEXT, AuthorName TEXT, AuthorLink TEXT, \
                    BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \
                    BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \
                    BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, Series TEXT, \
                    SeriesNum TEXT, WorkPage TEXT)')
                    myDB.action(
                        'INSERT INTO temp_books SELECT AuthorID,AuthorName,AuthorLink,BookName,BookSub, \
                    BookDesc,BookGenre,BookIsbn,BookPub,BookRate,BookImg,BookPages,BookLink,BookID, \
                    BookFile,BookDate,BookLang,BookAdded,Status,Series,SeriesNum,WorkPage FROM books'
                    )
                    myDB.action('DROP TABLE books')
                    myDB.action('ALTER TABLE temp_books RENAME TO books')

                if not has_column(myDB, "pastissues", "BookID"):
                    lazylibrarian.UPDATE_MSG = 'Moving magazine past issues into new table'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'CREATE TABLE pastissues AS SELECT * FROM wanted WHERE Status="Skipped" AND length(AuxInfo) > 0'
                    )
                    myDB.action(
                        'DELETE FROM wanted WHERE Status="Skipped" AND length(AuxInfo) > 0'
                    )

            if db_version < 4:
                if not has_column(myDB, "stats", "duplicates"):
                    lazylibrarian.UPDATE_MSG = 'Updating stats table to hold duplicates'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE stats ADD COLUMN duplicates INT')

            if db_version < 5:
                issues = myDB.select(
                    'SELECT IssueID,IssueDate from issues WHERE length(IssueDate) < 4 and length(IssueDate) > 0'
                )
                if issues:
                    lazylibrarian.UPDATE_MSG = 'Updating issues table to hold 4 digit issue numbers'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    tot = len(issues)
                    cnt = 0
                    for issue in issues:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = 'Updating issues table 4 digits: %s of %s' % (
                            cnt, tot)
                        issueid = issue['IssueID']
                        issuedate = str(issue['IssueDate'])
                        issuedate = issuedate.zfill(4)
                        myDB.action(
                            'UPDATE issues SET IssueDate="%s" WHERE IssueID="%s"'
                            % (issuedate, issueid))

                mags = myDB.select(
                    'SELECT Title,IssueDate from magazines WHERE length(IssueDate) < 4 and length(IssueDate) > 0'
                )
                if mags:
                    lazylibrarian.UPDATE_MSG = 'Updating magazines table to 4 digits'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    tot = len(mags)
                    cnt = 0
                    for mag in mags:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = 'Updating magazines table to 4 digits: %s of %s' % (
                            cnt, tot)
                        title = mag['Title']
                        issuedate = str(mag['IssueDate'])
                        issuedate = issuedate.zfill(4)
                        myDB.action(
                            'UPDATE magazines SET IssueDate="%s" WHERE Title="%s"'
                            % (issuedate, title))

            if db_version < 6:
                if not has_column(myDB, "books", "Manual"):
                    lazylibrarian.UPDATE_MSG = 'Updating books table to hold Manual setting'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE books ADD COLUMN Manual TEXT')

            if db_version < 7:
                if not has_column(myDB, "wanted", "Source"):
                    lazylibrarian.UPDATE_MSG = 'Updating wanted table to hold Source and DownloadID'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE wanted ADD COLUMN Source TEXT')
                    myDB.action(
                        'ALTER TABLE wanted ADD COLUMN DownloadID TEXT')

            if db_version < 8:
                src = os.path.join(lazylibrarian.PROG_DIR,
                                   'data/images/cache/')
                dst = lazylibrarian.CACHEDIR
                images = myDB.select(
                    'SELECT AuthorID, AuthorImg FROM authors WHERE AuthorImg LIKE "images/cache/%"'
                )
                if images:
                    logger.debug('Moving author images to new location')
                    tot = len(images)
                    cnt = 0
                    for image in images:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Moving author images to new location: %s of %s" % (
                            cnt, tot)
                        img = image['AuthorImg']
                        img = img[7:]
                        myDB.action(
                            'UPDATE authors SET AuthorImg="%s" WHERE AuthorID="%s"'
                            % (img, image['AuthorID']))
                        img = img[6:]
                        srcfile = os.path.join(src, img)
                        if os.path.isfile(srcfile):
                            try:
                                shutil.move(os.path.join(src, img),
                                            os.path.join(dst, img))
                            except Exception as e:
                                logger.warn("dbupgrade: %s" % str(e))
                    logger.debug("Author Image cache updated")

                images = myDB.select(
                    'SELECT BookID, BookImg FROM books WHERE BookImg LIKE "images/cache/%"'
                )
                if images:
                    logger.debug('Moving book images to new location')
                    tot = len(images)
                    cnt = 0
                    for image in images:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Moving book images to new location: %s of %s" % (
                            cnt, tot)
                        img = image['BookImg']
                        img = img[7:]
                        myDB.action(
                            'UPDATE books SET BookImg="%s" WHERE BookID="%s"' %
                            (img, image['BookID']))
                        img = img[6:]
                        srcfile = os.path.join(src, img)
                        if os.path.isfile(srcfile):
                            try:
                                shutil.move(srcfile, os.path.join(dst, img))
                            except Exception as e:
                                logger.warn("dbupgrade: %s" % str(e))
                    logger.debug("Book Image cache updated")

            if db_version < 9:
                if not has_column(myDB, "magazines", "Reject"):
                    # remove frequency column, rename regex to reject, add new regex column for searches
                    lazylibrarian.UPDATE_MSG = 'Updating magazines table'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'CREATE TABLE IF NOT EXISTS temp_table (Title TEXT, Regex TEXT, Status TEXT, \
                            MagazineAdded TEXT, LastAcquired TEXT, IssueDate TEXT, IssueStatus TEXT, Reject TEXT)'
                    )
                    myDB.action(
                        'INSERT INTO temp_table SELECT Title, Regex, Status, MagazineAdded, LastAcquired, \
                            IssueDate, IssueStatus, Regex FROM magazines')
                    myDB.action('DROP TABLE magazines')
                    myDB.action('ALTER TABLE temp_table RENAME TO magazines')
                    myDB.action('UPDATE magazines SET Regex = Null')

            if db_version < 10:
                # make sure columns in pastissues match those in wanted table
                # needed when upgrading from old 3rd party packages (eg freenas)
                myDB.action('DROP TABLE pastissues')
                myDB.action(
                    'CREATE TABLE pastissues AS SELECT * FROM wanted WHERE 0'
                )  # same columns, but empty table

            if db_version < 11:
                # keep last book image
                if not has_column(myDB, "authors", "LastBookImg"):
                    lazylibrarian.UPDATE_MSG = 'Updating author table to hold last book image'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'ALTER TABLE authors ADD COLUMN LastBookImg TEXT')
                    books = myDB.select(
                        'SELECT AuthorID, AuthorName, LastBook from authors')

                    if books:
                        for book in books:
                            lazylibrarian.UPDATE_MSG = 'Updating last book image for %s' % book[
                                'AuthorName']
                            if book['LastBook']:
                                match = myDB.match(
                                    'SELECT BookImg from books WHERE AuthorID="%s" AND BookName="%s"'
                                    % (book['AuthorID'], book['LastBook']))
                                if match:
                                    myDB.action(
                                        'UPDATE authors SET LastBookImg="%s" WHERE AuthorID=%s'
                                        % (match['BookImg'], book['AuthorID']))

            if db_version < 12:
                # keep last magazine issue image
                if not has_column(myDB, "Magazines", "LatestCover"):
                    lazylibrarian.UPDATE_MSG = 'Updating magazine table to hold last issue image'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action(
                        'ALTER TABLE magazines ADD COLUMN LatestCover TEXT')
                    mags = myDB.select(
                        'SELECT Title, LastAcquired from magazines')

                    if mags:
                        for mag in mags:
                            lazylibrarian.UPDATE_MSG = 'Updating last issue image for %s' % mag[
                                'Title']
                            match = myDB.match(
                                'SELECT IssueFile from issues WHERE IssueAcquired="%s" AND Title="%s"'
                                % (mag['LastAcquired'], mag['Title']))
                            if match:
                                coverfile = os.path.splitext(
                                    match['IssueFile'])[0] + '.jpg'
                                if os.path.exists(coverfile):
                                    myDB.action(
                                        'UPDATE magazines SET LatestCover="%s" WHERE Title="%s"'
                                        % (coverfile, mag['Title']))

            if db_version < 13:
                if not has_column(myDB, "authors", "Manual"):
                    lazylibrarian.UPDATE_MSG = 'Updating authors table to hold Manual setting'
                    logger.debug(lazylibrarian.UPDATE_MSG)
                    myDB.action('ALTER TABLE authors ADD COLUMN Manual TEXT')

            if db_version < 14:
                src = lazylibrarian.CACHEDIR
                try:
                    os.mkdir(os.path.join(src, 'author'))
                except OSError as e:
                    if e.errno is not 17:  # already exists is ok
                        logger.debug('mkdir author cache reports: %s' % str(e))

                query = 'SELECT AuthorName, AuthorID, AuthorImg FROM authors '
                query += 'WHERE AuthorImg LIKE "cache/%" '
                query += 'AND AuthorImg NOT LIKE "cache/author/%"'

                images = myDB.select(query)
                if images:
                    tot = len(images)
                    logger.debug('Moving %s author images to new location' %
                                 tot)
                    cnt = 0
                    for image in images:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Moving author images to new location: %s of %s" % (
                            cnt, tot)
                        try:
                            img = image['AuthorImg']
                            img = img.rsplit('/', 1)[1]
                            srcfile = os.path.join(src, img)
                            if os.path.isfile(srcfile):
                                try:
                                    shutil.move(
                                        srcfile,
                                        os.path.join(src, "author", img))
                                    myDB.action(
                                        'UPDATE authors SET AuthorImg="cache/author/%s" WHERE AuthorID="%s"'
                                        % (img, image['AuthorID']))
                                except Exception as e:
                                    logger.warn("dbupgrade: %s" % str(e))
                        except Exception as e:
                            logger.warn(
                                'Failed to update author image for %s: %s' %
                                (image['AuthorName'], str(e)))
                    logger.debug("Author Image cache updated")

                try:
                    os.mkdir(os.path.join(src, 'book'))
                except OSError as e:
                    if e.errno is not 17:  # already exists is ok
                        logger.debug('mkdir book cache reports: %s' % str(e))

                query = 'SELECT BookName, BookID, BookImg FROM books '
                query += 'WHERE BookImg LIKE "cache/%" '
                query += 'AND BookImg NOT LIKE "cache/book/%"'
                images = myDB.select(query)

                if images:
                    tot = len(images)
                    logger.debug('Moving %s book images to new location' % tot)
                    cnt = 0
                    for image in images:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Moving book images to new location: %s of %s" % (
                            cnt, tot)
                        try:
                            img = image['BookImg']
                            img = img.rsplit('/', 1)[1]
                            srcfile = os.path.join(src, img)
                            if os.path.isfile(srcfile):
                                try:
                                    shutil.move(srcfile,
                                                os.path.join(src, "book", img))
                                    myDB.action(
                                        'UPDATE books SET BookImg="cache/book/%s" WHERE BookID="%s"'
                                        % (img, image['BookID']))
                                except Exception as e:
                                    logger.warn("dbupgrade: %s" % str(e))
                        except Exception as e:
                            logger.warn(
                                'Failed to update book image for %s: %s' %
                                (image['BookName'], str(e)))
                    logger.debug("Book Image cache updated")

                # at this point there should be no more .jpg files in the root of the cachedir
                # any that are still there are for books/authors deleted from database
                # or magazine latest issue cover files that get copied as required
                for image in os.listdir(src):
                    if image.endswith('.jpg'):
                        os.remove(os.path.join(src, image))

            if db_version < 15:
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS series (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \
                        AuthorID TEXT, Status TEXT)')
                myDB.action(
                    'CREATE TABLE IF NOT EXISTS member (SeriesID INTEGER, BookID TEXT, SeriesNum TEXT)'
                )
                if has_column(myDB, "books", "SeriesNum"):
                    lazylibrarian.UPDATE_MSG = 'Populating series and member tables'
                    books = myDB.select(
                        'SELECT BookID, Series, SeriesNum from books')
                    if books:
                        tot = len(books)
                        logger.debug("Updating book series for %s book%s" %
                                     (tot, plural(tot)))
                        cnt = 0
                        for book in books:
                            cnt += 1
                            lazylibrarian.UPDATE_MSG = "Updating book series: %s of %s" % (
                                cnt, tot)
                            seriesdict = getWorkSeries(book['BookID'])
                            if not seriesdict:  # no workpage series, use the current values if present
                                if book['Series'] and book['SeriesNum']:
                                    seriesdict = {
                                        cleanName(unaccented(book['Series'])):
                                        book['SeriesNum']
                                    }
                            setSeries(seriesdict,
                                      book['BookID'],
                                      seriesauthors=False)
                        # deleteEmptySeries  # shouldn't be any on first run?
                        lazylibrarian.UPDATE_MSG = "Book series update complete"
                        logger.debug(lazylibrarian.UPDATE_MSG)

                    lazylibrarian.UPDATE_MSG = 'Removing seriesnum from books table'
                    myDB.action(
                        'CREATE TABLE IF NOT EXISTS temp_table (AuthorID TEXT, AuthorName TEXT, AuthorLink TEXT, \
                    BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \
                    BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \
                    BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, Series TEXT, \
                    WorkPage TEXT, Manual TEXT)')
                    myDB.action(
                        'INSERT INTO temp_table SELECT AuthorID, AuthorName, AuthorLink, BookName, BookSub, \
                    BookDesc, BookGenre, BookIsbn, BookPub, BookRate, BookImg, BookPages, BookLink, BookID, \
                    BookFile, BookDate, BookLang, BookAdded, Status, Series, WorkPage, Manual from books'
                    )
                    myDB.action('DROP TABLE books')
                    myDB.action('ALTER TABLE temp_table RENAME TO books')
                    lazylibrarian.UPDATE_MSG = 'Reorganisation of books table complete'

            if db_version < 16:
                if has_column(myDB, "books", "AuthorLink"):
                    lazylibrarian.UPDATE_MSG = 'Removing series, authorlink and authorname from books table'
                    myDB.action(
                        'CREATE TABLE IF NOT EXISTS temp_table (AuthorID TEXT, \
                    BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \
                    BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \
                    BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, WorkPage TEXT, \
                    Manual TEXT)')
                    myDB.action(
                        'INSERT INTO temp_table SELECT AuthorID, BookName, BookSub, \
                    BookDesc, BookGenre, BookIsbn, BookPub, BookRate, BookImg, BookPages, BookLink, BookID, \
                    BookFile, BookDate, BookLang, BookAdded, Status, WorkPage, Manual from books'
                    )
                    myDB.action('DROP TABLE books')
                    myDB.action('ALTER TABLE temp_table RENAME TO books')
                    lazylibrarian.UPDATE_MSG = 'Reorganisation of books table complete'

            if db_version < 17:
                if has_column(myDB, "series", "AuthorID"):
                    lazylibrarian.UPDATE_MSG = 'Creating seriesauthors table'
                    # In this version of the database there is only one author per series so use that as starting point
                    myDB.action(
                        'CREATE TABLE seriesauthors (SeriesID INTEGER, AuthorID TEXT, UNIQUE (SeriesID,AuthorID))'
                    )
                    series = myDB.select(
                        'SELECT SeriesID,AuthorID from series')
                    cnt = 0
                    tot = len(series)
                    for item in series:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Updating seriesauthors: %s of %s" % (
                            cnt, tot)
                        myDB.action(
                            'insert into seriesauthors (SeriesID, AuthorID) values (%s, %s)'
                            % (item['SeriesID'], item['AuthorID']),
                            suppress='UNIQUE')

                    myDB.action('DROP TABLE temp_table')
                    myDB.action(
                        'CREATE TABLE temp_table (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \
                    Status TEXT)')
                    myDB.action(
                        'INSERT INTO temp_table SELECT  SeriesID, SeriesName, Status FROM series'
                    )
                    myDB.action('DROP TABLE series')
                    myDB.action('ALTER TABLE temp_table RENAME TO series')
                    lazylibrarian.UPDATE_MSG = 'Reorganisation of series table complete'

            if db_version < 18:
                data = myDB.match('pragma index_list(seriesauthors)')
                if not data:
                    lazylibrarian.UPDATE_MSG = 'Adding unique constraint to seriesauthors table'
                    myDB.action('DROP TABLE IF EXISTS temp_table')
                    myDB.action(
                        'ALTER TABLE seriesauthors RENAME to temp_table')
                    myDB.action(
                        'CREATE TABLE seriesauthors (SeriesID INTEGER, AuthorID TEXT, UNIQUE (SeriesID,AuthorID))'
                    )
                    series = myDB.select(
                        'SELECT SeriesID,AuthorID from temp_table')
                    cnt = 0
                    tot = len(series)
                    for item in series:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Updating seriesauthors: %s of %s" % (
                            cnt, tot)
                        myDB.action(
                            'insert into seriesauthors (SeriesID, AuthorID) values (%s, %s)'
                            % (item['SeriesID'], item['AuthorID']),
                            suppress='UNIQUE')
                    myDB.action('DROP TABLE temp_table')
                    lazylibrarian.UPDATE_MSG = 'Reorganisation of seriesauthors complete'

            # Now do any non-version-specific tidying
            try:
                authors = myDB.select(
                    'SELECT AuthorID FROM authors WHERE AuthorName IS NULL')
                if authors:
                    logger.debug(
                        'Removing %s un-named author%s from database' %
                        (len(authors), plural(len(authors))))
                    for author in authors:
                        authorid = author["AuthorID"]
                        myDB.action('DELETE from authors WHERE AuthorID="%s"' %
                                    authorid)
                        myDB.action('DELETE from books WHERE AuthorID="%s"' %
                                    authorid)
            except Exception as e:
                logger.error('Error: ' + str(e))

            myDB.action('PRAGMA user_version = %s' % db_current_version)
            lazylibrarian.UPDATE_MSG = 'Cleaning Database after upgrade'
            myDB.action('vacuum')
            lazylibrarian.UPDATE_MSG = 'Database updated to version %s' % db_current_version
            logger.info(lazylibrarian.UPDATE_MSG)

            restartJobs(start='Start')

        lazylibrarian.UPDATE_MSG = ''

    except Exception:
        logger.error('Unhandled exception in database update: %s' %
                     traceback.format_exc())
        lazylibrarian.UPDATE_MSG = ''
Exemple #25
0
    def find_book(self, bookid=None, queue=None):
        """Fetch one book from Goodreads by id and store it in the database as "Wanted".

        The book page is requested as XML, the author is resolved through
        ``GoodReads(authorname).find_author_id()`` (and added to the authors
        table with status "Ignored" if not already known), then the book row
        is upserted. Afterwards the cover, series and workpage are filled in
        where available.

        :param bookid: goodreads book id (string) of the book to add
        :param queue: not used by this method
        :return: None in all cases; failures are logged
        """
        if not bookid:
            logger.warn("No bookid supplied, unable to find book")
            return

        myDB = database.DBConnection()

        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

        try:
            rootxml, in_cache = get_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book")
                return
        except Exception as e:
            logger.error("Error finding book: %s" % str(e))
            return

        def xml_text(path):
            # find() returns None when the node is absent; report that the
            # same way as an empty node instead of raising AttributeError
            node = rootxml.find(path)
            if node is None:
                return None
            return node.text

        bookLanguage = xml_text('./book/language_code')
        bookname = xml_text('./book/title')
        authorname = xml_text('./book/authors/author/name')

        if not bookname or not authorname:
            # nothing sensible can be stored without a title and an author
            logger.warn("Incomplete goodreads data for bookid %s, unable to add book" % bookid)
            return

        if not bookLanguage:
            bookLanguage = "Unknown"
        #
        # PAB user has said they want this book, don't block for unwanted language, just warn
        #
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if bookLanguage not in valid_langs:
            logger.debug('Book %s goodreads language does not match preference, %s' % (bookname, bookLanguage))

        bookdate = xml_text('./book/publication_year')
        if bookdate is None:
            bookdate = "0000"

        # a missing or empty img_url is treated the same as goodreads' own "nocover" placeholder
        bookimg = xml_text('./book/img_url')
        if not bookimg or 'assets/nocover' in bookimg:
            bookimg = 'images/nocover.png'

        bookdesc = xml_text('./book/description')
        bookisbn = xml_text('./book/isbn')
        bookpub = xml_text('./book/publisher')
        booklink = xml_text('./book/link')
        try:
            bookrate = float(xml_text('./book/average_rating'))
        except (TypeError, ValueError):
            # rating missing or not numeric
            bookrate = 0.0
        bookpages = xml_text('./book/num_pages')

        GR = GoodReads(authorname)
        author = GR.find_author_id()
        if author:
            AuthorID = author['authorid']
            # values come from goodreads and may contain quotes, so use placeholders
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
            if not match:
                match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
                if match:
                    logger.debug('%s: Changing authorid from %s to %s' %
                                 (author['authorname'], AuthorID, match['AuthorID']))
                    AuthorID = match['AuthorID']    # we have a different authorid for that authorname
                else:   # no author but request to add book, add author as "ignored"
                        # User hit "add book" button from a search
                    controlValueDict = {"AuthorID": AuthorID}
                    newValueDict = {
                        "AuthorName": author['authorname'],
                        "AuthorImg": author['authorimg'],
                        "AuthorLink": author['authorlink'],
                        "AuthorBorn": author['authorborn'],
                        "AuthorDeath": author['authordeath'],
                        "DateAdded": today(),
                        "Status": "Ignored"
                    }
                    myDB.upsert("authors", newValueDict, controlValueDict)
        else:
            logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
            return

        bookname = unaccented(bookname)
        bookname, booksub = split_title(authorname, bookname)
        dic = {':': '.', '"': '', '\'': ''}
        bookname = replace_all(bookname, dic).strip()
        booksub = replace_all(booksub, dic).strip()
        # series information is looked for in the subtitle when there is one, else in the title
        if booksub:
            series, seriesNum = bookSeries(booksub)
        else:
            series, seriesNum = bookSeries(bookname)

        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorID": AuthorID,
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": "",
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": bookLanguage,
            "Status": "Wanted",
            "BookAdded": today()
        }

        myDB.upsert("books", newValueDict, controlValueDict)
        logger.info("%s added to the books database" % bookname)

        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)

        elif bookimg and bookimg.startswith('http'):
            link, success = cache_img("book", bookid, bookimg)
            if success:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('Failed to cache image for %s' % bookimg)

        if lazylibrarian.CONFIG['ADD_SERIES']:
            # prefer series info from librarything
            seriesdict = getWorkSeries(bookid)
            if seriesdict:
                logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
            else:
                if series:
                    seriesdict = {cleanName(unaccented(series)): seriesNum}
            setSeries(seriesdict, bookid)

        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
Exemple #26
0
def TORDownloadMethod(bookid=None,
                      tor_title=None,
                      tor_url=None,
                      library='eBook'):
    """Hand a torrent or magnet link to every enabled torrent downloader.

    A non-magnet url is fetched first so the torrent data can be passed on
    directly. The result is then sent to each downloader enabled in
    lazylibrarian.CONFIG (blackhole, uTorrent, rTorrent, qBittorrent,
    Transmission, Synology, Deluge). On success the name reported by the
    downloader is re-checked against the reject word list for ``library``
    and the "wanted"/"books" tables are marked as Snatched.

    :param bookid: BookID whose status is set to "Snatched" on success
    :param tor_title: display title of the torrent
    :param tor_url: torrent url or magnet link, as stored in wanted.NZBurl
    :param library: 'eBook', 'AudioBook' or 'Magazine'; selects the reject
        list and which books column is updated
    :return: tuple ``(True, '')`` on success, ``(False, reason)`` otherwise
    """
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    torrent = ''

    if not tor_url:
        res = "No torrent url supplied, cannot continue"
        logger.warn(res)
        return False, res

    full_url = tor_url  # keep the url as stored in "wanted" table
    if 'magnet:?' in tor_url:
        # discard any other parameters and just use the magnet link
        tor_url = 'magnet:?' + tor_url.split('magnet:?')[1]
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &amp;
        #
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC',
                                          value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = makeUnicode(value)  # ensure unicode
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add extra parameters
        if not tor_url.endswith('.torrent') and '.torrent' in tor_url:
            tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': getUserAgent()}
        proxies = proxyList()

        try:
            logger.debug("Fetching %s" % tor_url)
            r = requests.get(tor_url,
                             headers=headers,
                             timeout=90,
                             proxies=proxies)
            if str(r.status_code).startswith('2'):
                torrent = r.content
                if not len(torrent):
                    res = "Got empty response for %s" % tor_url
                    logger.warn(res)
                    return False, res
                elif len(torrent) < 100:
                    res = "Only got %s bytes for %s" % (len(torrent), tor_url)
                    logger.warn(res)
                    return False, res
                else:
                    logger.debug("Got %s bytes for %s" %
                                 (len(torrent), tor_url))
            else:
                res = "Got a %s response for %s" % (r.status_code, tor_url)
                logger.warn(res)
                return False, res

        except requests.exceptions.Timeout:
            res = 'Timeout fetching file from url: %s' % tor_url
            logger.warn(res)
            return False, res
        except Exception as e:
            # some jackett providers redirect internally using http 301 to a magnet link
            # which requests can't handle, so throws an exception
            logger.debug("Requests exception: %s" % str(e))
            if "magnet:?" in str(e):
                tor_url = 'magnet:?' + str(e).split('magnet:?')[1].strip("'")
                logger.debug("Redirecting to %s" % tor_url)
            else:
                if hasattr(e, 'reason'):
                    res = '%s fetching file from url: %s, %s' % (
                        type(e).__name__, tor_url, e.reason)
                else:
                    res = '%s fetching file from url: %s, %s' % (
                        type(e).__name__, tor_url, str(e))
                logger.warn(res)
                return False, res

    if not torrent and not tor_url.startswith('magnet:?'):
        res = "No magnet or data, cannot continue"
        logger.warn(res)
        return False, res

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = calculate_torrent_hash(tor_url)
                if not hashid:
                    hashid = tor_name
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                        tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                        tor_name)
                # no torrent data is ever fetched for a magnet link, so the
                # file content has to be the magnet uri itself
                saved, res = _write_blackhole_file(tor_path, tor_url,
                                                   'magnet', encoding='utf-8')
                if not saved:
                    return False, res
                logger.debug('Magnet file saved: %s' % tor_path)
                downloadID = Source
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'],
                                    tor_name)
            saved, res = _write_blackhole_file(tor_path, torrent, 'torrent')
            if not saved:
                return False, res
            logger.debug('Torrent file saved: %s' % tor_name)
            downloadID = Source

    hashid = calculate_torrent_hash(tor_url, torrent)
    if not hashid:
        res = "Unable to calculate torrent hash from url/data"
        logger.error(res)
        logger.debug("url: %s" % tor_url)
        logger.debug("data: %s" % makeUnicode(str(torrent[:50])))
        return False, res

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG[
            'UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        downloadID, res = utorrent.addTorrent(tor_url,
                                              hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG[
            'RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        if torrent:
            logger.debug("Sending %s data to rTorrent" % tor_title)
            downloadID, res = rtorrent.addTorrent(tor_title,
                                                  hashid,
                                                  data=torrent)
        else:
            logger.debug("Sending %s url to rTorrent" % tor_title)
            downloadID, res = rtorrent.addTorrent(
                tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG[
            'TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG[
                'QBITTORRENT_HOST']:
        Source = "QBITTORRENT"
        if torrent:
            logger.debug("Sending %s data to qBittorrent" % tor_title)
            status, res = qbittorrent.addFile(torrent, hashid, tor_title)
        else:
            logger.debug("Sending %s url to qBittorrent" % tor_title)
            status, res = qbittorrent.addTorrent(
                tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG[
            'TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG[
                'TRANSMISSION_HOST']:
        Source = "TRANSMISSION"
        if torrent:
            logger.debug("Sending %s data to Transmission" % tor_title)
            # transmission needs b64encoded metainfo to be unicode, not bytes
            downloadID, res = transmission.addTorrent(None,
                                                      metainfo=makeUnicode(
                                                          b64encode(torrent)))
        else:
            logger.debug("Sending %s url to Transmission" % tor_title)
            downloadID, res = transmission.addTorrent(
                tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = hashid
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s url to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID, res = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG[
            'DELUGE_HOST']:
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            if torrent:
                logger.debug("Sending %s data to Deluge" % tor_title)
                downloadID, res = deluge.addTorrent(tor_title,
                                                    data=b64encode(torrent))
            else:
                logger.debug("Sending %s url to Deluge" % tor_title)
                downloadID, res = deluge.addTorrent(
                    tor_url)  # can be link or magnet, returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
            else:
                return False, res
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    res = "Sending %s magnet to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_magnet',
                                             tor_url, args)
                elif torrent:
                    res = "Sending %s data to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_file',
                                             tor_title, b64encode(torrent),
                                             args)
                else:
                    res = "Sending %s url to DelugeRPC" % tor_title
                    logger.debug(res)
                    downloadID = client.call('core.add_torrent_url', tor_url,
                                             args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call(
                            'label.set_torrent', downloadID,
                            lazylibrarian.CONFIG['DELUGE_LABEL'].lower())
                    result = client.call('core.get_torrent_status', downloadID,
                                         {})
                    if 'name' in result:
                        tor_title = result['name']
                else:
                    res += ' failed'
                    logger.error(res)
                    return False, res

            except Exception as e:
                res = 'DelugeRPC failed %s %s' % (type(e).__name__, str(e))
                logger.error(res)
                return False, res

    if not Source:
        res = 'No torrent download method is enabled, check config.'
        logger.warn(res)
        return False, res

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn(
                    '%s: name contains hash, probably unresolved magnet' %
                    Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'Magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'],
                                          ',')
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'],
                                          ',')
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'],
                                          ',')
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" %
                                 library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = "Rejecting torrent name %s, contains %s" % (
                            tor_title, word)
                        logger.debug(rejected)
                        break
                if not rejected:
                    rejected = check_contents(Source, downloadID, library,
                                              tor_title)
                if rejected:
                    myDB.action(
                        'UPDATE wanted SET status="Failed",DLResult=? WHERE NZBurl=?',
                        (rejected, full_url))
                    delete_task(Source, downloadID, True)
                    # every exit returns (success, message) so callers can unpack
                    return False, rejected
                else:
                    logger.debug('%s setting torrent name to [%s]' %
                                 (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?',
                                (tor_title, full_url))

        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?',
                        (bookid, ))
        elif library == 'AudioBook':
            myDB.action(
                'UPDATE books SET audiostatus="Snatched" WHERE BookID=?',
                (bookid, ))
        myDB.action(
            'UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
            (Source, downloadID, full_url))
        return True, ''

    res = 'Failed to send torrent to %s' % Source
    logger.error(res)
    return False, res


def _write_blackhole_file(tor_path, payload, label, encoding='iso-8859-1'):
    """Write payload to tor_path in binary mode and set its permissions.

    :param tor_path: full path of the file to create
    :param payload: bytes, or text which is encoded with ``encoding`` first
    :param label: 'magnet' or 'torrent', only used in the failure message
    :param encoding: codec applied when payload is text
    :return: ``(True, '')`` on success, ``(False, reason)`` on any exception
    """
    # msg records how far we got, to help diagnose a failure
    msg = ''
    try:
        msg = 'Opening '
        with open(tor_path, 'wb') as torrent_file:
            msg += 'Writing '
            if isinstance(payload, text_type):
                payload = payload.encode(encoding)
            torrent_file.write(payload)
        msg += 'SettingPerm '
        setperm(tor_path)
        msg += 'Saved '
        return True, ''
    except Exception as e:
        res = "Failed to write %s to file: %s %s" % (
            label, type(e).__name__, str(e))
        logger.warn(res)
        logger.debug("Progress: %s Filename [%s]" % (msg, repr(tor_path)))
        return False, res
Exemple #27
0
def _build_filename_pattern(dest_file, booktype_list):
    """Compile the EBOOK_DEST_FILE template into a filename-matching regex.

    "$Author" and "$Title" in the template become the named groups "author"
    and "book"; every other template character is matched literally. The
    template must be followed by a dot and one of the booktype_list extensions.
    """
    # re.escape keeps literal letters literal; prefixing every character with a
    # backslash would turn "b", "d", "s", "w"... into regex escapes
    template = re.escape(dest_file)
    template = template.replace(re.escape('$Author'), '(?P<author>.*?)')
    template = template.replace(re.escape('$Title'), '(?P<book>.*?)')
    # a group of alternatives, not a character class, so whole extensions match
    extensions = '|'.join(re.escape(book_type) for book_type in booktype_list)
    return re.compile(template + r'\.(?:' + extensions + ')')


def _split_leading_initials(author):
    """Split leading initials from a name: "J. R. R. Tolkien" -> ("J.R.R.", "Tolkien")."""
    forename = ''
    surname = author
    # length check stops us indexing past the end of names that are all initials
    while len(surname) > 1 and surname[1] in '. ':
        forename = forename + surname[0] + '.'
        surname = surname[2:].strip()
    return forename, surname


def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database

        startdir: directory to scan. If empty, the configured 'Destination'
        directory is used. Scanning the destination directory is treated as a
        full library scan: the stats table is reset, author names with extra
        whitespace are tidied, remote cover/author images are cached and
        book totals are refreshed for every author.

        Return how many books were newly marked as "Open", 0 if the scan
        could not start (no directory, no internet connection). If an
        unexpected exception occurs it is logged and None is returned. """
    try:
        destdir = lazylibrarian.DIRECTORY('Destination')
        if not startdir:
            if not destdir:
                logger.warn('Cannot find destination directory: %s. Not scanning' % destdir)
                return 0
            startdir = destdir

        if not os.path.isdir(startdir):
            logger.warn('Cannot find directory: %s. Not scanning' % startdir)
            return 0

        if not internet():
            logger.warn('Libraryscan: No internet connection')
            return 0

        myDB = database.DBConnection()

        # keep statistics of full library scans
        if startdir == destdir:
            myDB.action('DELETE from stats')
            try:  # remove any extra whitespace in authornames
                authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "%  %"')
                if authors:
                    logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors))))
                    for author in authors:
                        authorid = author["AuthorID"]
                        authorname = ' '.join(author['AuthorName'].split())
                        # Have we got author name both with-and-without extra spaces? If so, merge them
                        duplicate = myDB.match(
                            'Select AuthorID,AuthorName FROM authors WHERE AuthorName=?', (authorname,))
                        if duplicate:
                            myDB.action('DELETE from authors where authorname=?', (author['AuthorName'],))
                            if author['AuthorID'] != duplicate['AuthorID']:
                                myDB.action('UPDATE books set AuthorID=? WHERE AuthorID=?',
                                            (duplicate['AuthorID'], author['AuthorID']))
                        else:
                            myDB.action('UPDATE authors set AuthorName=? WHERE AuthorID=?',
                                        (authorname, authorid))
            except Exception as e:
                # best effort tidy-up, a failure here should not stop the scan
                logger.info('Error: ' + str(e))

        logger.info('Scanning ebook directory: %s' % startdir)

        new_book_count = 0
        modified_count = 0
        rescan_count = 0
        rescan_hits = 0
        file_count = 0
        author = ""  # last author processed, used for the bookcount update on partial scans

        if lazylibrarian.CONFIG['FULL_SCAN']:
            # check that every book we think we have is still on disk
            cmd = 'select AuthorName, BookName, BookFile, BookID from books,authors'
            cmd += ' where books.AuthorID = authors.AuthorID and books.Status="Open"'
            args = None
            if not startdir == destdir:
                # partial scan, only check books stored below startdir
                cmd += ' and BookFile like ?'
                args = (startdir + '%',)
            books = myDB.select(cmd, args)
            status = lazylibrarian.CONFIG['NOTFOUND_STATUS']
            logger.info('Missing books will be marked as %s' % status)
            for book in books:
                bookID = book['BookID']
                bookfile = book['BookFile']

                if not (bookfile and os.path.isfile(bookfile)):
                    myDB.action('update books set Status=? where BookID=?', (status, bookID))
                    myDB.action('update books set BookFile="" where BookID=?', (bookID,))
                    logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName']))

        # to save repeat-scans of the same directory if it contains multiple formats of the same book,
        # keep track of which directories we've already looked at
        processed_subdirectories = []
        warned = False  # have we warned about no new authors setting
        # massage the EBOOK_DEST_FILE config parameter into something we can use
        # with regular expression matching
        pattern = _build_filename_pattern(lazylibrarian.CONFIG['EBOOK_DEST_FILE'],
                                          getList(lazylibrarian.CONFIG['EBOOK_TYPE']))

        for r, d, f in os.walk(startdir):
            # prevent magazine being scanned
            # (prune in place so os.walk does not descend into these directories)
            d[:] = [directory for directory in d if not directory.startswith(('_', '.'))]

            for files in f:
                file_count += 1

                if isinstance(r, str):
                    r = r.decode(lazylibrarian.SYS_ENCODING)

                subdirectory = r.replace(startdir, '')
                # Skip if we've done this directory before.
                # Made this conditional with a switch in config.ini
                # in case user keeps multiple different books in the same subdirectory
                if lazylibrarian.CONFIG['IMP_SINGLEBOOK'] and (subdirectory in processed_subdirectories):
                    logger.debug("[%s] already scanned" % subdirectory)
                    continue

                if not is_valid_booktype(files):
                    continue

                # This is a book, try to get author/title/isbn/language
                # if epub or mobi, read metadata from the book
                # If metadata.opf exists, use that allowing it to override
                # embedded metadata. User may have edited metadata.opf
                # to merge author aliases together
                # If all else fails, try pattern match for author/title
                # and look up isbn/lang from LT or GR later
                logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory))

                match = 0
                language = "Unknown"
                isbn = ""
                book = ""
                author = ""
                gr_id = ""
                gb_id = ""  # never populated by the metadata readers used below
                extn = os.path.splitext(files)[1]

                # if it's an epub or a mobi we can try to read metadata from it
                if (extn == ".epub") or (extn == ".mobi"):
                    book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)

                    try:
                        res = get_book_info(book_filename)
                    except Exception as e:
                        logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e)))
                        res = {}
                    # title and creator are the minimum we need
                    if 'title' in res and 'creator' in res:
                        book = res['title']
                        author = res['creator']
                        if book and len(book) > 2 and author and len(author) > 2:
                            match = 1
                        if 'language' in res:
                            language = res['language']
                        if 'identifier' in res:
                            isbn = res['identifier']
                        if 'type' in res:
                            extn = res['type']
                        logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                     (isbn, language, author, book, extn))
                    if not match:
                        logger.debug("Book meta incomplete in %s" % book_filename)

                # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                # just look for any .opf file in the current directory since we don't know
                # LL preferred authorname/bookname at this point.
                # Allow metadata in file to override book contents as may be users pref
                metafile = opf_file(r)
                try:
                    res = get_book_info(metafile)
                except Exception as e:
                    logger.debug('get_book_info failed for %s, %s' % (metafile, str(e)))
                    res = {}
                # title and creator are the minimum we need
                if 'title' in res and 'creator' in res:
                    book = res['title']
                    author = res['creator']
                    if book and len(book) > 2 and author and len(author) > 2:
                        match = 1
                    if 'language' in res:
                        language = res['language']
                    if 'identifier' in res:
                        isbn = res['identifier']
                    if 'gr_id' in res:
                        gr_id = res['gr_id']
                    logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, gr_id))
                if not match:
                    logger.debug("File meta incomplete in %s" % metafile)

                if not match:  # no author/book from metadata file, and not embedded either
                    match = pattern.match(files)
                    if match:
                        author = match.group("author")
                        book = match.group("book")
                        if len(book) <= 2 or len(author) <= 2:
                            match = 0
                    if not match:
                        logger.debug("Pattern match failed [%s]" % files)

                if not match:
                    continue

                # flag that we found a book in this subdirectory
                processed_subdirectories.append(subdirectory)

                # If we have a valid looking isbn, and language != "Unknown", add it to cache
                if language != "Unknown" and is_valid_isbn(isbn):
                    logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                    # we need to add it to language cache if not already
                    # there, is_valid_isbn has checked length is 10 or 13
                    if len(isbn) == 10:
                        isbnhead = isbn[0:3]
                    else:
                        isbnhead = isbn[3:6]
                    cached = myDB.match('SELECT lang FROM languages where isbn = ?', (isbnhead,))
                    if not cached:
                        myDB.action('insert into languages values (?, ?)', (isbnhead, language))
                        logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                    else:
                        logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                author, authorid, new = addAuthorNameToDB(author)  # get the author name as we know it...

                if not author:
                    if not warned and not lazylibrarian.CONFIG['ADD_AUTHOR']:
                        logger.warn("Add authors to database is disabled")
                        warned = True
                    continue

                # author exists, check if this book by this author is in our database
                # metadata might have quotes in book name
                # some books might be stored under a different author name
                # eg books by multiple authors, books where author is "writing as"
                # or books we moved to "merge" authors
                book = book.replace("'", "")

                # First try and find it under author and bookname
                # as we may have it under a different bookid or isbn to goodreads/googlebooks
                # which might have several bookid/isbn for the same book
                bookid = find_book_in_db(myDB, author, book)

                if not bookid:
                    # Title or author name might not match or multiple authors
                    # See if the gr_id, gb_id is already in our database
                    if gr_id:
                        bookid = gr_id
                    elif gb_id:
                        bookid = gb_id
                    else:
                        bookid = ""

                    if bookid:
                        known = myDB.match('SELECT BookID FROM books where BookID = ?', (bookid,))
                        if not known:
                            msg = 'Unable to find book %s by %s in database, trying to add it using '
                            if bookid == gr_id:
                                msg += "GoodReads ID " + gr_id
                            if bookid == gb_id:
                                msg += "GoogleBooks ID " + gb_id
                            logger.debug(msg % (book, author))
                            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and gr_id:
                                GR_ID = GoodReads(gr_id)
                                GR_ID.find_book(gr_id, None)
                            elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks" and gb_id:
                                GB_ID = GoogleBooks(gb_id)
                                GB_ID.find_book(gb_id, None)
                            # see if it's there now...
                            known = myDB.match('SELECT BookID from books where BookID=?', (bookid,))
                            if not known:
                                logger.debug("Unable to add bookid %s to database" % bookid)
                                bookid = ""

                if not bookid and isbn:
                    # See if the isbn is in our database
                    known = myDB.match('SELECT BookID FROM books where BookIsbn = ?', (isbn,))
                    if known:
                        bookid = known['BookID']

                if not bookid:
                    # get author name from parent directory of this book directory
                    newauthor = os.path.basename(os.path.dirname(r))
                    # calibre replaces trailing periods with _ eg Smith Jr. -> Smith Jr_
                    if newauthor.endswith('_'):
                        newauthor = newauthor[:-1] + '.'
                    if author.lower() != newauthor.lower():
                        logger.debug("Trying authorname [%s]" % newauthor)
                        bookid = find_book_in_db(myDB, newauthor, book)
                        if bookid:
                            logger.warn("%s not found under [%s], found under [%s]" %
                                        (book, author, newauthor))

                # at this point if we still have no bookid, it looks like we
                # have author and book title but no database entry for it
                if not bookid:
                    if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                        # Either goodreads doesn't have the book or it didn't match language prefs
                        # Since we have the book anyway, try and reload it ignoring language prefs
                        rescan_count += 1
                        base_url = 'http://www.goodreads.com/search.xml?q='
                        params = {"key": lazylibrarian.CONFIG['GR_API']}
                        # close up any leading initials, eg "J. R. R. Tolkien" -> "J.R.R. Tolkien"
                        forename, surname = _split_leading_initials(author)
                        if forename and author != forename + ' ' + surname:
                            logger.debug('Stripped authorname [%s] to [%s %s]' %
                                         (author, forename, surname))
                            author = forename + ' ' + surname

                        author = ' '.join(author.split())  # ensure no extra whitespace

                        searchname = author + ' ' + book
                        searchname = cleanName(unaccented(searchname))
                        searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING))
                        set_url = base_url + searchterm + '&' + urllib.urlencode(params)
                        try:
                            rootxml, in_cache = get_xml_request(set_url)
                            if not len(rootxml):
                                logger.debug("Error requesting results from GoodReads")
                            else:
                                resultxml = rootxml.getiterator('work')
                                for item in resultxml:
                                    booktitle = item.find('./best_book/title').text
                                    book_fuzz = fuzz.token_set_ratio(booktitle, book)
                                    if book_fuzz >= 98:
                                        logger.debug("Rescan found %s : %s" % (booktitle, language))
                                        rescan_hits += 1
                                        bookid = item.find('./best_book/id').text
                                        GR_ID = GoodReads(bookid)
                                        GR_ID.find_book(bookid, None)
                                        if language and language != "Unknown":
                                            # set language from book metadata
                                            logger.debug("Setting language from metadata %s : %s" % (booktitle, language))
                                            myDB.action('UPDATE books SET BookLang=? WHERE BookID=?',
                                                        (language, bookid))
                                        break
                                if not bookid:
                                    logger.warn("GoodReads doesn't know about %s" % book)
                        except Exception as e:
                            logger.error("Error finding rescan results: %s" % str(e))

                    elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                        # if we get here using googlebooks it's because googlebooks
                        # doesn't have the book. No point in looking for it again.
                        logger.warn("GoogleBooks doesn't know about %s" % book)

                # see if it's there now...
                if not bookid:
                    logger.warn(
                        "Failed to match book [%s] by [%s] in database" % (book, author))
                    continue

                cmd = 'SELECT books.Status, BookFile, AuthorName, BookName from books,authors '
                cmd += 'where books.AuthorID = authors.AuthorID and BookID=?'
                check_status = myDB.match(cmd, (bookid,))

                if not check_status:
                    logger.debug('Unable to find bookid %s in database' % bookid)
                    continue

                if check_status['Status'] != 'Open':
                    # we found a new book
                    new_book_count += 1
                    myDB.action('UPDATE books set Status="Open" where BookID=?', (bookid,))

                # store book location so we can check if it gets removed
                book_filename = os.path.join(r, files)
                if not check_status['BookFile']:  # no previous location
                    myDB.action('UPDATE books set BookFile=? where BookID=?',
                                (book_filename, bookid))
                # location may have changed since last scan
                elif book_filename != check_status['BookFile']:
                    modified_count += 1
                    logger.warn("Updating book location for %s %s from %s to %s" %
                                (author, book, check_status['BookFile'], book_filename))
                    logger.debug("%s %s matched %s BookID %s, [%s][%s]" %
                                 (author, book, check_status['Status'], bookid,
                                  check_status['AuthorName'], check_status['BookName']))
                    myDB.action('UPDATE books set BookFile=? where BookID=?',
                                (book_filename, bookid))

                # update cover file to cover.jpg in book folder (if exists)
                bookdir = os.path.dirname(book_filename)
                coverimg = os.path.join(bookdir, 'cover.jpg')
                if os.path.isfile(coverimg):
                    cachedir = lazylibrarian.CACHEDIR
                    cacheimg = os.path.join(cachedir, 'book', bookid + '.jpg')
                    copyfile(coverimg, cacheimg)

        logger.info("%s/%s new/modified book%s found and added to the database" %
                    (new_book_count, modified_count, plural(new_book_count + modified_count)))
        logger.info("%s file%s processed" % (file_count, plural(file_count)))

        if startdir == destdir:
            # On full library scans, check for missing workpages
            setWorkPages()
            # and books with unknown language
            nolang = myDB.match(
                "select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'")
            nolang = nolang['counter']
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))
            # show the statistics gathered during this scan
            stats = myDB.match(
                "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \
                    sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats")

            # book hits for either provider are read from the GR_book_hits column
            st = {'GR_book_hits': stats['sum(GR_book_hits)'], 'GB_book_hits': stats['sum(GR_book_hits)'],
                  'GR_lang_hits': stats['sum(GR_lang_hits)'], 'LT_lang_hits': stats['sum(LT_lang_hits)'],
                  'GB_lang_change': stats['sum(GB_lang_change)'], 'cache_hits': stats['sum(cache_hits)'],
                  'bad_lang': stats['sum(bad_lang)'], 'bad_char': stats['sum(bad_char)'],
                  'uncached': stats['sum(uncached)'], 'duplicates': stats['sum(duplicates)']}

            # sum() over an empty table gives NULL, report those as zero
            for item in st.keys():
                if st[item] is None:
                    st[item] = 0

            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (st['GB_lang_change'], plural(st['GB_lang_change'])))
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (st['GR_lang_hits'], plural(st['GR_lang_hits'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (st['LT_lang_hits'], plural(st['LT_lang_hits'])))
            logger.debug("Language cache was hit %s time%s" %
                         (st['cache_hits'], plural(st['cache_hits'])))
            logger.debug("Unwanted language removed %s book%s" %
                         (st['bad_lang'], plural(st['bad_lang'])))
            logger.debug("Unwanted characters removed %s book%s" %
                         (st['bad_char'], plural(st['bad_char'])))
            logger.debug("Unable to cache language for %s book%s with missing ISBN" %
                         (st['uncached'], plural(st['uncached'])))
            logger.debug("Found %s duplicate book%s" %
                         (st['duplicates'], plural(st['duplicates'])))
            logger.debug("Rescan %s hit%s, %s miss" %
                         (rescan_hits, plural(rescan_hits), rescan_count - rescan_hits))
            logger.debug("Cache %s hit%s, %s miss" %
                         (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.match("select count('ISBN') as counter from languages")
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])

            # Cache any covers and images
            images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
            if len(images):
                logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    bookid = item['bookid']
                    bookimg = item['bookimg']
                    newimg, success = cache_img("book", bookid, bookimg)
                    if success:
                        myDB.action('update books set BookImg=? where BookID=?', (newimg, bookid))

            images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
            if len(images):
                logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    authorid = item['authorid']
                    authorimg = item['authorimg']
                    newimg, success = cache_img("author", authorid, authorimg)
                    if success:
                        myDB.action('update authors set AuthorImg=? where AuthorID=?', (newimg, authorid))

            # On full scan, update bookcounts for all authors, not just new ones - refresh may have located
            # new books for existing authors especially if switched provider gb/gr or changed wanted languages
            authors = myDB.select('select AuthorID from authors')
        else:
            # On single author/book import, just update bookcount for that author
            authors = myDB.select('select AuthorID from authors where AuthorName = ?', (author,))

        logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
        for author in authors:
            update_totals(author['AuthorID'])

        logger.info('Library scan complete')
        return new_book_count

    except Exception:
        # top-level boundary: log the traceback rather than crash the caller
        logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())
def TORDownloadMethod(bookid=None, tor_title=None, tor_url=None):
    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url and tor_url.startswith('magnet'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        # h = HTMLParser()
        # tor_url = h.unescape(tor_url)
        # HTMLParser is probably overkill, we only seem to get &amp;
        #
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            if isinstance(value, str):
                value = value.decode('utf-8')  # make unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        request = urllib2.Request(ur'%s' % tor_url)
        if lazylibrarian.CONFIG['PROXY_HOST']:
            request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], lazylibrarian.CONFIG['PROXY_TYPE'])
        request.add_header('Accept-encoding', 'gzip')
        request.add_header('User-Agent', USER_AGENT)

        try:
            response = urllib2.urlopen(request, timeout=90)
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read())
                f = gzip.GzipFile(fileobj=buf)
                torrent = f.read()
            else:
                torrent = response.read()

        except socket.timeout:
            logger.warn('Timeout fetching torrent from url: %s' % tor_url)
            return False
        except urllib2.URLError as e:
            logger.warn('Error fetching torrent from url: %s, %s' % (tor_url, e.reason))
            return False
        except ValueError as e:
            logger.warn('Error, invalid url: [%s] %s' % (full_url, str(e)))
            return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url and tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                try:
                    with open(tor_path, 'wb') as torrent_file:
                        torrent_file.write(torrent)
                    logger.debug('Magnet file saved: %s' % tor_path)
                    setperm(tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.debug("Failed to write magnet to file %s, %s" % (tor_path, str(e)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            try:
                with open(tor_path, 'wb') as torrent_file:
                    torrent_file.write(torrent)
                setperm(tor_path)
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.debug("Failed to write torrent to file %s, %s" % (tor_path, str(e)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url)  # returns hash or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns it's own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID, lazylibrarian.CONFIG['DELUGE_LABEL'])
                    result = client.call('core.get_torrent_status', downloadID, {})
                    # for item in result:
                    #    logger.debug ('Deluge RPC result %s: %s' % (item, result[item]))
                    if 'name' in result:
                        tor_title = result['name']

            except Exception as e:
                logger.debug('DelugeRPC failed %s' % str(e))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        myDB.action('UPDATE books SET status = "Snatched" WHERE BookID="%s"' % bookid)
        myDB.action('UPDATE wanted SET status = "Snatched", Source = "%s", DownloadID = "%s" WHERE NZBurl="%s"' %
                    (Source, downloadID, full_url))
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl="%s"' % (tor_title, full_url))
        return True
    else:
        logger.error(u'Failed to download torrent from %s, %s' % (Source, tor_url))
        myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl="%s"' % full_url)
        return False
Exemple #29
0
def ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode):
    """ Build the query parameters for a provider search.

        provider    dict of provider settings, the keys read here are BOOKSEARCH, BOOKCAT,
                    MAGSEARCH, MAGCAT, GENERALSEARCH and EXTENDED
        api_key     value sent as "apikey"
        book        dict with authorName, bookName and bookSub for "book" / "shortbook"
                    searches, or searchterm for any other search type
        searchType  "book", "shortbook", "mag", anything else is a general search
        searchMode  only used to label the log messages

        Return a dict of parameters, or None if the provider has no
        suitable search configured for this search type """
    params = None
    if searchType in ("book", "shortbook"):
        authorname, bookname = _newznab_book_terms(book, searchType == "shortbook")
        if provider['BOOKSEARCH'] and provider['BOOKCAT']:  # if specific booksearch, use it
            params = {
                "t": provider['BOOKSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['BOOKCAT']
            }
        elif provider['GENERALSEARCH'] and provider['BOOKCAT']:  # if not, try general search
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['BOOKCAT']
            }
    elif searchType == "mag":
        search = None
        if provider['MAGSEARCH'] and provider['MAGCAT']:  # if specific magsearch, use it
            search = provider['MAGSEARCH']
        elif provider['GENERALSEARCH'] and provider['MAGCAT']:
            search = provider['GENERALSEARCH']
        if search:
            params = {
                "t": search,
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
    else:
        if provider['GENERALSEARCH']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                # this is a general search
                "q": book['searchterm'],
                "extended": provider['EXTENDED'],
            }
    if params:
        logger.debug('[NewzNabPlus] - %s Search parameters set to %s' % (searchMode, str(params)))
    else:
        logger.debug('[NewzNabPlus] - %s No matching search parameters' % searchMode)

    return params


def _newznab_book_terms(book, short):
    """ Return the (authorname, bookname) to search for, taken from the book dict.
        If short is set, anything from the first '(' onwards is dropped from the bookname """
    authorname = book['authorName']
    # strip any leading initials, and leading whitespace
    # the length check stops an IndexError on a one character name, or a
    # name that is nothing but initials eg "J."
    while len(authorname) > 1 and authorname[1] in '. ':
        authorname = authorname[2:].strip()
    # middle initials can't have a dot
    authorname = cleanName(authorname.replace('. ', ' '))
    bookname = cleanName(book['bookName'])
    if bookname == authorname and book['bookSub']:
        # books like "Spike Milligan: Man of Letters"
        # where we split the title/subtitle on ':'
        bookname = cleanName(book['bookSub'])
    # an empty authorname is a prefix of everything, and must not eat the start of the title
    if authorname and bookname.startswith(authorname) and len(bookname) > len(authorname):
        # books like "Spike Milligan In his own words"
        # where we don't want to look for "Spike Milligan Spike Milligan In his own words"
        bookname = bookname[len(authorname) + 1:]
    if short and '(' in bookname:
        bookname = bookname.split('(')[0].strip()
    return authorname, bookname
Exemple #30
0
def TORDownloadMethod(bookid=None,
                      tor_title=None,
                      tor_url=None,
                      library='eBook'):
    """ Send a torrent to every torrent downloader that is enabled in the config.

        A magnet link is used as it is. Any other url is tidied up and the torrent
        file is fetched first, so it can be written to the blackhole directory or
        hashed for the clients that want a hash.
        The enabled downloaders are tried one after another, each one overwrites
        Source (and downloadID) so the values from the last one tried are stored.
        If the downloader reports a new name for the torrent, that name is checked
        against the reject list for the library before it is stored.

        bookid     BookID of the book to mark as Snatched
        tor_title  torrent name, used in log messages and for the blackhole filename
        tor_url    magnet link or url of the torrent, as stored in the "wanted" table
        library    'eBook', 'AudioBook' or 'magazine', selects the reject list and
                   which status column of the book is updated

        Return True if the torrent was snatched and the database updated,
        False if the fetch failed, a file could not be written, DelugeRPC failed,
        no downloader is enabled, no downloader accepted the torrent,
        or the new torrent name was rejected """
    if not tor_url:
        # tor_url defaults to None, and without this check the url clean-up below
        # dies with an AttributeError instead of reporting the problem
        logger.warn('No torrent url supplied for %s' % tor_title)
        return False

    myDB = database.DBConnection()
    downloadID = False
    Source = ''
    full_url = tor_url  # keep the url as stored in "wanted" table
    if tor_url.startswith('magnet'):
        torrent = tor_url  # allow magnet link to write to blackhole and hash to utorrent/rtorrent
    else:
        # a full HTML unescape is probably overkill, we only seem to get &amp;
        tor_url = tor_url.replace('&amp;', '&')

        if '&file=' in tor_url:
            # torznab results need to be re-encoded
            # had a problem with torznab utf-8 encoded strings not matching
            # our utf-8 strings because of long/short form differences
            url, value = tor_url.split('&file=', 1)
            value = makeUnicode(value)  # ensure unicode
            value = unicodedata.normalize('NFC', value)  # normalize to short form
            value = value.encode('unicode-escape')  # then escape the result
            value = value.replace(' ', '%20')  # and encode any spaces
            tor_url = url + '&file=' + value

        # strip url back to the .torrent as some sites add parameters
        if not tor_url.endswith('.torrent'):
            if '.torrent' in tor_url:
                tor_url = tor_url.split('.torrent')[0] + '.torrent'

        headers = {'Accept-encoding': 'gzip', 'User-Agent': USER_AGENT}
        proxies = proxyList()
        try:
            r = requests.get(tor_url, headers=headers, timeout=90, proxies=proxies)
        except requests.exceptions.Timeout:
            logger.warn('Timeout fetching file from url: %s' % tor_url)
            return False
        except Exception as e:
            if hasattr(e, 'reason'):
                logger.warn('%s fetching file from url: %s, %s' %
                            (type(e).__name__, tor_url, e.reason))
            else:
                logger.warn('%s fetching file from url: %s, %s' %
                            (type(e).__name__, tor_url, str(e)))
            return False

        torrent = r.content

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        logger.debug("Sending %s to blackhole" % tor_title)
        tor_name = cleanName(tor_title).replace(' ', '_')
        if tor_url.startswith('magnet'):
            if lazylibrarian.CONFIG['TOR_CONVERT_MAGNET']:
                hashid = CalcTorrentHash(tor_url)
                tor_name = 'meta-' + hashid + '.torrent'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                result = magnet2torrent(tor_url, tor_path)
                if result is not False:
                    logger.debug('Magnet file saved as: %s' % tor_path)
                    downloadID = Source
            else:
                # not converting, so write the magnet link itself into a .magnet file
                tor_name += '.magnet'
                tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
                # msg records how far we got, it is only logged if something fails
                msg = ''
                try:
                    msg = 'Opening '
                    with open(tor_path, 'wb') as torrent_file:
                        msg += 'Writing '
                        if isinstance(torrent, unicode):
                            torrent = torrent.encode('iso-8859-1')
                        torrent_file.write(torrent)
                    msg += 'SettingPerm '
                    setperm(tor_path)
                    msg += 'Saved'
                    logger.debug('Magnet file saved: %s' % tor_path)
                    downloadID = Source
                except Exception as e:
                    logger.debug("Failed to write magnet to file: %s %s" %
                                 (type(e).__name__, str(e)))
                    logger.debug("Progress: %s" % msg)
                    logger.debug("Filename [%s]" % (repr(tor_path)))
                    return False
        else:
            tor_name += '.torrent'
            tor_path = os.path.join(lazylibrarian.CONFIG['TORRENT_DIR'], tor_name)
            # msg records how far we got, it is only logged if something fails
            msg = ''
            try:
                msg = 'Opening '
                with open(tor_path, 'wb') as torrent_file:
                    msg += 'Writing '
                    if isinstance(torrent, unicode):
                        torrent = torrent.encode('iso-8859-1')
                    torrent_file.write(torrent)
                msg += 'SettingPerm '
                setperm(tor_path)
                msg += 'Saved'
                logger.debug('Torrent file saved: %s' % tor_name)
                downloadID = Source
            except Exception as e:
                logger.debug("Failed to write torrent to file: %s %s" %
                             (type(e).__name__, str(e)))
                logger.debug("Progress: %s" % msg)
                logger.debug("Filename [%s]" % (repr(tor_path)))
                return False

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_UTORRENT'] and lazylibrarian.CONFIG['UTORRENT_HOST']:
        logger.debug("Sending %s to Utorrent" % tor_title)
        Source = "UTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = utorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = utorrent.nameTorrent(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_RTORRENT'] and lazylibrarian.CONFIG['RTORRENT_HOST']:
        logger.debug("Sending %s to rTorrent" % tor_title)
        Source = "RTORRENT"
        hashid = CalcTorrentHash(torrent)
        downloadID = rtorrent.addTorrent(tor_url, hashid)  # returns hash or False
        if downloadID:
            tor_title = rtorrent.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_QBITTORRENT'] and lazylibrarian.CONFIG['QBITTORRENT_HOST']:
        logger.debug("Sending %s to qbittorrent" % tor_title)
        Source = "QBITTORRENT"
        hashid = CalcTorrentHash(torrent)
        status = qbittorrent.addTorrent(tor_url, hashid)  # returns True or False
        if status:
            downloadID = hashid
            tor_title = qbittorrent.getName(hashid)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_TRANSMISSION'] and lazylibrarian.CONFIG['TRANSMISSION_HOST']:
        logger.debug("Sending %s to Transmission" % tor_title)
        Source = "TRANSMISSION"
        downloadID = transmission.addTorrent(tor_url)  # returns id or False
        if downloadID:
            # transmission returns its own int, but we store hashid instead
            downloadID = CalcTorrentHash(torrent)
            tor_title = transmission.getTorrentFolder(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        logger.debug("Sending %s to Synology" % tor_title)
        Source = "SYNOLOGY_TOR"
        downloadID = synology.addTorrent(tor_url)  # returns id or False
        if downloadID:
            tor_title = synology.getName(downloadID)

    if lazylibrarian.CONFIG['TOR_DOWNLOADER_DELUGE'] and lazylibrarian.CONFIG['DELUGE_HOST']:
        logger.debug("Sending %s to Deluge" % tor_title)
        if not lazylibrarian.CONFIG['DELUGE_USER']:
            # no username, talk to the webui
            Source = "DELUGEWEBUI"
            downloadID = deluge.addTorrent(tor_url)  # returns hash or False
            if downloadID:
                tor_title = deluge.getTorrentFolder(downloadID)
        else:
            # have username, talk to the daemon
            Source = "DELUGERPC"
            client = DelugeRPCClient(lazylibrarian.CONFIG['DELUGE_HOST'],
                                     lazylibrarian.CONFIG['DELUGE_URL_BASE'],
                                     int(lazylibrarian.CONFIG['DELUGE_PORT']),
                                     lazylibrarian.CONFIG['DELUGE_USER'],
                                     lazylibrarian.CONFIG['DELUGE_PASS'])
            try:
                client.connect()
                args = {"name": tor_title}
                if tor_url.startswith('magnet'):
                    downloadID = client.call('core.add_torrent_magnet', tor_url, args)
                else:
                    downloadID = client.call('core.add_torrent_url', tor_url, args)
                if downloadID:
                    if lazylibrarian.CONFIG['DELUGE_LABEL']:
                        _ = client.call('label.set_torrent', downloadID,
                                        lazylibrarian.CONFIG['DELUGE_LABEL'])
                    result = client.call('core.get_torrent_status', downloadID, {})
                    if 'name' in result:
                        tor_title = result['name']

            except Exception as e:
                logger.debug('DelugeRPC failed %s %s' %
                             (type(e).__name__, str(e)))
                return False

    if not Source:
        logger.warn('No torrent download method is enabled, check config.')
        return False

    if downloadID:
        if tor_title:
            if downloadID.upper() in tor_title.upper():
                logger.warn('%s: name contains hash, probably unresolved magnet' % Source)
            else:
                tor_title = unaccented_str(tor_title)
                # need to check against reject words list again as the name may have changed
                # library = magazine eBook AudioBook to determine which reject list
                # but we can't easily do the per-magazine rejects
                if library == 'magazine':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                elif library == 'eBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_WORDS'])
                elif library == 'AudioBook':
                    reject_list = getList(lazylibrarian.CONFIG['REJECT_AUDIO'])
                else:
                    logger.debug("Invalid library [%s] in TORDownloadMethod" % library)
                    reject_list = []

                rejected = False
                lower_title = tor_title.lower()
                for word in reject_list:
                    if word in lower_title:
                        rejected = True
                        logger.debug("Rejecting torrent name %s, contains %s" %
                                     (tor_title, word))
                        break
                if rejected:
                    # mark it failed and ask the downloader to drop the task again
                    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?',
                                (full_url, ))
                    delete_task(Source, downloadID, True)
                    return False
                else:
                    logger.debug('%s setting torrent name to [%s]' % (Source, tor_title))
                    myDB.action('UPDATE wanted SET NZBtitle=? WHERE NZBurl=?',
                                (tor_title, full_url))

        # magazines have no entry in the books table, so only the wanted table is updated for those
        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?',
                        (bookid, ))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus="Snatched" WHERE BookID=?',
                        (bookid, ))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, full_url))
        return True

    logger.error('Failed to download torrent from %s, %s' % (Source, tor_url))
    myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?',
                (full_url, ))
    return False
Exemple #31
0
def getSeriesAuthors(seriesid):
    """ Find the authors of the books in a series by searching GoodReads
        for each series member, and add each author found to the database
        Return the number of authors added """

    def search_url(text, params):
        # full GoodReads search url for the cleaned, unaccented search text
        cleaned = cleanName(unaccented(text))
        quoted = urllib.quote_plus(cleaned.encode(lazylibrarian.SYS_ENCODING))
        return 'http://www.goodreads.com/search.xml?q=' + quoted + '&' + urllib.urlencode(params)

    myDB = database.DBConnection()
    # count the authors before and after, the difference is how many we added
    start = int(myDB.match("select count('AuthorID') as counter from authors")['counter'])
    seriesname = myDB.match('select SeriesName from series where SeriesID="%s"' %
                            seriesid)['SeriesName']
    members = getSeriesMembers(seriesid)
    if members:
        myDB = database.DBConnection()
        # first pass searches for title and author, second pass for title only
        found_msgs = ("Author Search found %s %s, authorid %s",
                      "Title Search found %s %s, authorid %s")
        for member in members:
            # member[0] is the order in the series, not needed here
            bookname = member[1]
            authorname = member[2]
            params = {"key": lazylibrarian.CONFIG['GR_API']}
            set_url = search_url(bookname + ' ' + authorname, params)
            authorid = ''
            try:
                for attempt, found_msg in enumerate(found_msgs):
                    if attempt:  # try again with title only
                        set_url = search_url(bookname, params)
                    rootxml, in_cache = get_xml_request(set_url)
                    if len(rootxml):
                        for item in rootxml.getiterator('work'):
                            booktitle = item.find('./best_book/title').text
                            if fuzz.token_set_ratio(booktitle, bookname) >= 98:
                                author = item.find('./best_book/author/name').text
                                authorid = item.find('./best_book/author/id').text
                                logger.debug(found_msg % (author, booktitle, authorid))
                                break
                    if authorid:
                        break
                if not authorid:
                    logger.warn("GoodReads doesn't know about %s %s" %
                                (authorname, bookname))
            except Exception as e:
                logger.error("Error finding goodreads results: %s" % str(e))

            if authorid:
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)

    finish = int(myDB.match("select count('AuthorID') as counter from authors")['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" %
                (newauth, plural(newauth), seriesname))
    return newauth