Example #1
    def BookToList(book_id, shelf_name, action='add'):
        global client
        if action == 'remove':
            body = urlencode({
                'name': shelf_name,
                'book_id': book_id,
                'a': 'remove'
            })
        else:
            body = urlencode({'name': shelf_name, 'book_id': book_id})
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        time_now = int(time.time())
        if time_now <= lazylibrarian.LAST_GOODREADS:
            time.sleep(1)
            lazylibrarian.LAST_GOODREADS = time_now
        try:
            response, content = client.request(
                '%s/shelf/add_to_shelf.xml' % 'https://www.goodreads.com',
                'POST', body, headers)
        except Exception as e:
            return False, "Exception in client.request: %s %s" % (
                type(e).__name__, str(e))

        if response['status'] != '200' and response['status'] != '201':
            msg = 'Failure status: %s' % response['status']
            return False, msg
        return True, content
Example #2
    def BookToList(book_id, shelf_name, action='add'):
        global client
        if action == 'remove':
            body = urlencode({
                'name': shelf_name,
                'book_id': book_id,
                'a': 'remove'
            })
        else:
            body = urlencode({'name': shelf_name, 'book_id': book_id})
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        gr_api_sleep()
        try:
            response, content = client.request(
                '%s/shelf/add_to_shelf.xml' % 'https://www.goodreads.com',
                'POST', body, headers)
        except Exception as e:
            logger.error("Exception in client.request: %s %s" %
                         (type(e).__name__, traceback.format_exc()))
            return False, "Error in client.request: see error log"

        if not response['status'].startswith('2'):
            msg = 'Failure status: %s' % response['status']
            return False, msg
        return True, content
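
Examples #1 and #2 are two revisions of the same function: the inline rate-limit check against lazylibrarian.LAST_GOODREADS in example #1 was later factored out into the gr_api_sleep() helper that example #2 calls. The helper itself does not appear in these results; a minimal sketch consistent with the inline logic (recording the call time unconditionally, which the inline version only does when it sleeps) might be:

    import time
    import lazylibrarian  # assumed: module exposing the LAST_GOODREADS timestamp

    def gr_api_sleep():
        # Throttle Goodreads API calls to roughly one per second.
        time_now = int(time.time())
        if time_now <= lazylibrarian.LAST_GOODREADS:
            time.sleep(1)
        lazylibrarian.LAST_GOODREADS = time_now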
Example #3
    def follow_author(self, authorid=None, follow=True):
        global consumer, client, token, user_id
        if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
                lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.warn(
                "Goodreads follow author error: Please authorise first")
            return False, 'Unauthorised'

        consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                  secret=str(
                                      lazylibrarian.CONFIG['GR_SECRET']))
        token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'],
                            lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
        client = oauth.Client(consumer, token)
        user_id = self.getUserId()

        # follow https://www.goodreads.com/author_followings?id=AUTHOR_ID&format=xml
        # unfollow https://www.goodreads.com/author_followings/AUTHOR_FOLLOWING_ID?format=xml
        time_now = int(time.time())
        if time_now <= lazylibrarian.LAST_GOODREADS:
            time.sleep(1)
            lazylibrarian.LAST_GOODREADS = time_now

        if follow:
            body = urlencode({'id': authorid, 'format': 'xml'})
            headers = {'Content-Type': 'application/x-www-form-urlencoded'}
            try:
                response, content = client.request(
                    '%s/author_followings' % 'https://www.goodreads.com',
                    'POST', body, headers)
            except Exception as e:
                logger.error("Exception in client.request: %s %s" %
                             (type(e).__name__, traceback.format_exc()))
                return False, "Error in client.request: see error log"
        else:
            body = urlencode({'format': 'xml'})
            headers = {'Content-Type': 'application/x-www-form-urlencoded'}
            try:
                response, content = client.request(
                    '%s/author_followings/%s' %
                    ('https://www.goodreads.com', authorid), 'DELETE', body,
                    headers)
            except Exception as e:
                logger.error("Exception in client.request: %s %s" %
                             (type(e).__name__, traceback.format_exc()))
                return False, "Error in client.request: see error log"

        if follow and response['status'] == '422':
            return True, 'Already following'

        if response['status'].startswith('2'):
            if follow:
                return True, content.split('<id>')[1].split('</id>')[0]
            return True, ''
        return False, 'Failure status: %s' % response['status']
Example #4
    def create_shelf(self, shelf='lazylibrarian'):
        global consumer, client, token, user_id
        if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
                lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.warn("Goodreads create shelf error: Please authorise first")
            return False, 'Unauthorised'

        consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                  secret=str(lazylibrarian.CONFIG['GR_SECRET']))
        token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'], lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
        client = oauth.Client(consumer, token)
        user_id = self.getUserId()

        # could also pass [featured] [exclusive_flag] [sortable_flag] all default to False
        body = urlencode({'user_shelf[name]': shelf.lower()})
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        gr_api_sleep()

        try:
            response, content = client.request('%s/user_shelves.xml' % 'https://www.goodreads.com', 'POST',
                                               body, headers)
        except Exception as e:
            logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
            return False, "Error in client.request: see error log"

        if not response['status'].startswith('2'):
            msg = 'Failure status: %s' % response['status']
            return False, msg
        return True, ''
Example #5
def getSeriesMembers(seriesID=None):
    """ Ask librarything or goodreads for details on all books in a series
        order, bookname, authorname, workid, authorid
        (workid and authorid are goodreads only)
        Return as a list of lists """
    results = []
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        params = {"format": "xml", "key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/series/' + seriesID + '?' + urlencode(params)
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting series %s" % seriesID)
                return []
        except Exception as e:
            logger.error("%s finding series %s: %s" % (type(e).__name__, seriesID, str(e)))
            return []

        works = rootxml.find('series/series_works')
        books = works.getiterator('series_work')
        if not books:  # getiterator returns a (possibly empty) list, never None
            logger.warn('No books found for %s' % seriesID)
            return []
        for book in books:
            mydict = {}
            for mykey, location in [('order', 'user_position'),
                                    ('bookname', 'work/best_book/title'),
                                    ('authorname', 'work/best_book/author/name'),
                                    ('workid', 'work/id'),
                                    ('authorid', 'work/best_book/author/id')
                                    ]:
                if book.find(location) is not None:
                    mydict[mykey] = book.find(location).text
                else:
                    mydict[mykey] = ""
            results.append([mydict['order'], mydict['bookname'], mydict['authorname'],
                            mydict['workid'], mydict['authorid']])
    else:
        data = getBookWork(None, "SeriesPage", seriesID)
        if data:
            try:
                table = data.split('class="worksinseries"')[1].split('</table>')[0]
                rows = table.split('<tr')
                for row in rows:
                    if 'href=' in row:
                        booklink = row.split('href="')[1]
                        bookname = booklink.split('">')[1].split('<')[0]
                        # booklink = booklink.split('"')[0]
                        try:
                            authorlink = row.split('href="')[2]
                            authorname = authorlink.split('">')[1].split('<')[0]
                            # authorlink = authorlink.split('"')[0]
                            order = row.split('class="order">')[1].split('<')[0]
                            results.append([order, bookname, authorname, '', ''])
                        except IndexError:
                            logger.debug('Incomplete data in series table for series %s' % seriesID)
            except IndexError:
                if 'class="worksinseries"' in data:  # error parsing, or just no series data available?
                    logger.debug('Error in series table for series %s' % seriesID)
    return results
Example #6
    def get_shelf_list(self):
        global consumer, client, token, user_id
        if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
                lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.warn("Goodreads get shelf error: Please authorise first")
            return []
        else:
            #
            # loop over each page of shelves
            #     loop over each shelf
            #         add shelf to list
            #
            consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                      secret=str(lazylibrarian.CONFIG['GR_SECRET']))
            token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'], lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
            client = oauth.Client(consumer, token)
            user_id = self.getUserId()

            current_page = 0
            shelves = []
            page_shelves = 1
            while page_shelves:
                current_page = current_page + 1
                page_shelves = 0
                shelf_template = Template('${base}/shelf/list.xml?user_id=${user_id}&key=${key}&page=${page}')
                body = urlencode({})
                headers = {'Content-Type': 'application/x-www-form-urlencoded'}
                request_url = shelf_template.substitute(base='https://www.goodreads.com', user_id=user_id,
                                                        page=current_page, key=lazylibrarian.CONFIG['GR_API'])
                gr_api_sleep()
                try:
                    response, content = client.request(request_url, 'GET', body, headers)
                except Exception as e:
                    logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
                    return shelves

                if not response['status'].startswith('2'):
                    logger.error('Failure status: %s for page %s' % (response['status'], current_page))
                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                        logger.debug(request_url)
                else:
                    xmldoc = xml.dom.minidom.parseString(content)

                    shelf_list = xmldoc.getElementsByTagName('shelves')[0]
                    for item in shelf_list.getElementsByTagName('user_shelf'):
                        shelf_name = item.getElementsByTagName('name')[0].firstChild.nodeValue
                        shelf_count = item.getElementsByTagName('book_count')[0].firstChild.nodeValue
                        shelf_exclusive = item.getElementsByTagName('exclusive_flag')[0].firstChild.nodeValue
                        shelves.append({'name': shelf_name, 'books': shelf_count, 'exclusive': shelf_exclusive})
                        page_shelves += 1

                        if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                            logger.debug('Shelf %s : %s: Exclusive %s' % (shelf_name, shelf_count, shelf_exclusive))

                    if lazylibrarian.LOGLEVEL & lazylibrarian.log_grsync:
                        logger.debug('Found %s shelves on page %s' % (page_shelves, current_page))

            logger.debug('Found %s shelves on %s page%s' % (len(shelves), current_page - 1, plural(current_page - 1)))
            # print shelves
            return shelves
Example #7
    def create_shelf(self, shelf='lazylibrarian'):
        global consumer, client, token, user_id
        if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
                lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.warn("Goodreads create shelf error: Please authorise first")
            return False, 'Unauthorised'

        consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                  secret=str(
                                      lazylibrarian.CONFIG['GR_SECRET']))
        token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'],
                            lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
        client = oauth.Client(consumer, token)
        user_id = self.getUserId()

        # could also pass [featured] [exclusive_flag] [sortable_flag] all default to False
        body = urlencode({'user_shelf[name]': shelf.lower()})
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        time_now = int(time.time())
        if time_now <= lazylibrarian.LAST_GOODREADS:
            time.sleep(1)
            lazylibrarian.LAST_GOODREADS = time_now
        try:
            response, content = client.request(
                '%s/user_shelves.xml' % 'https://www.goodreads.com', 'POST',
                body, headers)
        except Exception as e:
            return False, "Exception in client.request: %s %s" % (
                type(e).__name__, str(e))

        if response['status'] != '200' and response['status'] != '201':
            msg = 'Failure status: %s' % response['status']
            return False, msg
        return True, ''
Example #8
 def getShelfBooks(page, shelf_name):
     global client, user_id
     data = '${base}/review/list?format=xml&v=2&id=${user_id}&sort=author&order=a'
     data += '&key=${key}&page=${page}&per_page=100&shelf=${shelf_name}'
     owned_template = Template(data)
     body = urlencode({})
     headers = {'Content-Type': 'application/x-www-form-urlencoded'}
     request_url = owned_template.substitute(
         base='https://www.goodreads.com',
         user_id=user_id,
         page=page,
         key=lazylibrarian.CONFIG['GR_API'],
         shelf_name=shelf_name)
     gr_api_sleep()
     try:
         response, content = client.request(request_url, 'GET', body,
                                            headers)
     except Exception as e:
         logger.error("Exception in client.request: %s %s" %
                      (type(e).__name__, traceback.format_exc()))
         return "Error in client.request: see error log"
     if not response['status'].startswith('2'):
         logger.error('Failure status: %s for page %s' %
                      (response['status'], page))
     return content
Example #9
 def getShelfBooks(page, shelf_name):
     global client, user_id
     data = '${base}/review/list?format=xml&v=2&id=${user_id}&sort=author&order=a'
     data += '&key=${key}&page=${page}&per_page=100&shelf=${shelf_name}'
     owned_template = Template(data)
     body = urlencode({})
     headers = {'Content-Type': 'application/x-www-form-urlencoded'}
     request_url = owned_template.substitute(
         base='https://www.goodreads.com',
         user_id=user_id,
         page=page,
         key=lazylibrarian.CONFIG['GR_API'],
         shelf_name=shelf_name)
     time_now = int(time.time())
     if time_now <= lazylibrarian.LAST_GOODREADS:
         time.sleep(1)
         lazylibrarian.LAST_GOODREADS = time_now
     try:
         response, content = client.request(request_url, 'GET', body,
                                            headers)
     except Exception as e:
         return "Exception in client.request: %s %s" % (type(e).__name__,
                                                        str(e))
     if response['status'] != '200':
         raise Exception('Failure status: %s for page %s' %
                         (response['status'], page))
     return content
Example #10
    def __str__(self):
        data = {
            'oauth_consumer_key': self.key,
            'oauth_consumer_secret': self.secret
        }

        return urlencode(data)
Example #11
    def _action(self, params, body=None, content_type=None):
        # noinspection PyTypeChecker
        url = self.base_url + '/gui/' + '?token=' + self.token + '&' + urlencode(
            params)
        request = Request(url)
        if lazylibrarian.CONFIG['PROXY_HOST']:
            for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
                request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
        request.add_header('User-Agent', getUserAgent())

        if body:
            if PY2:
                request.add_data(body)
            else:
                request.data = body  # Request.data is an attribute in Python 3, not a method
            request.add_header('Content-length', len(body))
        if content_type:
            request.add_header('Content-type', content_type)

        try:
            response = self.opener.open(request)
            return response.code, json.loads(response.read())
        except HTTPError as err:
            logger.debug('URL: %s' % url)
            logger.debug('uTorrent webUI raised the following error: ' +
                         str(err))
Example #12
def getBookAuthors(bookid):
    """ Get a list of authors contributing to a book from the goodreads bookpage or the librarything bookwork file """
    authorlist = []
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        params = {"key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urlencode(params)
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if rootxml is None:
                logger.debug("Error requesting book %s" % bookid)
                return []
        except Exception as e:
            logger.error("%s finding book %s: %s" % (type(e).__name__, bookid, str(e)))
            return []

        book = rootxml.find('book')
        authors = book.find('authors')
        anames = authors.getiterator('author')
        if not anames:  # getiterator returns a (possibly empty) list, never None
            logger.warn('No authors found for %s' % bookid)
            return []
        for aname in anames:
            author = {}
            if aname.find('id') is not None:
                author['id'] = aname.find('id').text
            if aname.find('name') is not None:
                author['name'] = aname.find('name').text
            if aname.find('role') is not None:
                role = aname.find('role').text
                if not role:
                    role = ''
                author['role'] = role
            if author:
                authorlist.append(author)
    else:
        data = getBookWork(bookid, "Authors")
        if data:
            try:
                data = data.split('otherauthors_container')[1].split('</table>')[0].split('<table')[1].split('>', 1)[1]
            except IndexError:
                data = ''

        authorlist = []
        if data and 'Work?' in data:
            try:
                rows = data.split('<tr')
                for row in rows[2:]:
                    author = {}
                    col = row.split('<td>')
                    author['name'] = col[1].split('">')[1].split('<')[0]
                    author['role'] = col[2].split('<')[0]
                    author['type'] = col[3].split('<')[0]
                    author['work'] = col[4].split('<')[0]
                    author['status'] = col[5].split('<')[0]
                    authorlist.append(author)
            except IndexError:
                logger.debug('Error parsing authorlist for %s' % bookid)
    return authorlist
Example #13
 def get_normalized_parameters(self):
     """Return a string that contains the parameters that must be signed."""
     items = [(k, v) for k, v in list(self.items()) if k != 'oauth_signature']
     encoded_str = urlencode(sorted(items), True)
     # Encode signature parameters per Oauth Core 1.0 protocol
     # spec draft 7, section 3.6
     # (http://tools.ietf.org/html/draft-hammer-oauth-07#section-3.6)
     # Spaces must be encoded with "%20" instead of "+"
     return encoded_str.replace('+', '%20')
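
The comments above capture the one subtlety in this override: urlencode() encodes spaces as '+', while the OAuth 1.0 signature base string requires '%20'. The blanket replace is safe because any literal '+' in a value has already been percent-encoded to '%2B', so the only '+' characters left are spaces. A quick illustration with made-up parameters:

    from urllib.parse import urlencode

    items = [('q', 'war and peace'), ('oauth_consumer_key', 'abc')]
    encoded = urlencode(sorted(items), True)
    print(encoded)                      # oauth_consumer_key=abc&q=war+and+peace
    print(encoded.replace('+', '%20'))  # oauth_consumer_key=abc&q=war%20and%20peace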
Example #14
    def follow_author(self, authorid=None, follow=True):
        global consumer, client, token, user_id
        if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
                lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.warn("Goodreads follow author error: Please authorise first")
            return False, 'Unauthorised'

        consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                  secret=str(lazylibrarian.CONFIG['GR_SECRET']))
        token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'], lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
        client = oauth.Client(consumer, token)
        user_id = self.getUserId()

        # follow https://www.goodreads.com/author_followings?id=AUTHOR_ID&format=xml
        # unfollow https://www.goodreads.com/author_followings/AUTHOR_FOLLOWING_ID?format=xml
        gr_api_sleep()

        if follow:
            body = urlencode({'id': authorid, 'format': 'xml'})
            headers = {'Content-Type': 'application/x-www-form-urlencoded'}
            try:
                response, content = client.request('%s/author_followings' % 'https://www.goodreads.com', 'POST', body,
                                                   headers)
            except Exception as e:
                logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
                return False, "Error in client.request: see error log"
        else:
            body = urlencode({'format': 'xml'})
            headers = {'Content-Type': 'application/x-www-form-urlencoded'}
            try:
                response, content = client.request('%s/author_followings/%s' % ('https://www.goodreads.com', authorid),
                                                   'DELETE', body, headers)
            except Exception as e:
                logger.error("Exception in client.request: %s %s" % (type(e).__name__, traceback.format_exc()))
                return False, "Error in client.request: see error log"

        if follow and response['status'] == '422':
            return True, 'Already following'

        if response['status'].startswith('2'):
            if follow:
                return True, content.split('<id>')[1].split('</id>')[0]
            return True, ''
        return False, 'Failure status: %s' % response['status']
Example #15
 def _get_sid(self, base_url, username, password):
     # login so we can capture SID cookie
     login_data = makeBytestr(urlencode({'username': username, 'password': password}))
     try:
         _ = self.opener.open(base_url + '/login', login_data)
     except Exception as err:
         logger.error('Error getting SID. qBittorrent %s: %s' % (type(err).__name__, str(err)))
         logger.warn('Unable to log in to %s/login' % base_url)
         return
     for cookie in self.cookiejar:
         logger.debug('login cookie: ' + cookie.name + ', value: ' + cookie.value)
     return
Example #16
    def _sendProwl(prowl_api=None, prowl_priority=None, event=None, message=None, force=False):

        title = "LazyLibrarian"

        # suppress notifications if the notifier is disabled but the notify options are checked
        if not lazylibrarian.CONFIG['USE_PROWL'] and not force:
            return False

        if prowl_api is None:
            prowl_api = lazylibrarian.CONFIG['PROWL_APIKEY']

        if prowl_priority is None:
            prowl_priority = lazylibrarian.CONFIG['PROWL_PRIORITY']

        if PY2:
            message = message.encode(lazylibrarian.SYS_ENCODING)

        logger.debug(u"Prowl: title: " + title)
        logger.debug(u"Prowl: event: " + event)
        logger.debug(u"Prowl: message: " + message)

        data = {'event': event,
                'description': message,
                'application': title,
                'apikey': prowl_api,
                'priority': prowl_priority
                }

        try:
            http_handler = HTTPSConnection("api.prowlapp.com")

            http_handler.request("POST",
                                 "/publicapi/add",
                                 headers={'Content-type': "application/x-www-form-urlencoded"},
                                 body=urlencode(data))

            response = http_handler.getresponse()
            request_status = response.status

            if request_status == 200:
                logger.info('Prowl notifications sent.')
                return True
            elif request_status == 401:
                logger.info('Prowl auth failed: %s' % response.reason)
                return False
            else:
                logger.info('Prowl notification failed.')
                return False

        except Exception as e:
            logger.warn('Error sending to Prowl: %s' % e)
            return False
Example #17
    def _command(self, command, args=None, content_type=None, files=None):
        logger.debug('QBittorrent WebAPI Command: %s' % command)
        url = self.base_url + '/' + command
        data = None
        headers = dict()

        if files or content_type == 'multipart/form-data':
            data, headers = encode_multipart(
                args, files, '-------------------------acebdf13572468')
        else:
            if args:
                data = makeBytestr(urlencode(args))
            if content_type:
                headers['Content-Type'] = content_type

        request = Request(url, data, headers)

        if lazylibrarian.CONFIG['PROXY_HOST']:
            for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
                request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
        request.add_header('User-Agent', USER_AGENT)

        try:
            response = self.opener.open(request)
            try:
                contentType = response.headers['content-type']
            except KeyError:
                contentType = ''

            resp = response.read()
            # some commands return json
            if contentType == 'application/json':
                if resp:
                    return json.loads(resp)
                return ''
            else:
                # some commands return plain text
                resp = makeUnicode(resp)
                logger.debug("QBitTorrent returned %s" % resp)
                if command == 'version/api':
                    return resp
                # some just return Ok. or Fails.
                if resp and resp != 'Ok.':
                    return False
            # some commands return nothing but response code (always 200)
            return True
        except URLError as err:
            logger.debug('Failed URL: %s' % url)
            logger.debug('QBitTorrent webUI raised the following error: %s' %
                         err.reason)
            return False
Example #18
def _getJSON(URL, params):
    # Get JSON response from URL
    # Return json,True or error_msg,False

    URL += "/?%s" % urlencode(params)
    result, success = fetchURL(URL, retry=False)
    if success:
        try:
            result_json = json.loads(result)
            return result_json, True
        except (ValueError, AttributeError):
            return "Could not convert response to json", False

    return "getJSON returned %s" % result, False
Example #19
    def _command(self, command, args=None, content_type=None, files=None):
        logger.debug('QBittorrent WebAPI Command: %s' % command)
        url = self.base_url + '/' + command
        data = None
        headers = dict()

        if files or content_type == 'multipart/form-data':
            data, headers = encode_multipart(args, files, '-------------------------acebdf13572468')
        else:
            if args:
                data = makeBytestr(urlencode(args))
            if content_type:
                headers['Content-Type'] = content_type

        request = Request(url, data, headers)

        if lazylibrarian.CONFIG['PROXY_HOST']:
            for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
                request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
        request.add_header('User-Agent', getUserAgent())

        try:
            response = self.opener.open(request)
            try:
                contentType = response.headers['content-type']
            except KeyError:
                contentType = ''

            resp = response.read()
            # some commands return json
            if contentType == 'application/json':
                if resp:
                    return json.loads(resp)
                return ''
            else:
                # some commands return plain text
                resp = makeUnicode(resp)
                logger.debug("QBitTorrent returned %s" % resp)
                if command == 'version/api':
                    return resp
                # some just return Ok. or Fails.
                if resp and resp != 'Ok.':
                    return False
            # some commands return nothing but response code (always 200)
            return True
        except URLError as err:
            logger.debug('Failed URL: %s' % url)
            logger.debug('QBitTorrent webUI raised the following error: %s' % err.reason)
            return False
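
_command() normalises three response shapes from the qBittorrent WebUI: JSON bodies are decoded, the API-version query is passed through as plain text, and everything else collapses to a True/False success flag. A hypothetical call site (command names follow the old WebAPI and vary by qBittorrent version; qbt stands for an instance of this client class):

    api_version = qbt._command('version/api')  # plain text, returned as-is
    torrents = qbt._command('query/torrents')  # JSON, decoded into Python objects
    paused = qbt._command('command/pause', {'hash': 'abc123'},
                          'application/x-www-form-urlencoded')  # empty body -> True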
Example #20
    def to_string(self):
        """Returns this token as a plain string, suitable for storage.

        The resulting string includes the token's secret, so you should never
        send or store this string where a third party can read it.
        """

        data = {
            'oauth_token': self.key,
            'oauth_token_secret': self.secret,
        }

        if self.callback_confirmed is not None:
            data['oauth_callback_confirmed'] = self.callback_confirmed
        return urlencode(data)
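
Because to_string() emits an ordinary query string, the token round-trips through the standard-library parser. A sketch of the inverse (helper name invented; assumes the oauth.Token constructor used in the other examples):

    from urllib.parse import parse_qs

    def token_from_string(s):
        # Inverse of to_string(): parse the query string and rebuild the token.
        params = parse_qs(s)
        return oauth.Token(params['oauth_token'][0], params['oauth_token_secret'][0])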
Example #21
    def _action(self, params, body=None, content_type=None):
        url = self.base_url + '/gui/' + '?token=' + self.token + '&' + urlencode(params)
        request = Request(url)
        if lazylibrarian.CONFIG['PROXY_HOST']:
            for item in getList(lazylibrarian.CONFIG['PROXY_TYPE']):
                request.set_proxy(lazylibrarian.CONFIG['PROXY_HOST'], item)
        request.add_header('User-Agent', USER_AGENT)

        if body:
            if PY2:
                request.add_data(body)
            else:
                request.data = body  # Request.data is an attribute in Python 3, not a method
            request.add_header('Content-length', len(body))
        if content_type:
            request.add_header('Content-type', content_type)

        try:
            response = self.opener.open(request)
            return response.code, json.loads(response.read())
        except HTTPError as err:
            logger.debug('URL: %s' % url)
            logger.debug('uTorrent webUI raised the following error: ' + str(err))
Example #22
def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {
        "type": "search",
        "s_cat": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'].replace(',', ''))
                    except ValueError:
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #23
def WWT(book=None, test=False):
    errmsg = ''
    provider = "WorldWideTorrents"
    host = lazylibrarian.CONFIG['WWT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/torrents-search.php")

    sterm = makeUnicode(book['searchterm'])

    cat = 0  # 0=all, 36=ebooks, 52=mags, 56=audiobooks
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 56
        elif book['library'] == 'eBook':
            cat = 36
        elif book['library'] == 'magazine':
            cat = 52

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {
            "search": book['searchterm'],
            "page": page,
            "cat": cat
        }
        searchURL = providerurl + "/?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # might return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '503' in result:
                logger.warn("Cloudflare bot detection? %s: %s" % (provider, result))
                logger.warn("Try unblocking %s from a browser" % providerurl)
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            rows = []
            try:
                tables = soup.find_all('table')  # un-named table
                table = tables[2]
                if table:
                    rows = table.find_all('tr')
            except IndexError:  # no results table in result page
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 3:
                    try:
                        title = unaccented(td[0].text)
                        # can return magnet or torrent or both.
                        magnet = ''
                        url = ''
                        mode = 'torrent'
                        try:
                            magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0]
                            mode = 'magnet'
                        except IndexError:
                            pass
                        try:
                            url = url_fix(host + '/download.php') + \
                                          str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent'
                            mode = 'torrent'
                        except IndexError:
                            pass

                        if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                        try:
                            size = str(td[1].text).replace('&nbsp;', '').upper()
                            size = size_in_bytes(size)
                        except ValueError:
                            size = 0
                        try:
                            seeders = int(td[2].text.replace(',', ''))
                        except ValueError:
                            seeders = 0

                        if not url or not title:
                            logger.debug('Missing url or title')
                        elif minimumseeders < seeders:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': mode,
                                'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                            next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))
        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
Example #24
def TPB(book=None, test=False):
    errmsg = ''
    provider = "TPB"
    host = lazylibrarian.CONFIG['TPB_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?")

    cat = 0  # 601=ebooks, 102=audiobooks, 0=all, no mag category
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 102
        elif book['library'] == 'eBook':
            cat = 601
        elif book['library'] == 'magazine':
            cat = 0

    sterm = makeUnicode(book['searchterm'])

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:

        params = {
            "q": book['searchterm'],
            "category": cat,
            "page": page,
            "orderby": "99"
        }

        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)

        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            # tpb uses a named table
            table = soup.find('table', id='searchResult')
            if table:
                rows = table.find_all('tr')
            else:
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers
            for row in rows:
                td = row.find_all('td')
                if len(td) > 2:
                    try:
                        new_soup = BeautifulSoup(str(td[1]), 'html5lib')
                        link = new_soup.find("a")
                        magnet = link.get("href")
                        title = link.text
                        size = td[1].text.split(', Size ')[1].split('iB')[0]
                        size = size.replace('&nbsp;', '')
                        size = size_in_bytes(size)
                        try:
                            seeders = int(td[2].text.replace(',', ''))
                        except ValueError:
                            seeders = 0

                        if minimumseeders < seeders:
                            # no point in asking for magnet link if not enough seeders
                            magurl = '%s/%s' % (host, magnet)
                            result, success = fetchURL(magurl)
                            if not success:
                                logger.debug('Error fetching url %s, %s' % (magurl, result))
                            else:
                                magnet = None
                                new_soup = BeautifulSoup(result, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output and output.startswith('magnet'):
                                        magnet = output
                                        break
                            if not magnet or not title:
                                logger.debug('Missing magnet or title')
                            else:
                                results.append({
                                    'bookid': book['bookid'],
                                    'tor_prov': provider,
                                    'tor_title': title,
                                    'tor_url': magnet,
                                    'tor_size': str(size),
                                    'tor_type': 'magnet',
                                    'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY']
                                })
                                logger.debug('Found %s. Size: %s' % (title, size))
                                next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
Example #25
def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if not prov:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []

                try:
                    table = soup.find_all('table', rules='rows')[-1]  # the last table with rules=rows
                    if table:
                        rows = table.find_all('tr')

                except IndexError:  # no results table in result page
                    rows = []

                if len(rows) > 1:  # skip table headers
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')

                    if 'index.php' in search and len(td) > 3:
                        # Foreign fiction
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                                extn = td[4].text.split('(')[0].strip()
                                size = td[4].text.split('(')[1].split(')')[0]
                                size = size.upper()
                        except IndexError as e:
                            logger.debug('Error parsing libgen index.php results: %s' % str(e))

                    elif 'search.php' in search and len(td) > 8:
                        # Non-fiction
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            link = ''
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            for res in newsoup.find_all('a'):
                                output = res.get('href')
                                if 'md5' in output:
                                    link = output
                                    break
                        except IndexError as e:
                            logger.debug('Error parsing libgen search.php results: %s' % str(e))

                    size = size_in_bytes(size)

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if link.startswith('http'):
                            url = redirect_url(host, link)
                        else:
                            if "/index.php?" in link:
                                link = 'md5' + link.split('md5')[1]

                            if "/ads.php?" in link:
                                url = url_fix(host + "/" + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            logger.debug('Error fetching link data from %s: %s' % (provider, bookresult))
                            logger.debug(url)
                            url = None
                        else:
                            url = None
                            try:
                                new_soup = BeautifulSoup(bookresult, 'html5lib')
                                for atag in new_soup.find_all('a'):
                                    output = atag.get('href')
                                    if output:
                                        if output.startswith('http') and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split('/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split('/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                elif url:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.error('%s parsing bookresult for %s: %s' % (type(e).__name__, link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
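
# Usage sketch (hypothetical values; callers normally build the book dict
# from the database):
# book = {'bookid': '12345', 'searchterm': 'author name book title'}
# results, errmsg = GEN(book=book)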
Example #33
def KAT(book=None, test=False):
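    """ Search the configured KAT host for a book, preferring magnet
        links if PREFER_MAGNET is set and skipping results with too few
        seeders. Return a list of result dicts and an error message
        string, or just the fetch success flag if test is True """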
    errmsg = ''
    provider = "KAT"
    host = lazylibrarian.CONFIG['KAT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + quote(book['searchterm']))

    params = {
        "category": "books",
        "field": "seeders",
        "sorder": "desc"
    }
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
            errmsg = result
        result = False

    if test:
        return success

    results = []

    if result:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
        soup = BeautifulSoup(result, 'html5lib')
        rows = []
        try:
            table = soup.find_all('table')[1]  # un-named table
            if table:
                rows = table.find_all('tr')
        except IndexError:  # no results table in result page
            rows = []

        if len(rows) > 1:
            rows = rows[1:]  # first row is headers

        for row in rows:
            td = row.find_all('td')
            if len(td) > 3:
                try:
                    title = unaccented(td[0].text)
                    # kat can return magnet or torrent or both.
                    magnet = ''
                    url = ''
                    mode = 'torrent'
                    try:
                        magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0]
                        mode = 'magnet'
                    except IndexError:
                        pass
                    try:
                        url = 'http' + str(td[0]).split('href="http')[1].split('.torrent?')[0] + '.torrent'
                        mode = 'torrent'
                    except IndexError:
                        pass

                    if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'

                    try:
                        size = str(td[1].text).replace('&nbsp;', '').upper()
                        size = size_in_bytes(size)
                    except ValueError:
                        size = 0
                    try:
                        seeders = int(td[3].text.replace(',', ''))
                    except ValueError:
                        seeders = 0

                    if not url or not title:
                        logger.debug('Missing url or title')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
Example #34
def TDL(book=None, test=False):
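    """ Search the torrentdownloads rss feed for a book and resolve each
        entry with enough seeders to a magnet link. Return a list of
        result dicts and an error message string, or just the fetch
        success flag if test is True """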
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {
        "type": "search",
        "cid": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'].replace(',', ''))
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl+link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            res = {
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            }
                            if pubdate:
                                res['tor_date'] = pubdate
                            logger.debug('Found %s. Size: %s' % (title, size))
                            results.append(res)
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
Example #35
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?', (seriesid,))
    seriesname = result['SeriesName']
    members = getSeriesMembers(seriesid)
    dic = {u'\u2018': "", u'\u2019': "", u'\u201c': '', u'\u201d': '', "'": "", '"': ''}

    if members:
        myDB = database.DBConnection()
        for member in members:
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find('./best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find('./best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug("Author Search found %s %s, authorid %s" %
                                             (author, booktitle, authorid))
                                break
                    if not authorid:  # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' % searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find('./best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find('./best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug("Title Search found %s %s, authorid %s" %
                                                 (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" % (type(e).__name__, str(e)))

            if authorid:
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth
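
# Usage sketch (hypothetical seriesid from the series table):
# newauth = getSeriesAuthors('12345')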
Example #36
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?',
                        (seriesid, ))
    seriesname = result['SeriesName']
    members, api_hits = getSeriesMembers(seriesid, seriesname)
    dic = {
        u'\u2018': "",
        u'\u2019': "",
        u'\u201c': '',
        u'\u201d': '',
        "'": "",
        '"': ''
    }

    if members:
        myDB = database.DBConnection()
        for member in members:
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            # pubyear = member[5]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if not in_cache:
                        api_hits += 1
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(
                                booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find(
                                        './best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find(
                                        './best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug(
                                    "Author Search found %s %s, authorid %s" %
                                    (author, booktitle, authorid))
                                break
                    if not authorid:  # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(
                                lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(
                            params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if not in_cache:
                            api_hits += 1
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' %
                                        searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(
                                    booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find(
                                            './best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find(
                                            './best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug(
                                        "Title Search found %s %s, authorid %s"
                                        % (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" %
                                    (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" %
                                 (type(e).__name__, str(e)))

            if authorid:
                lazylibrarian.importer.addAuthorToDB(refresh=False,
                                                     authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" %
                (newauth, plural(newauth), seriesname))
    return newauth
Example #37
def KAT(book=None, test=False):
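    """ Search the configured KAT host for a book, parsing result sizes
        and seeder counts inline. Return a list of result dicts and an
        error message string, or just the fetch success flag if test is True """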
    errmsg = ''
    provider = "KAT"
    host = lazylibrarian.CONFIG['KAT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + quote(book['searchterm']))

    params = {"category": "books", "field": "seeders", "sorder": "desc"}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, result))
            errmsg = result
        result = False

    if test:
        return success

    results = []

    if result:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
        soup = BeautifulSoup(result, 'html5lib')
        rows = []
        try:
            table = soup.find_all('table')[1]  # un-named table
            if table:
                rows = table.find_all('tr')
        except IndexError:  # no results table in result page
            rows = []

        if len(rows) > 1:
            rows = rows[1:]  # first row is headers

        for row in rows:
            td = row.find_all('td')
            if len(td) > 3:
                try:
                    title = unaccented(td[0].text)
                    # kat can return magnet or torrent or both.
                    magnet = ''
                    url = ''
                    mode = 'torrent'
                    try:
                        magnet = 'magnet' + str(
                            td[0]).split('href="magnet')[1].split('"')[0]
                        mode = 'magnet'
                    except IndexError:
                        pass
                    try:
                        url = 'http' + str(td[0]).split('href="http')[1].split(
                            '.torrent?')[0] + '.torrent'
                        mode = 'torrent'
                    except IndexError:
                        pass

                    if not url or (magnet and url
                                   and lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'

                    try:
                        size = str(td[1].text).replace('&nbsp;', '').upper()
                        mult = 1
                        if 'K' in size:
                            size = size.split('K')[0]
                            mult = 1024
                        elif 'M' in size:
                            size = size.split('M')[0]
                            mult = 1024 * 1024
                        elif 'G' in size:
                            size = size.split('G')[0]
                            mult = 1024 * 1024 * 1024
                        size = int(float(size) * mult)
                    except (ValueError, IndexError):
                        size = 0
                    try:
                        seeders = int(td[3].text.replace(',', ''))
                    except ValueError:
                        seeders = 0

                    if not url or not title:
                        logger.debug('Missing url or title')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
Example #38
def SABnzbd(title=None, nzburl=None, remove_data=False):
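    """ Send a command to SABnzbd via its api. nzburl is either one of
        the special keywords auth, get_cats, queue, history, delete or
        delhistory, or the url of an nzb to add to the queue.
        Return the raw api response for LL.() queries, an nzo_id or
        'unknown' when adding a download, or False on error """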

    if nzburl in ['delete', 'delhistory'] and title == 'unknown':
        logger.debug('%s function unavailable in this version of sabnzbd, no nzo_ids' % nzburl)
        return False

    hostname = lazylibrarian.CONFIG['SAB_HOST']
    port = check_int(lazylibrarian.CONFIG['SAB_PORT'], 0)
    if not hostname or not port:
        logger.error('Invalid sabnzbd host or port, check your config')
        return False

    if hostname.endswith('/'):
        hostname = hostname[:-1]
    if not hostname.startswith("http://") and not hostname.startswith("https://"):
        hostname = 'http://' + hostname

    HOST = "%s:%s" % (hostname, port)

    if lazylibrarian.CONFIG['SAB_SUBDIR']:
        HOST = HOST + "/" + lazylibrarian.CONFIG['SAB_SUBDIR']

    params = {}
    if nzburl == 'auth' or nzburl == 'get_cats':
        # connection test, check auth mode or get_cats
        params['mode'] = nzburl
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(%s)' % nzburl
    elif nzburl == 'queue':
        params['mode'] = 'queue'
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(Queue)'
    elif nzburl == 'history':
        params['mode'] = 'history'
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(History)'
    elif nzburl == 'delete':
        # only deletes tasks if still in the queue, ie NOT completed tasks
        params['mode'] = 'queue'
        params['output'] = 'json'
        params['name'] = nzburl
        params['value'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(Delete) ' + title
    elif nzburl == 'delhistory':
        params['mode'] = 'history'
        params['output'] = 'json'
        params['name'] = 'delete'
        params['value'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(DelHistory) ' + title
    else:
        params['mode'] = 'addurl'
        params['output'] = 'json'
        if nzburl:
            params['name'] = nzburl
        if title:
            params['nzbname'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if lazylibrarian.CONFIG['SAB_CAT']:
            params['cat'] = lazylibrarian.CONFIG['SAB_CAT']
        if lazylibrarian.CONFIG['USENET_RETENTION']:
            params["maxage"] = lazylibrarian.CONFIG['USENET_RETENTION']

# FUTURE-CODE
#    if lazylibrarian.SAB_PRIO:
#        params["priority"] = lazylibrarian.SAB_PRIO
#    if lazylibrarian.SAB_PP:
#        params["script"] = lazylibrarian.SAB_SCRIPT

    URL = HOST + "/api?" + urlencode(params)

    # only log the full request url at debug level, it includes the api key
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Request url for <a href="%s">SABnzbd</a>' % URL)
    proxies = proxyList()
    try:
        timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
        r = requests.get(URL, timeout=timeout, proxies=proxies)
        result = r.json()
    except requests.exceptions.Timeout:
        logger.error("Timeout connecting to SAB with URL: %s" % URL)
        return False
    except Exception as e:
        if hasattr(e, 'reason'):
            errmsg = e.reason
        elif hasattr(e, 'strerror'):
            errmsg = e.strerror
        else:
            errmsg = str(e)

        logger.error("Unable to connect to SAB with URL: %s, %s" % (URL, errmsg))
        return False
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug("Result text from SAB: " + str(result))

    if title:
        title = unaccented_str(title)
        if title.startswith('LL.('):
            return result
    if result['status'] is True:
        logger.info("%s sent to SAB successfully." % title)
        # sab versions earlier than 0.8.0 don't return nzo_ids
        if 'nzo_ids' in result:
            if result['nzo_ids']:  # check it's not empty
                return result['nzo_ids'][0]
        return 'unknown'
    elif result['status'] is False:
        logger.error("SAB returned Error: %s" % result['error'])
        return False
    else:
        logger.error("Unknown error: " + str(result))
        return False
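
# Usage sketch (hypothetical url; SAB_HOST, SAB_PORT etc. come from config):
# nzo_id = SABnzbd(title='Some Book Title', nzburl='http://indexer.example/get/123.nzb')
# result = SABnzbd(nzburl='auth')  # connection test, returns the raw api response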
Example #39
def setWorkID(books=None):
    """ Set the goodreads workid for any books that don't already have one
        books is a comma separated list of bookids or if empty, select from database
        Paginate requests to reduce api hits """

    myDB = database.DBConnection()
    pages = []
    if books:
        page = books
        pages.append(page)
    else:
        cmd = "select BookID,BookName from books where WorkID='' or WorkID is null"
        books = myDB.select(cmd)
        if books:
            counter = 0
            logger.debug('Setting WorkID for %s book%s' %
                         (len(books), plural(len(books))))
            page = ''
            for book in books:
                bookid = book['BookID']
                if not bookid:
                    logger.debug("No bookid for %s" % book['BookName'])
                else:
                    if page:
                        page = page + ','
                    page = page + bookid
                    counter += 1
                    if counter == 50:
                        counter = 0
                        pages.append(page)
                        page = ''
            if page:
                pages.append(page)

    counter = 0
    params = {"key": lazylibrarian.CONFIG['GR_API']}
    for page in pages:
        URL = 'https://www.goodreads.com/book/id_to_work_id/' + page + '?' + urlencode(
            params)
        try:
            rootxml, in_cache = gr_xml_request(URL, useCache=False)
            if rootxml is None:
                logger.debug("Error requesting id_to_work_id page")
            else:
                resultxml = rootxml.find('work-ids')
                if len(resultxml):
                    ids = resultxml.getiterator('item')
                    books = getList(page)
                    cnt = 0
                    for item in ids:
                        workid = item.text
                        if not workid:
                            logger.debug("No workid returned for %s" %
                                         books[cnt])
                        else:
                            counter += 1
                            controlValueDict = {"BookID": books[cnt]}
                            newValueDict = {"WorkID": workid}
                            myDB.upsert("books", newValueDict,
                                        controlValueDict)
                        cnt += 1

        except Exception as e:
            logger.error("%s parsing id_to_work_id page: %s" %
                         (type(e).__name__, str(e)))

    msg = 'Updated %s id%s' % (counter, plural(counter))
    logger.debug("setWorkID complete: " + msg)
    return msg
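
# Usage sketch (hypothetical bookids):
# msg = setWorkID('12345,67890')  # or setWorkID() to fill all missing workids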
Example #40
def SABnzbd(title=None, nzburl=None, remove_data=False):
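    """ Send a command or an nzb download url to SABnzbd via its api.
        Return the raw api response for LL.() queries, an nzo_id or
        'unknown' when adding a download, or False on error """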

    if nzburl in ['delete', 'delhistory'] and title == 'unknown':
        logger.debug(
            '%s function unavailable in this version of sabnzbd, no nzo_ids' %
            nzburl)
        return False

    hostname = lazylibrarian.CONFIG['SAB_HOST']
    port = check_int(lazylibrarian.CONFIG['SAB_PORT'], 0)
    if not hostname or not port:
        logger.error('Invalid sabnzbd host or port, check your config')
        return False

    if hostname.endswith('/'):
        hostname = hostname[:-1]
    if not hostname.startswith("http://") and not hostname.startswith(
            "https://"):
        hostname = 'http://' + hostname

    HOST = "%s:%s" % (hostname, port)

    if lazylibrarian.CONFIG['SAB_SUBDIR']:
        HOST = HOST + "/" + lazylibrarian.CONFIG['SAB_SUBDIR']

    params = {}
    if nzburl == 'auth' or nzburl == 'get_cats':
        # connection test, check auth mode or get_cats
        params['mode'] = nzburl
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(%s)' % nzburl
    elif nzburl == 'queue':
        params['mode'] = 'queue'
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(Queue)'
    elif nzburl == 'history':
        params['mode'] = 'history'
        params['output'] = 'json'
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        title = 'LL.(History)'
    elif nzburl == 'delete':
        # only deletes tasks if still in the queue, ie NOT completed tasks
        params['mode'] = 'queue'
        params['output'] = 'json'
        params['name'] = nzburl
        params['value'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(Delete) ' + title
    elif nzburl == 'delhistory':
        params['mode'] = 'history'
        params['output'] = 'json'
        params['name'] = 'delete'
        params['value'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if remove_data:
            params['del_files'] = 1
        title = 'LL.(DelHistory) ' + title
    else:
        params['mode'] = 'addurl'
        params['output'] = 'json'
        if nzburl:
            params['name'] = nzburl
        if title:
            params['nzbname'] = title
        if lazylibrarian.CONFIG['SAB_USER']:
            params['ma_username'] = lazylibrarian.CONFIG['SAB_USER']
        if lazylibrarian.CONFIG['SAB_PASS']:
            params['ma_password'] = lazylibrarian.CONFIG['SAB_PASS']
        if lazylibrarian.CONFIG['SAB_API']:
            params['apikey'] = lazylibrarian.CONFIG['SAB_API']
        if lazylibrarian.CONFIG['SAB_CAT']:
            params['cat'] = lazylibrarian.CONFIG['SAB_CAT']
        if lazylibrarian.CONFIG['USENET_RETENTION']:
            params["maxage"] = lazylibrarian.CONFIG['USENET_RETENTION']

# FUTURE-CODE
#    if lazylibrarian.SAB_PRIO:
#        params["priority"] = lazylibrarian.SAB_PRIO
#    if lazylibrarian.SAB_PP:
#        params["script"] = lazylibrarian.SAB_SCRIPT

    URL = HOST + "/api?" + urlencode(params)

    # only log the full request url at debug level, it includes the api key
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Request url for <a href="%s">SABnzbd</a>' % URL)
    proxies = proxyList()
    try:
        timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)
        r = requests.get(URL, timeout=timeout, proxies=proxies)
        result = r.json()
    except requests.exceptions.Timeout:
        logger.error("Timeout connecting to SAB with URL: %s" % URL)
        return False
    except Exception as e:
        if hasattr(e, 'reason'):
            errmsg = e.reason
        elif hasattr(e, 'strerror'):
            errmsg = e.strerror
        else:
            errmsg = str(e)

        logger.error("Unable to connect to SAB with URL: %s, %s" %
                     (URL, errmsg))
        return False
    if lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug("Result text from SAB: " + str(result))

    if title:
        title = unaccented_str(title)
        if title.startswith('LL.('):
            return result
    if result['status'] is True:
        logger.info("%s sent to SAB successfully." % title)
        # sab versions earlier than 0.8.0 don't return nzo_ids
        if 'nzo_ids' in result:
            if result['nzo_ids']:  # check it's not empty
                return result['nzo_ids'][0]
        return 'unknown'
    elif result['status'] is False:
        logger.error("SAB returned Error: %s" % result['error'])
        return False
    else:
        logger.error("Unknown error: " + str(result))
        return False
Example #41
    def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                         entrystatus='Active', refresh=False):
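        """ Fetch an author's books from the Google Books API, filter
            them by language and the various rejection rules, and add
            or update the survivors in the books table """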
        # noinspection PyBroadException
        try:
            logger.debug('[%s] Now processing books with Google Books API' % authorname)
            # google doesnt like accents in author names
            set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

            api_hits = 0
            gr_lang_hits = 0
            lt_lang_hits = 0
            gb_lang_change = 0
            cache_hits = 0
            not_cached = 0
            startindex = 0
            resultcount = 0
            removedResults = 0
            duplicates = 0
            ignored = 0
            added_count = 0
            updated_count = 0
            book_ignore_count = 0
            total_count = 0
            number_results = 1

            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
            # mark the author as Loading while the books are processed
            myDB = database.DBConnection()
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            try:
                while startindex < number_results:

                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)

                    try:
                        jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    if number_results == 0:
                        logger.warn('Found no results for %s' % authorname)
                        break
                    else:
                        logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                    startindex += 40

                    for item in jsonresults['items']:

                        total_count += 1
                        book = bookdict(item)
                        # skip if no author, no author is no book.
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            continue

                        isbnhead = ""
                        if len(book['isbn']) == 10:
                            isbnhead = book['isbn'][0:3]
                        elif len(book['isbn']) == 13:
                            isbnhead = book['isbn'][3:6]

                        booklang = book['lang']
                        # do we care about language?
                        if "All" not in valid_langs:
                            if book['isbn']:
                                # seems google lies to us, sometimes tells us books are in english when they are not
                                if booklang == "Unknown" or booklang == "en":
                                    googlelang = booklang
                                    match = False
                                    lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                    if lang:
                                        booklang = lang['lang']
                                        cache_hits += 1
                                        logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                        match = True
                                    if not match:  # no match in cache, try lookup dict
                                        if isbnhead:
                                            if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                                for lang in lazylibrarian.isbn_979_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_979_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            elif (len(book['isbn']) == 10) or \
                                                    (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                                for lang in lazylibrarian.isbn_978_dict:
                                                    if isbnhead.startswith(lang):
                                                        booklang = lazylibrarian.isbn_978_dict[lang]
                                                        logger.debug("ISBN979 returned %s for %s" %
                                                                     (booklang, isbnhead))
                                                        match = True
                                                        break
                                            if match:
                                                myDB.action('insert into languages values (?, ?)',
                                                            (isbnhead, booklang))

                                    if not match:
                                        booklang = thingLang(book['isbn'])
                                        lt_lang_hits += 1
                                        if booklang:
                                            match = True
                                            myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                    if match:
                                        # We found a better language match
                                        if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                            # these are all english, may need to expand this list
                                            logger.debug("%s Google thinks [%s], we think [%s]" %
                                                         (book['name'], googlelang, booklang))
                                            gb_lang_change += 1
                                    else:  # No match anywhere, accept google language
                                        booklang = googlelang

                            # skip if language is in ignore list
                            if booklang not in valid_langs:
                                logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                                ignored += 1
                                continue

                        rejected = 0
                        check_status = False
                        book_status = bookstatus  # new_book status, or new_author status
                        audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                        added = today()
                        locked = False
                        existing_book = None
                        bookname = book['name']
                        bookid = item['id']
                        if not bookname:
                            logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                            removedResults += 1
                            rejected = 1
                        else:
                            bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                            # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                            # editions) and sometimes uses the same bookid if the book is the same but the title is
                            # slightly different. Not sure if googlebooks does too, but we only want one...
                            cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                            existing_book = myDB.match(cmd, (bookid,))
                            if existing_book:
                                book_status = existing_book['Status']
                                audio_status = existing_book['AudioStatus']
                                locked = existing_book['Manual']
                                added = existing_book['BookAdded']
                                if locked is None:
                                    locked = False
                                elif locked.isdigit():
                                    locked = bool(int(locked))
                            else:
                                if rejected in [3, 4, 5]:
                                    book_status = 'Ignored'
                                    audio_status = 'Ignored'
                                else:
                                    book_status = bookstatus  # new_book status, or new_author status
                                    audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                                added = today()
                                locked = False

                        if not rejected and re.match(r'[^\w-]', bookname):  # reject titles starting with a non-word character
                            logger.debug("[%s] removed book for bad characters" % bookname)
                            removedResults += 1
                            rejected = 2

                        if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                            # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                            if book['date'] > today()[:len(book['date'])]:
                                logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                                removedResults += 1
                                rejected = 3

                        if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                            if not book['date']:
                                logger.debug('Rejecting %s, no publication date' % bookname)
                                removedResults += 1
                                rejected = 4

                        if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                            if not isbnhead:
                                logger.debug('Rejecting %s, no isbn' % bookname)
                                removedResults += 1
                                rejected = 5

                        if not rejected:
                            cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                            cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                            match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                            if match:
                                if match['BookID'] != bookid:  # we have a different book with this author/title already
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                                 (match['BookID'], authorname, bookname, bookid))
                                    rejected = 6
                                    duplicates += 1

                        if not rejected:
                            cmd = 'SELECT AuthorName,BookName FROM books,authors'
                            cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                            match = myDB.match(cmd, (bookid,))
                            if match:  # we have a book with this bookid already
                                if bookname != match['BookName'] or authorname != match['AuthorName']:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                                 (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                                else:
                                    logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                                 (bookid, authorname, bookname))
                                    check_status = True
                                duplicates += 1
                                rejected = 7

                        if check_status or not rejected or (
                                lazylibrarian.CONFIG['IMP_IGNORE'] and rejected in [3, 4, 5]):  # dates, isbn
                            if not locked:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "AuthorID": authorid,
                                    "BookName": bookname,
                                    "BookSub": book['sub'],
                                    "BookDesc": book['desc'],
                                    "BookIsbn": book['isbn'],
                                    "BookPub": book['pub'],
                                    "BookGenre": book['genre'],
                                    "BookImg": book['img'],
                                    "BookLink": book['link'],
                                    "BookRate": float(book['rate']),
                                    "BookPages": book['pages'],
                                    "BookDate": book['date'],
                                    "BookLang": booklang,
                                    "Status": book_status,
                                    "AudioStatus": audio_status,
                                    "BookAdded": added
                                }
                                resultcount += 1

                                myDB.upsert("books", newValueDict, controlValueDict)
                                logger.debug("Book found: " + bookname + " " + book['date'])
                                updated = False
                                if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                    # try to get a cover from another source
                                    workcover, source = getBookCover(bookid)
                                    if workcover:
                                        logger.debug('Updated cover for %s using %s' % (bookname, source))
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": workcover}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True

                                elif book['img'] and book['img'].startswith('http'):
                                    link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                    if success:
                                        controlValueDict = {"BookID": bookid}
                                        newValueDict = {"BookImg": link}
                                        myDB.upsert("books", newValueDict, controlValueDict)
                                        updated = True
                                    else:
                                        logger.debug('Failed to cache image for %s' % book['img'])

                                serieslist = []
                                if book['series']:
                                    serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
                                if lazylibrarian.CONFIG['ADD_SERIES']:
                                    newserieslist = getWorkSeries(bookid)
                                    if newserieslist:
                                        serieslist = newserieslist
                                        logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                        updated = True
                                setSeries(serieslist, bookid)

                                new_status = setStatus(bookid, serieslist, bookstatus)

                                if new_status != book_status:
                                    book_status = new_status
                                    updated = True

                                worklink = getWorkPage(bookid)
                                if worklink:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"WorkPage": worklink}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                                if not existing_book:
                                    logger.debug("[%s] Added book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    added_count += 1
                                elif updated:
                                    logger.debug("[%s] Updated book: %s [%s] status %s" %
                                                 (authorname, bookname, booklang, book_status))
                                    updated_count += 1
                            else:
                                book_ignore_count += 1
            except KeyError:
                pass

            deleteEmptySeries()
            logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                         (authorname, api_hits, plural(api_hits)))
            cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
            cmd += ' AND Status != "Ignored" order by BookDate DESC'
            lastbook = myDB.match(cmd, (authorid,))

            if lastbook:  # maybe there are no books [remaining] for this author
                lastbookname = lastbook['BookName']
                lastbooklink = lastbook['BookLink']
                lastbookdate = lastbook['BookDate']
                lastbookimg = lastbook['BookImg']
            else:
                lastbookname = ""
                lastbooklink = ""
                lastbookdate = ""
                lastbookimg = ""

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {
                "Status": entrystatus,
                "LastBook": lastbookname,
                "LastLink": lastbooklink,
                "LastDate": lastbookdate,
                "LastBookImg": lastbookimg
            }

            myDB.upsert("authors", newValueDict, controlValueDict)

            logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
            logger.debug("Removed %s bad character or no-name result%s for author" %
                         (removedResults, plural(removedResults)))
            logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
            logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
            logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

            myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                        (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                         cache_hits, ignored, removedResults, not_cached, duplicates))

            if refresh:
                logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                            (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
            else:
                logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                            (authorname, added_count, plural(added_count)))

        except Exception:
            logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
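
A note on the NO_FUTURE check above: Google Books dates arrive as yyyy, yyyy-mm or yyyy-mm-dd, so the code compares against a prefix of today's date of the same length, which works because ISO-style dates compare correctly as strings. A minimal standalone sketch, using datetime in place of the project's today() helper:

from datetime import date

def is_future(book_date):
    # compare only as many characters as the book date supplies,
    # e.g. '2999' vs '2024', '2024-05' vs '2024-05'
    today_str = date.today().isoformat()  # 'yyyy-mm-dd'
    return book_date > today_str[:len(book_date)]

# is_future('2999') -> True; is_future('1999-01') -> False
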
Example #42
    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0
            title = ''
            authorname = ''

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')

            fullterm = searchterm.replace(' <ll> ', ' ')
            logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)

            for api_value in api_strings:
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + quote(api_value + searchterm)
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        # noinspection PyUnresolvedReferences
                        title = title.split(' (')[0]  # without any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace('"', '').strip()  # and no quotes
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote(api_value + '"' + searchterm + '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    searchterm = searchterm.strip()
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote_plus(api_value + '"' + searchterm + '"')

                startindex = 0
                resultcount = 0
                ignored = 0
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urlencode(self.params)

                        try:
                            jsonresults, in_cache = gb_json_request(URL)
                            if jsonresults is None:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn('Found no results for %s with value: %s' % (api_value, searchterm))
                                break
                        except Exception as err:
                            if hasattr(err, 'reason'):
                                errmsg = err.reason
                            else:
                                errmsg = str(err)
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                            break

                        startindex += 40

                        for item in jsonresults['items']:
                            total_count += 1

                            book = bookdict(item)
                            if not book['author']:
                                logger.debug('Skipped a result without author field.')
                                no_author_count += 1
                                continue

                            if not book['name']:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                            if "All" not in valid_langs:  # don't care about languages, accept all
                                try:
                                    # skip if language is not in valid list -
                                    booklang = book['lang']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' % (book['name'], booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug('Skipped %s where no language is found' % book['name'])
                                    continue

                            if authorname:
                                author_fuzz = fuzz.ratio(book['author'], authorname)
                            else:
                                author_fuzz = fuzz.ratio(book['author'], fullterm)

                            if title:
                                book_fuzz = fuzz.token_set_ratio(book['name'], title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(book['name']))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.token_set_ratio(book['name'], fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(book['name'], dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace

                            AuthorID = ''
                            if book['author']:
                                match = myDB.match(
                                    'SELECT AuthorID FROM authors WHERE AuthorName=?', (
                                        book['author'].replace('"', '""'),))
                                if match:
                                    AuthorID = match['AuthorID']

                            resultlist.append({
                                'authorname': book['author'],
                                'authorid': AuthorID,
                                'bookid': item['id'],
                                'bookname': bookname,
                                'booksub': book['sub'],
                                'bookisbn': book['isbn'],
                                'bookpub': book['pub'],
                                'bookdate': book['date'],
                                'booklang': book['lang'],
                                'booklink': book['link'],
                                'bookrate': float(book['rate']),
                                'bookrate_count': book['rate_count'],
                                'bookimg': book['img'],
                                'bookpages': book['pages'],
                                'bookgenre': book['genre'],
                                'bookdesc': book['desc'],
                                'author_fuzz': author_fuzz,
                                'book_fuzz': book_fuzz,
                                'isbn_fuzz': isbn_fuzz,
                                'highest_fuzz': highest_fuzz,
                                'num_reviews': book['ratings']
                            })

                            resultcount += 1

                except KeyError:
                    break

                logger.debug("Returning %s result%s for (%s) with keyword: %s" %
                             (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
            logger.debug('The Google Books API was hit %s time%s for searchterm: %s' %
                         (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
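
The docstring above describes two conventions worth isolating: the ' <ll> ' token that separates title from author, and the per-word penalty applied to the title fuzz score so the closest-length match wins. A sketch of that scoring, assuming the fuzzywuzzy fuzz module the surrounding code uses and plain whitespace splitting in place of the project's getList():

from fuzzywuzzy import fuzz

def score_result(result_author, result_title, searchterm):
    if ' <ll> ' in searchterm:  # LazyLibrarian's title/author separator
        title, author = searchterm.split(' <ll> ')
    else:
        title = author = searchterm
    author_fuzz = fuzz.ratio(result_author, author)
    book_fuzz = fuzz.token_set_ratio(result_title, title)
    # token_set_ratio ignores surplus words, so dock a point per extra
    # word to prefer the closest match
    book_fuzz -= abs(len(result_title.split()) - len(title.split()))
    return author_fuzz, book_fuzz
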
Example #43
def ZOO(book=None, test=False):
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/search")

    params = {
        "q": book['searchterm'],
        "category": "books",
        "fmt": "rss"
    }
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'].replace(',', ''))
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    mode = 'torrent'
                    if link:
                        url = link
                        mode = 'torrent'
                    if magnet:
                        if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
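
One detail that recurs in every torrent provider in these examples: minimumseeders is set to NUMBEROFSEEDERS - 1 and then tested with minimumseeders < seeders, which together mean "accept when seeders >= NUMBEROFSEEDERS". The equivalent check as a sketch:

def enough_seeders(seeders, required):
    # the providers write this as (required - 1) < seeders
    return seeders >= required
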
Example #44
def TPB(book=None, test=False):
    errmsg = ''
    provider = "TPB"
    host = lazylibrarian.CONFIG['TPB_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?")

    cat = 0  # 601=ebooks, 102=audiobooks, 0=all, no mag category
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 102
        elif book['library'] == 'eBook':
            cat = 601
        elif book['library'] == 'magazine':
            cat = 0

    sterm = makeUnicode(book['searchterm'])

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:

        params = {
            "q": book['searchterm'],
            "category": cat,
            "page": page,
            "orderby": "99"
        }

        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)

        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" %
                             (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            # tpb uses a named table
            table = soup.find('table', id='searchResult')
            if table:
                rows = table.find_all('tr')
            else:
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers
            for row in rows:
                td = row.find_all('td')
                if len(td) > 2:
                    try:
                        new_soup = BeautifulSoup(str(td[1]), 'html5lib')
                        link = new_soup.find("a")
                        magnet = link.get("href")
                        title = link.text
                        size = td[1].text.split(', Size ')[1].split('iB')[0]
                        size = size.replace('&nbsp;', '')
                        mult = 1
                        try:
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0
                        try:
                            seeders = int(td[2].text)
                        except ValueError:
                            seeders = 0

                        if minimumseeders < int(seeders):
                            # no point in asking for magnet link if not enough seeders
                            magurl = '%s/%s' % (host, magnet)
                            result, success = fetchURL(magurl)
                            if not success:
                                logger.debug('Error fetching url %s, %s' %
                                             (magurl, result))
                            else:
                                magnet = None
                                new_soup = BeautifulSoup(result, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output and output.startswith('magnet'):
                                        magnet = output
                                        break
                            if not magnet or not title:
                                logger.debug('Missing magnet or title')
                            else:
                                results.append({
                                    'bookid': book['bookid'],
                                    'tor_prov': provider,
                                    'tor_title': title,
                                    'tor_url': magnet,
                                    'tor_size': str(size),
                                    'tor_type': 'magnet',
                                    'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY']
                                })
                                logger.debug('Found %s. Size: %s' %
                                             (title, size))
                                next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' %
                                         (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
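
TPB reports sizes as strings like '1.2&nbsp;MiB'; the parser above strips the markup, drops the 'iB' suffix and multiplies by the matching power of 1024. The same conversion as a standalone helper, a sketch rather than the project's own code:

def size_to_bytes(size_str):
    # '1.2 M' -> 1258291; malformed input -> 0
    size_str = size_str.replace('&nbsp;', '').strip().upper()
    mult = 1
    for prefix, factor in (('K', 1024), ('M', 1024 ** 2), ('G', 1024 ** 3)):
        if prefix in size_str:
            size_str = size_str.split(prefix)[0]
            mult = factor
            break
    try:
        return int(float(size_str) * mult)
    except ValueError:
        return 0
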
Example #45
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {
        "q": book['searchterm']
    }
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        res = {
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        }
                        if pubdate:
                            res['tor_date'] = pubdate
                        results.append(res)
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
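
Limetorrents exposes the .torrent file as an RSS enclosure, which is why the loop above scans item['links'] for an 'x-bittorrent' MIME type. A minimal feedparser sketch of that lookup (LIME reads link['url'] while EXTRA below reads link['href']; feedparser aliases 'url' to 'href', so both work):

import feedparser

def torrent_enclosures(feed_xml):
    # yield (title, url) for entries carrying a .torrent enclosure
    d = feedparser.parse(feed_xml)
    for item in d.entries:
        for link in item.get('links', []):
            if 'x-bittorrent' in link.get('type', ''):
                yield item.get('title', ''), link.get('href', '')
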
Example #46
def ZOO(book=None, test=False):
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/search")

    params = {"q": book['searchterm'], "category": "books", "fmt": "rss"}
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    mode = 'torrent'
                    if link:
                        url = link
                        mode = 'torrent'
                    if magnet:
                        if not url or (url and
                                       lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
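
Example #46 duplicates the zooqle provider with different formatting; its magnet-versus-torrent selection is the part worth isolating. The .torrent link wins by default, but the magnet takes over when no link exists or PREFER_MAGNET is set. As a sketch:

def pick_download(link, magnet, prefer_magnet=False):
    url, mode = (link or None), 'torrent'
    if magnet and (not url or prefer_magnet):
        url, mode = magnet, 'magnet'
    return url, mode

# pick_download('http://x/t.torrent', 'magnet:?xt=...', prefer_magnet=True)
# -> ('magnet:?xt=...', 'magnet')
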
Example #47
def getSeriesMembers(seriesID=None, seriesname=None):
    """ Ask librarything or goodreads for details on all books in a series
        order, bookname, authorname, workid, authorid
        (workid and authorid are goodreads only)
        Return as a list of lists """
    results = []
    api_hits = 0
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        params = {"format": "xml", "key": lazylibrarian.CONFIG['GR_API']}
        URL = 'https://www.goodreads.com/series/%s?%s' % (seriesID,
                                                          urlencode(params))
        try:
            rootxml, in_cache = gr_xml_request(URL)
            if not in_cache:
                api_hits += 1
            if rootxml is None:
                logger.debug("Series %s:%s not recognised at goodreads" %
                             (seriesID, seriesname))
                return [], api_hits
        except Exception as e:
            logger.error("%s finding series %s: %s" %
                         (type(e).__name__, seriesID, str(e)))
            return [], api_hits

        works = rootxml.find('series/series_works')
        books = works.getiterator('series_work')
        if books is None:
            logger.warn('No books found for %s' % seriesID)
            return [], api_hits
        for book in books:
            mydict = {}
            for mykey, location in [('order', 'user_position'),
                                    ('bookname', 'work/best_book/title'),
                                    ('authorname',
                                     'work/best_book/author/name'),
                                    ('workid', 'work/id'),
                                    ('authorid', 'work/best_book/author/id'),
                                    ('pubyear',
                                     'work/original_publication_year')]:
                if book.find(location) is not None:
                    mydict[mykey] = book.find(location).text
                else:
                    mydict[mykey] = ""
            results.append([
                mydict['order'], mydict['bookname'], mydict['authorname'],
                mydict['workid'], mydict['authorid'], mydict['pubyear']
            ])
    else:
        api_hits = 0
        data = getBookWork(None, "SeriesPage", seriesID)
        if data:
            try:
                table = data.split('class="worksinseries"')[1].split(
                    '</table>')[0]
                rows = table.split('<tr')
                for row in rows:
                    if 'href=' in row:
                        booklink = row.split('href="')[1]
                        bookname = booklink.split('">')[1].split('<')[0]
                        # booklink = booklink.split('"')[0]
                        try:
                            authorlink = row.split('href="')[2]
                            authorname = authorlink.split('">')[1].split(
                                '<')[0]
                            # authorlink = authorlink.split('"')[0]
                            order = row.split('class="order">')[1].split(
                                '<')[0]
                            # pad workid/authorid/pubyear so rows match the goodreads shape
                            results.append(
                                [order, bookname, authorname, '', '', ''])
                        except IndexError:
                            logger.debug(
                                'Incomplete data in series table for series %s'
                                % seriesID)
            except IndexError:
                if 'class="worksinseries"' in data:  # error parsing, or just no series data available?
                    logger.debug('Error in series table for series %s' %
                                 seriesID)
    return results, api_hits
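
The goodreads branch above reads each field with find(location) and falls back to '' when the element is absent. A condensed ElementTree sketch of that pattern, with inline sample XML standing in for the API response:

import xml.etree.ElementTree as ElementTree

xml = ('<series_work><user_position>1</user_position>'
       '<work><id>123</id><best_book><title>Dune</title>'
       '<author><name>Frank Herbert</name><id>58</id></author>'
       '</best_book></work></series_work>')
book = ElementTree.fromstring(xml)

mydict = {}
for mykey, location in [('order', 'user_position'),
                        ('bookname', 'work/best_book/title'),
                        ('authorname', 'work/best_book/author/name'),
                        ('workid', 'work/id'),
                        ('pubyear', 'work/original_publication_year')]:
    node = book.find(location)
    mydict[mykey] = node.text if node is not None else ""
# mydict['pubyear'] == "" because that element is missing
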
Example #48
def TDL(book=None, test=False):
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {"type": "search", "cid": "2", "search": book['searchterm']}
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    if link and minimumseeders < int(seeders):
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl + link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
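
torrentdownloads only gives a relative details link in its RSS, so the code fetches providerurl + link and takes the first href on that page that starts with 'magnet'. That scan as a standalone BeautifulSoup sketch:

from bs4 import BeautifulSoup

def first_magnet(html):
    # return the first magnet: link on the page, or None
    soup = BeautifulSoup(html, 'html5lib')
    for a in soup.find_all('a'):
        href = a.get('href')
        if href and href.startswith('magnet'):
            return href
    return None
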
Example #49
def WWT(book=None, test=False):
    errmsg = ''
    provider = "WorldWideTorrents"
    host = lazylibrarian.CONFIG['WWT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/torrents-search.php")

    sterm = makeUnicode(book['searchterm'])

    cat = 0  # 0=all, 36=ebooks, 52=mags, 56=audiobooks
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 56
        elif book['library'] == 'eBook':
            cat = 36
        elif book['library'] == 'magazine':
            cat = 52

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {"search": book['searchterm'], "page": page, "cat": cat}
        searchURL = providerurl + "/?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # might return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" %
                             (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')

            try:
                tables = soup.find_all('table')  # results table is un-named, third on the page
                rows = tables[2].find_all('tr')
            except IndexError:  # no results table in result page
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 3:
                    try:
                        title = unaccented(td[0].text)
                        # can return magnet or torrent or both.
                        magnet = ''
                        url = ''
                        mode = 'torrent'
                        try:
                            magnet = 'magnet' + str(
                                td[0]).split('href="magnet')[1].split('"')[0]
                            mode = 'magnet'
                        except IndexError:
                            pass
                        try:
                            url = url_fix(host + '/download.php') + \
                                          str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent'
                            mode = 'torrent'
                        except IndexError:
                            pass

                        if not url or (magnet and url and
                                       lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                        try:
                            size = str(td[1].text).replace('&nbsp;',
                                                           '').upper()
                            mult = 1
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0
                        try:
                            seeders = int(td[2].text)
                        except ValueError:
                            seeders = 0

                        if not url or not title:
                            logger.debug('Missing url or title')
                        elif minimumseeders < int(seeders):
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': mode,
                                'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                            next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' %
                                         (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))
        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
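
Both paged providers (TPB and WWT) end their loop with the chained comparison 0 < MAX_PAGES < page, which treats MAX_PAGES == 0 as "no limit". The loop skeleton as a sketch, with a caller-supplied fetch_page standing in for the provider request:

def paged_search(fetch_page, max_pages=0):
    page, next_page, results = 0, True, []
    while next_page:
        hits = fetch_page(page)
        results.extend(hits)
        next_page = bool(hits)  # the providers set this when a page yields results
        page += 1
        if 0 < max_pages < page:  # 0 means unlimited
            break
    return results
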
Example #50
def encode_postdata(data):
    # tell urlencode to deal with sequence values and map them correctly
    # to the resulting querystring: for example self["k"] = ["v1", "v2"] will
    # result in 'k=v1&k=v2' and not 'k=%5B%27v1%27%2C+%27v2%27%5D'
    return urlencode(data, True)
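
A quick check of the doseq behaviour the comment describes, assuming Python 3's urllib.parse (on Python 2 the same function lives in urllib):

from urllib.parse import urlencode

print(urlencode({'k': ['v1', 'v2']}, True))  # k=v1&k=v2
print(urlencode({'k': ['v1', 'v2']}))        # k=%5B%27v1%27%2C+%27v2%27%5D
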
Example #51
def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {"type": "search", "s_cat": "2", "search": book['searchterm']}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'])
                    except ValueError:
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
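
Every provider in these examples treats a 404 from fetchURL as "no results" rather than a failure, since the trackers return 404 for an empty result set. The shared pattern as a sketch, with fetch_url standing in for the project's fetchURL (which returns a (data, success) pair):

def provider_fetch(fetch_url, search_url):
    # returns (data or None, errmsg)
    data, success = fetch_url(search_url)
    if success:
        return data, ''
    if '404' in data:  # empty result set, not a real error
        return None, ''
    return None, data  # genuine error: pass the message back
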
Example #52
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None, test=False):
    """
    Generic NewzNabplus query function
    takes in host+key+type and returns the result set regardless of who
    based on site running NewzNab+
    ref http://usenetreviewz.com/nzb-sites/
    """

    host = provider['HOST']
    api_key = provider['API']
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (
        searchType, host, searchMode, api_key, str(book)))

    results = []

    params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode)

    if params:
        if not str(host).startswith("http"):
            host = 'http://' + host
        if host[-1:] == '/':
            host = host[:-1]
        URL = host + '/api?' + urlencode(params)

        sterm = makeUnicode(book['searchterm'])

        rootxml = None
        logger.debug("[NewzNabPlus] URL = %s" % URL)
        result, success = fetchURL(URL, raw=True)

        if test:
            try:
                result = result.decode('utf-8')
            except UnicodeDecodeError:
                result = result.decode('latin-1')
            except AttributeError:
                pass

            if result.startswith('<') and result.endswith('/>') and "error code" in result:
                result = result[1:-2]
                success = False
            if not success:
                logger.debug(result)
            return success, result

        if success:
            try:
                rootxml = ElementTree.fromstring(result)
            except Exception as e:
                logger.error('Error parsing data from %s: %s %s' % (host, type(e).__name__, str(e)))
                rootxml = None
        else:
            try:
                result = result.decode('utf-8')
            except UnicodeDecodeError:
                result = result.decode('latin-1')
            except AttributeError:
                pass

            if not result or result == "''":
                result = "Got an empty response"
            logger.error('Error reading data from %s: %s' % (host, result))
            # maybe the host doesn't support the search type
            cancelled = cancelSearchType(searchType, result, provider)
            if not cancelled:  # it was some other problem
                BlockProvider(provider['HOST'], result)

        if rootxml is not None:
            # to debug because of api
            logger.debug('Parsing results from <a href="%s">%s</a>' % (URL, host))

            if rootxml.tag == 'error':
                errormsg = rootxml.get('description', default='unknown error')
                logger.error("%s - %s" % (host, errormsg))
                # maybe the host doesn't support the search type
                cancelled = cancelSearchType(searchType, errormsg, provider)
                if not cancelled:  # it was some other problem
                    BlockProvider(provider['HOST'], errormsg)
            else:
                resultxml = rootxml.iter('item')  # getiterator() was removed in Python 3.9
                nzbcount = 0
                maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0)
                for nzb in resultxml:
                    try:
                        thisnzb = ReturnResultsFieldsBySearchType(book, nzb, host, searchMode, provider['DLPRIORITY'])
                        thisnzb['dispname'] = provider['DISPNAME']
                        if not maxage:
                            nzbcount += 1
                            results.append(thisnzb)
                        else:
                            # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200
                            nzbdate = thisnzb['nzbdate']
                            try:
                                parts = nzbdate.split(' ')
                                nzbdate = ' '.join(parts[:5])  # strip the +0200
                                dt = datetime.datetime.strptime(nzbdate, "%a, %d %b %Y %H:%M:%S").timetuple()
                                nzbage = age('%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday))
                            except Exception as e:
                                logger.warn('Unable to get age from [%s] %s %s' %
                                            (thisnzb['nzbdate'], type(e).__name__, str(e)))
                                nzbage = 0
                            if nzbage <= maxage:
                                nzbcount += 1
                                results.append(thisnzb)
                            else:
                                logger.debug('%s is too old (%s day%s)' % (thisnzb['nzbtitle'], nzbage, plural(nzbage)))

                    except IndexError:
                        logger.debug('No results from %s for %s' % (host, sterm))
                logger.debug('Found %s nzb at %s for: %s' % (nzbcount, host, sterm))
        else:
            logger.debug('No data returned from %s for %s' % (host, sterm))
    return results
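The retention check above hinges on parsing the feed's pubDate-style string. Below is a minimal standalone sketch of that date-age logic, assuming dates look like the example in the comment ("Mon, 27 May 2013 02:12:09 +0200"); it substitutes a plain datetime difference for LazyLibrarian's age() helper, which is an assumption made for illustration.

# Sketch of the nzbdate age check in NewzNabPlus above (illustrative only).
import datetime

def nzb_age_days(nzbdate):
    # keep "Mon, 27 May 2013 02:12:09" and drop the "+0200" offset,
    # mirroring the split/join in the function above
    stamp = ' '.join(nzbdate.split(' ')[:5])
    dt = datetime.datetime.strptime(stamp, "%a, %d %b %Y %H:%M:%S")
    # stand-in for LazyLibrarian's age() helper: whole days since the post
    return (datetime.datetime.now() - dt).days

print(nzb_age_days("Mon, 27 May 2013 02:12:09 +0200"))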
Example #53
0
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {"q": book['searchterm']}
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(
                            seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
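The seeder count in LIME comes from free text in the RSS description field rather than a dedicated element. A minimal sketch of that extraction, assuming descriptions embed "Seeds: N," as the code above expects:

def parse_seeders(description):
    try:
        return int(description.split('Seeds:')[1].split(',')[0].strip())
    except (IndexError, ValueError):
        return 0  # no "Seeds:" marker, or not a number

print(parse_seeders("Seeds: 12, Leechers: 3"))  # -> 12
print(parse_seeders("no seeder info here"))     # -> 0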
Example #54
0
def NewzNabPlus(book=None,
                provider=None,
                searchType=None,
                searchMode=None,
                test=False):
    """
    Generic NewzNabplus query function
    takes in host+key+type and returns the result set regardless of who
    based on site running NewzNab+
    ref http://usenetreviewz.com/nzb-sites/
    """

    host = provider['HOST']
    api_key = provider['API']
    logger.debug(
        '[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]'
        % (searchType, host, searchMode, api_key, str(book)))

    results = []

    params = ReturnSearchTypeStructure(provider, api_key, book, searchType,
                                       searchMode)

    if params:
        if not str(host)[:4] == "http":
            host = 'http://' + host
        if host[-1:] == '/':
            host = host[:-1]
        URL = host + '/api?' + urlencode(params)

        sterm = makeUnicode(book['searchterm'])

        rootxml = None
        logger.debug("[NewzNabPlus] URL = %s" % URL)
        result, success = fetchURL(URL, raw=True)

        if test:
            try:
                result = result.decode('utf-8')
            except UnicodeDecodeError:
                result = result.decode('latin-1')
            except AttributeError:
                pass  # already a str, or fetchURL returned an error message

            if result.startswith('<') and result.endswith(
                    '/>') and "error code" in result:
                result = result[1:-2]
                success = False
            if not success:
                logger.debug(result)
            return success, result

        if success:
            try:
                rootxml = ElementTree.fromstring(result)
            except Exception as e:
                logger.error('Error parsing data from %s: %s %s' %
                             (host, type(e).__name__, str(e)))
                rootxml = None
        else:
            try:
                result = result.decode('utf-8')
            except UnicodeDecodeError:
                result = result.decode('latin-1')
            except AttributeError:
                pass  # already a str, or fetchURL returned an error message

            if not result or result == "''":
                result = "Got an empty response"
            logger.error('Error reading data from %s: %s' % (host, result))
            # maybe the host doesn't support the search type
            cancelled = cancelSearchType(searchType, result, provider)
            if not cancelled:  # it was some other problem
                BlockProvider(provider['HOST'], result)

        if rootxml is not None:
            # log the source URL for debugging
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (URL, host))

            if rootxml.tag == 'error':
                errormsg = rootxml.get('description', default='unknown error')
                logger.error("%s - %s" % (host, errormsg))
                # maybe the host doesn't support the search type
                cancelled = cancelSearchType(searchType, errormsg, provider)
                if not cancelled:  # it was some other problem
                    BlockProvider(provider['HOST'], errormsg)
            else:
                resultxml = rootxml.iter('item')  # getiterator() was removed in Python 3.9
                nzbcount = 0
                maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0)
                for nzb in resultxml:
                    try:
                        thisnzb = ReturnResultsFieldsBySearchType(
                            book, nzb, host, searchMode,
                            provider['DLPRIORITY'])
                        thisnzb['dispname'] = provider['DISPNAME']
                        if not maxage:
                            nzbcount += 1
                            results.append(thisnzb)
                        else:
                            # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200
                            nzbdate = thisnzb['nzbdate']
                            try:
                                parts = nzbdate.split(' ')
                                nzbdate = ' '.join(
                                    parts[:5])  # strip the +0200
                                dt = datetime.datetime.strptime(
                                    nzbdate,
                                    "%a, %d %b %Y %H:%M:%S").timetuple()
                                nzbage = age(
                                    '%04d-%02d-%02d' %
                                    (dt.tm_year, dt.tm_mon, dt.tm_mday))
                            except Exception as e:
                                logger.warn(
                                    'Unable to get age from [%s] %s %s' %
                                    (thisnzb['nzbdate'], type(e).__name__,
                                     str(e)))
                                nzbage = 0
                            if nzbage <= maxage:
                                nzbcount += 1
                                results.append(thisnzb)
                            else:
                                logger.debug('%s is too old (%s day%s)' %
                                             (thisnzb['nzbtitle'], nzbage,
                                              plural(nzbage)))

                    except IndexError:
                        logger.debug('No results from %s for %s' %
                                     (host, sterm))
                logger.debug('Found %s nzb at %s for: %s' %
                             (nzbcount, host, sterm))
        else:
            logger.debug('No data returned from %s for %s' % (host, sterm))
    return results
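Both NewzNabPlus variants build the request URL the same way: default the scheme to http://, strip a trailing slash, then append /api? plus the urlencoded parameters. A self-contained sketch of that normalisation, using a hypothetical host and key purely for illustration:

try:
    from urllib import urlencode        # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

def build_api_url(host, params):
    if not str(host)[:4] == "http":
        host = 'http://' + host
    if host.endswith('/'):
        host = host[:-1]
    return host + '/api?' + urlencode(params)

# 'indexer.example.org' and 'KEY' are placeholders, not real values
print(build_api_url('indexer.example.org/', {'t': 'book', 'apikey': 'KEY'}))
# -> http://indexer.example.org/api?t=book&apikey=KEY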
Example #55
0
def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if prov is None:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" %
                             (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error(
                    'Access forbidden. Please wait a while before trying %s again.'
                    % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []
                try:
                    table = soup.find_all('table')[2]  # un-named table
                    rows = table.find_all('tr')
                except IndexError:  # no results table in result page
                    pass

                if 'search.php' in search and len(rows) > 1:
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')
                    if 'index.php' in search and len(td) > 3:
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            link = data.get('href')
                            extn = data.text.split('(')[0]
                            size = data.text.split('(')[1].split(')')[0]
                            size = size.upper()
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen index.php results: %s' %
                                str(e))

                    elif 'search.php' in search and len(td) > 8:
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            # take the href from the first link in the cell;
                            # the soup object itself has no href attribute
                            link = newsoup.find('a').get('href')
                        except (IndexError, AttributeError) as e:
                            logger.debug(
                                'Error parsing libgen search.php results: %s' %
                                str(e))

                    if not size:
                        size = 0
                    else:
                        try:
                            mult = 1
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if not link.startswith('http'):
                            if "/ads.php?" in link:
                                url = url_fix(host + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)
                        else:
                            url = redirect_url(host, link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            # may return 404 if no results, not really an error
                            if '404' in bookresult:
                                logger.debug(
                                    "No results found from %s for %s" %
                                    (provider, sterm))
                            else:
                                logger.debug(url)
                                logger.debug(
                                    'Error fetching link data from %s: %s' %
                                    (provider, bookresult))
                                errmsg = bookresult
                            bookresult = False

                        if bookresult:
                            url = None
                            try:
                                new_soup = BeautifulSoup(bookresult, 'html5lib')
                                # iterate under a new name; reusing 'link' would
                                # clobber the row link used in the error message
                                for anchor in new_soup.find_all('a'):
                                    output = anchor.get('href')
                                    if output:
                                        if output.startswith('http') and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split('/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split('/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                else:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.error(
                                    '%s parsing bookresult for %s: %s' %
                                    (type(e).__name__, link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" %
                             (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
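The size handling in GEN converts human-readable strings such as "950KB" or "1.2GB" into a byte count by splitting on the unit letter and multiplying. A standalone sketch of that conversion, with the same fallback to 0 on unparseable input:

def size_to_bytes(size):
    try:
        mult = 1
        if 'K' in size:
            size, mult = size.split('K')[0], 1024
        elif 'M' in size:
            size, mult = size.split('M')[0], 1024 * 1024
        elif 'G' in size:
            size, mult = size.split('G')[0], 1024 * 1024 * 1024
        return int(float(size) * mult)
    except (ValueError, IndexError):
        return 0

print(size_to_bytes('950KB'))  # -> 972800
print(size_to_bytes('1.2GB'))  # -> 1288490188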
Example #56
0
def setWorkID(books=None):
    """ Set the goodreads workid for any books that don't already have one
        books is a comma separated list of bookids or if empty, select from database
        Paginate requests to reduce api hits """

    myDB = database.DBConnection()
    pages = []
    if books:
        page = books
        pages.append(page)
    else:
        cmd = "select BookID,BookName from books where WorkID='' or WorkID is null"
        books = myDB.select(cmd)
        if books:
            counter = 0
            logger.debug('Setting WorkID for %s book%s' % (len(books), plural(len(books))))
            page = ''
            for book in books:
                bookid = book['BookID']
                if not bookid:
                    logger.debug("No bookid for %s" % book['BookName'])
                else:
                    if page:
                        page = page + ','
                    page = page + bookid
                    counter += 1
                    if counter == 50:
                        counter = 0
                        pages.append(page)
                        page = ''
            if page:
                pages.append(page)

    counter = 0
    params = {"key": lazylibrarian.CONFIG['GR_API']}
    for page in pages:
        URL = 'https://www.goodreads.com/book/id_to_work_id/' + page + '?' + urlencode(params)
        try:
            rootxml, in_cache = gr_xml_request(URL, useCache=False)
            if rootxml is None:
                logger.debug("Error requesting id_to_work_id page")
            else:
                resultxml = rootxml.find('work-ids')
                if resultxml is not None and len(resultxml):
                    ids = resultxml.getiterator('item')
                    books = getList(page)
                    cnt = 0
                    for item in ids:
                        workid = item.text
                        if not workid:
                            logger.debug("No workid returned for %s" % books[cnt])
                        else:
                            counter += 1
                            controlValueDict = {"BookID": books[cnt]}
                            newValueDict = {"WorkID": workid}
                            myDB.upsert("books", newValueDict, controlValueDict)
                        cnt += 1

        except Exception as e:
            logger.error("%s parsing id_to_work_id page: %s" % (type(e).__name__, str(e)))

    msg = 'Updated %s id%s' % (counter, plural(counter))
    logger.debug("setWorkID complete: " + msg)
    return msg
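setWorkID batches bookids into comma-separated pages of fifty so each Goodreads id_to_work_id call stays small. A compact sketch of that pagination step on its own, using slicing in place of the manual counter above (same output, just shorter):

def paginate_ids(bookids, per_page=50):
    pages = []
    for i in range(0, len(bookids), per_page):
        pages.append(','.join(bookids[i:i + per_page]))
    return pages

pages = paginate_ids([str(n) for n in range(1, 120)])
print(len(pages))     # -> 3 (50 + 50 + 19 ids)
print(pages[2][:11])  # -> 101,102,103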