def handle(self, response, log, browser, video_id):
    """Scrape a YouTube watch page.

    Always records ``video_id``; when the response parsed as soup, also
    tries to pull the page title and (if ``self.extra_attr`` is set) a
    handful of extra fields out of the markup.
    """
    attr = {'video_id': video_id}
    title = None
    if response.data_type == 'soup':
        soup = response.data
        with trapped:
            title = strip_site_name(render_node(soup.head.title), response.url)
        if self.extra_attr:
            desc = soup.find('div', id='watch-description-clip')
            # Each entry: (attr name, search root, tag, attr key, attr value,
            # child tag to collect or None for a single node).
            # This describes what we need to scrape.. youtube is awfully
            # structured.
            # NOTE: this is stupidly slow.. disable if doing any volume.
            scrape_plan = (
                ('uploader', desc, 'p', 'id', 'watch-uploader-info', None),
                ('summary', desc, 'p', 'id', 'eow-description', None),
                ('category', desc, 'p', 'id', 'eow-category', None),
                ('license', desc, 'p', 'id', 'eow-reuse', None),
                ('views', soup, 'span', 'class', 'watch-view-count', None),
                ('extras', soup, 'ul', 'id', 'watch-description-extra-info', 'li'),
                ('tags', desc, 'ul', 'id', 'eow-tags', 'a'),
            )
            for name, parent, tag, key, val, multi in scrape_plan:
                # ``trapped`` swallows lookup failures so one missing node
                # doesn't abort the rest of the scrape.
                with trapped:
                    node = parent.find(tag, **{key: val})
                    if multi:
                        attr[name] = [render_node(child) for child in node(multi)]
                    else:
                        attr[name] = render_node(node)
    return ScanResult(response=response, override_url=None, title=title,
                      content_type=None, content=None, attr=attr)
def handle(self, response, log, browser):
    """Resolve an imgur page to its underlying image.

    Follows the ``image_src`` link in the page head, re-fetches it, and
    delegates to the parent scanner; the page's own title (when present)
    overrides whatever the parent produced.  Raises InvalidContent when
    the response isn't HTML or no image link can be found.
    """
    if response.data_type != 'soup':
        raise InvalidContent(response, 'Not an HTML file')
    soup = response.data
    title = None
    with trapped:
        title = strip_site_name(render_node(soup.head.title), response.url)
    # ``trapped`` suppresses any failure below, letting us fall through to
    # the final raise instead of blowing up mid-scrape.
    with trapped:
        image_url = soup.head.find('link', rel='image_src')['href']
        response = browser.open(image_url, follow_meta_redirect=True)
        inner = super(IMGurScanner, self).handle(response, log, browser)
        final_title = title if title is not None else inner.title
        return ScanResult(response=inner.response,
                          override_url=inner.override_url,
                          title=final_title,
                          content_type=inner.content_type,
                          content=inner.content,
                          attr=inner.attr)
    raise InvalidContent(response, "Couldn't find the image")
def handle(self, response, log, browser):
    """Generic HTML handler: extract the page title and a text summary.

    Raises InvalidContent when the response is not HTML, or when neither
    a title nor a summary could be extracted.
    """
    if response.data_type != 'soup':
        raise InvalidContent(response, 'Not an HTML file')
    soup = response.data
    title = summary = content_type = None
    with trapped:
        title = strip_site_name(render_node(soup.head.title), response.url)
    with trapped:
        summary = self.summarize_soup(soup)
        # Only set once summarize_soup succeeded; stays None on failure.
        content_type = 'text/plain'
    if title is None and summary is None and content_type is None:
        # FIX: InvalidContent is constructed as (response, message)
        # everywhere else in this file; the response argument was missing.
        raise InvalidContent(response, "couldn't get anything useful out of that..")
    return ScanResult(response=response, override_url=None, title=title,
                      content_type=content_type, content=summary, attr=None)
def handle(self, response, log, browser, video_id):
    """Scrape a YouTube watch page for its title and optional metadata.

    The returned ScanResult always carries ``video_id`` in ``attr``; the
    remaining fields are filled in on a best-effort basis.
    """

    def grab(target, root, tag_name, attr_key, attr_val, child_tag):
        # Best-effort extraction of one field; ``trapped`` swallows any
        # failure (e.g. ``root`` is None or the node is absent).
        with trapped:
            found = root.find(tag_name, **{attr_key: attr_val})
            if child_tag:
                attr[target] = [render_node(c) for c in found(child_tag)]
            else:
                attr[target] = render_node(found)

    attr = {'video_id': video_id}
    title = None
    if response.data_type == 'soup':
        soup = response.data
        with trapped:
            title = strip_site_name(render_node(soup.head.title), response.url)
        if self.extra_attr:
            desc = soup.find('div', id='watch-description-clip')
            # this describes what we need to scrape.. youtube is awfully
            # structured.
            # NOTE: this is stupidly slow.. disable if doing any volume.
            grab('uploader', desc, 'p', 'id', 'watch-uploader-info', None)
            grab('summary', desc, 'p', 'id', 'eow-description', None)
            grab('category', desc, 'p', 'id', 'eow-category', None)
            grab('license', desc, 'p', 'id', 'eow-reuse', None)
            grab('views', soup, 'span', 'class', 'watch-view-count', None)
            grab('extras', soup, 'ul', 'id', 'watch-description-extra-info', 'li')
            grab('tags', desc, 'ul', 'id', 'eow-tags', 'a')
    return ScanResult(response=response, override_url=None, title=title,
                      content_type=None, content=None, attr=attr)