def _from_url(url):  # pragma: no cover
    """Return the set of social-media links/handles found on the page at *url*.

    - url: address of the page to fetch and scan (str)
    """
    import requests
    from html_to_etree import parse_html_bytes
    # Fix: find_links_tree was referenced but never brought into scope even
    # though the function lazily imports everything else it uses; import it
    # here so the function is self-contained.
    from extract_social_media import find_links_tree

    res = requests.get(url)
    tree = parse_html_bytes(res.content, res.headers.get('content-type'))
    return set(find_links_tree(tree))
def get_social_media_links(self, link, DEFAULT_SM, default_dict):
    """Fetch *link* and bucket any social-media URLs found on the page.

    Each discovered URL containing a key from DEFAULT_SM is appended to the
    matching list in *default_dict*; the (mutated) dict is returned.
    NOTE(review): verify=False disables TLS certificate checking — confirm
    this is intentional for the target hosts.
    """
    response = requests.get(link, verify=False, timeout=30)
    tree = parse_html_bytes(response.content,
                            response.headers.get('content-type'))
    for found in find_links_tree(tree):
        for network in DEFAULT_SM:
            if network in found:
                default_dict[network].append(found)
    return default_dict
def find_social_links(url) -> set:
    """Return the set of social media links found on the webpage at *url*.

    - url: the address of the webpage to search (str)
    """
    res = requests.get(url)
    content_type = res.headers.get('content-type')
    return set(find_links_tree(parse_html_bytes(res.content, content_type)))
def extract_social_media_from_response(self, content, header):
    """Map each tracked social network in self.metas to a link in *content*.

    - content: raw HTML bytes of a response body
    - header: mapping of response headers (only 'content-type' is read)

    Returns a dict {meta: link} with at most one link per entry in
    self.metas. When several links match the same meta, which one is kept
    is arbitrary (set iteration order) — same as the original behavior.
    """
    tree = parse_html_bytes(content, header.get('content-type'))
    # Hoisted out of the loop: the original rebuilt list(set(...)) for every
    # entry in self.metas even though the link set never changes.
    links = set(find_links_tree(tree))
    result = {}
    for meta in self.metas:
        for link in links:
            if meta in link:
                result[meta] = link
    return result
def get_social_media(url):
    """Fetch *url* and return {network_name: link} for known social networks.

    Only one matching link per network is kept (later matches overwrite
    earlier ones via dict assignment), mirroring the original behavior.
    """
    media = [
        'facebook', 'linkedin', 'twitter', 'youtube', 'github',
        'google plus', 'pinterest', 'instagram', 'snapchat', 'flipboard',
        'flickr', 'weibo', 'periscope', 'telegram', 'soundcloud',
        'feedburner', 'vimeo', 'slideshare', 'vkontakte', 'xing',
    ]
    res = requests.get(url)
    tree = parse_html_bytes(res.content, res.headers.get('content-type'))
    links = set(find_links_tree(tree))
    social = dict()
    # Iterate the names directly instead of the `range(len(media))` index loop.
    for name in media:
        for link in links:
            if name in link:
                social[name] = link
    return social
def test_tree():
    """For every fixture record, parse the page and check that the expected
    substring appears in the extracted text.

    Records whose cleaned HTML is shorter than 100 characters are logged
    and skipped.
    """
    for record in get_data():
        meta = record['meta']
        page_url, expected = meta['url'], meta['contains']
        webdata = record['webdata']
        raw_body = webdata['byte_body']
        if len(clean_html(raw_body)) < 100:
            logging.warning('skipping %s', page_url)
            continue
        tree = parse_html_bytes(body=raw_body,
                                content_type=webdata['content-type'])
        assert expected in extract_text(tree), (
            page_url, expected, etree.tostring(tree, encoding='utf-8'))
def _request_html(self, url):
    """Fetch *url* and return its parsed element tree.

    Fixes two issues: a leftover debug ``print`` of the raw page bytes, and
    a missing content-type argument to ``parse_html_bytes`` — every other
    call site in this file passes the response content-type so the parser
    can detect the encoding.
    """
    res = requests.get(url)
    return parse_html_bytes(res.content, res.headers.get('content-type'))
def audit_html_bytes(body, content_type=''):
    """Audit HTML given its raw bytestring *body* and header *content_type*."""
    logging.debug('parse_html_bytes')
    return audit_etree(parse_html_bytes(body, content_type))
import requests
from html_to_etree import parse_html_bytes
from extract_social_media import find_links_tree

# Fetch the repository page, parse it, and print every social link found.
response = requests.get('https://github.com/HarshCasper/Rotten-Scripts')
parsed = parse_html_bytes(response.content,
                          response.headers.get('content-type'))
links = set(find_links_tree(parsed))
print(links)