Exemple #1
0
def get_script(url):
    """Download *url* and return the text of its scrolling-script container.

    The response body is parsed with ``HTML`` and the first element matching
    the ``.scrolling-script-container`` CSS selector is flattened into one
    string made of all of its text nodes.

    Args:
        url: Address of the page to download.

    Returns:
        The concatenated text of the first matching element, or ``None``
        when no tree was produced or no element matches the selector.
    """
    html = requests.get(url).content
    tree = HTML(html)
    # NOTE(review): presumably HTML() can hand back None for a body with no
    # parsable content -- confirm; the guard keeps the "return None" contract
    # for that case without a blanket except.
    if tree is None:
        return None

    containers = tree.cssselect(".scrolling-script-container")
    if not containers:
        # Page has no script container: the expected "not found" outcome.
        return None

    # Any other failure (e.g. a missing cssselect dependency) now propagates
    # instead of being silently reported as "no script".
    return "".join(containers[0].itertext())
Exemple #2
0
##############################################
#  Fetch the titles from the SF home page
##############################################
from lxml.etree import HTML
import requests

url = 'https://segmentfault.com/'
css_selector = '.title>a'  # Auto-generated by the browser; I don't even need to know what it means

text = requests.get(url).text
page = HTML(text)

# Collect the text of every element matched by the selector.
titles = [title.text for title in page.cssselect(css_selector)]

print(titles)

# Writing this snippet takes no thinking at all (mindless typing) and costs no mental effort
Exemple #3
0
def extract_links(toot):
    """Extract all external links from a toot.

    Args:
        toot: Mapping whose ``'content'`` entry holds the toot's HTML markup.

    Returns:
        A list with the ``href`` value of every ``<a>`` element for which
        ``link_is_internal`` is false. Anchors that carry no ``href``
        attribute are skipped.
    """
    html = HTML(toot['content'])
    all_links = html.cssselect('a')
    return [
        link.attrib['href']
        for link in all_links
        # An <a> without href has no target to report; testing for it first
        # avoids the KeyError the bare attrib lookup would raise.
        if 'href' in link.attrib and not link_is_internal(link)
    ]