def get_submission_dates(self, arxiv_tree, queried_version):
    """Parse the arXiv submission-history box into {version: date}.

    arxiv_tree: lxml HTML tree of an arXiv abstract page.
    queried_version: version label (e.g. "v2"); when a matching line is
        found, only that single entry is returned, short-circuiting the
        rest of the history.
    Returns a dict mapping version string -> datetime.date.
    Raises IndexError if the page has no div.submission-history element.
    """
    history = CSSSelector("div.submission-history")(arxiv_tree)[0]
    versions = {}
    for line in history.text_content().split("\n"):
        match = self.version_re.match(line)
        if not match:
            continue
        version, date_str = match.group(1), match.group(2)
        # Dates look like "Mon, 2 Apr 2012"; any trailing time/zone text
        # is assumed to be excluded by version_re's group 2 -- TODO confirm.
        parsed = datetime.datetime.strptime(date_str, '%a, %d %b %Y').date()
        versions[version] = parsed
        if queried_version == version:
            # Caller asked for one specific version; stop scanning early.
            return {version: parsed}
    return versions
def get_submission_dates(self, arxiv_tree, queried_version):
    """Return {version: datetime.date} parsed from the submission history.

    Scans each line of the div.submission-history text for entries that
    match self.version_re; if queried_version is seen, a one-entry dict
    for just that version is returned immediately.
    """
    node = CSSSelector("div.submission-history")(arxiv_tree)[0]
    found = {}
    for raw_line in node.text_content().split("\n"):
        m = self.version_re.match(raw_line)
        if m is None:
            continue
        ver = m.group(1)
        when = datetime.datetime.strptime(m.group(2), '%a, %d %b %Y').date()
        found[ver] = when
        if ver == queried_version:
            return {ver: when}
    return found
def get_submission_dates(self, arxiv_tree, queried_version):
    """Parse the (pre-cleaned) arXiv submission-history box into {version: date}.

    arxiv_tree: lxml HTML tree of an arXiv abstract page.
    queried_version: version label (e.g. "v2"); when matched, only that
        entry is returned, short-circuiting the rest of the history.
    Returns a dict mapping version string -> datetime.date.
    Raises IndexError if the page has no div.submission-history element.
    """
    history = CSSSelector("div.submission-history")(arxiv_tree)[0]
    versions = {}
    # Raw text goes through clean_gunky_arxiv_data before line-wise
    # matching -- presumably it strips markup noise; verify against
    # that helper's definition.
    blob = self.clean_gunky_arxiv_data(history.text_content())
    for line in blob.split("\n"):
        match = self.version_re.match(line)
        if not match:
            continue
        version, date_str = match.group(1), match.group(2)
        parsed = datetime.datetime.strptime(date_str, '%a, %d %b %Y').date()
        versions[version] = parsed
        if queried_version == version:
            # Caller asked for one specific version; stop scanning early.
            return {version: parsed}
    return versions
def _get_post_details(post_listing):
    """Scrape a post and return it as a Post object.

    post_listing: lxml element for one row of the forum topic listing.
    Returns a Post(title, author, url, content, images, pm_link), or
    None when robots.txt disallows fetching the post URL.
    """
    title_node = CSSSelector("a.topictitle")(post_listing)[0]
    title = title_node.text_content()
    # [2:] drops a leading relative prefix from href (presumably "./")
    # before joining onto the forum base URL -- TODO confirm.
    url = _forum_url + title_node.get("href")[2:]
    if not rp.can_fetch("*", url):
        _robots_not_allowed(url)
        return None
    # Parenthesized print is valid in both Python 2 and 3 (the original
    # py2-only print statement broke under Python 3).
    print("Scraping post: " + title)
    post_page = lxml.html.fromstring(_get_page(url))
    author = _get_post_author(post_page)
    content = _get_post_content(post_page)
    images = _get_post_images(post_page)
    private_message_link = _get_private_message_link(post_page)
    return Post(title, author, url, content, images, private_message_link)
import sys

import requests
import lxml.html
from lxml.cssselect import CSSSelector

# --- fetch the page named on the command line ---
# BUG FIX: sys was used (sys.argv) but never imported; added above.
url = sys.argv[1]
page = requests.get(url).text
# Normalize non-breaking spaces so later text matching behaves.
page = page.replace('\xa0', ' ')
tree = lxml.html.fromstring(page)

# --- title: strip a trailing " (fb2)" suffix when present ---
title_tag = CSSSelector('div#main h1')(tree)[0]
title = title_tag.text_content()
fb2 = title.find(' (fb2)')
if fb2 != -1:
    title = title[:fb2]

# --- main text ---
text_tag = CSSSelector('div#main div._ga1_on_')(tree)[0]
text = text_tag.text_content().strip()

# --- footnote references ---
ref_sup_tags = CSSSelector('sup')(text_tag)
# Each <sup> is assumed to contain at least two <a> tags, the second
# being the reference link with a 'title' attribute -- TODO confirm
# against the site's actual markup; an IndexError here means a <sup>
# with fewer links.
ref_tags = [CSSSelector('a')(ref_sup_tag)[1] for ref_sup_tag in ref_sup_tags]
refs = [ref_tag.get('title').strip() for ref_tag in ref_tags]