Python Selector.find 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: scrapy

클래스/타입: Selector

메소드/함수: find

hotexamples.com에서의 예제들: 7

Python Selector.find - 7개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python scrapy.Selector.find의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Selector(30)

css(30)

split(30)

xpath(30)

re(24)

extract(22)

replace(11)

strip(9)

__len__(8)

remove_namespaces(7)

startswith(7)

find(6)

select(6)

__contains__(4)

extract_first(3)

index(3)

append(2)

register_namespace(2)

re_first(2)

group(2)

get(2)

findall(2)

endswith(1)

rsplit(1)

json(1)

select_by_visible_text(1)

isdigit(1)

예제 #1

파일 보기

파일: WebScraper.py 프로젝트: campellcl/ATS

def extract_entry_trip_mileage(entry_source):
    """Return the 'Trip Miles' value of a journal entry as a float, or None.

    ``entry_source`` is passed to ``Selector(text=...)``. The span(s) that
    follow the span whose text contains 'Trip Miles' are selected, and the
    first extracted string is trimmed to the text between its first '>' and
    the next '<'.

    Returns None when nothing matches or when the trimmed text is empty;
    otherwise the trimmed text converted with ``float``.
    """
    row_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]//table[1]//tr[3]"
    matches = Selector(text=entry_source).xpath(
        row_xpath + "//td//span[contains(text(), 'Trip Miles')]/following::span"
    )
    if not matches:
        return None

    markup = matches.extract()[0]
    # Drop everything up to and including the first '>' (the opening tag).
    after_tag = markup[markup.find(">") + len(">"):]
    # Keep only what precedes the next '<' (start of the closing tag).
    text = after_tag[:after_tag.find("<")]
    return float(text) if text != '' else None

예제 #2

파일 보기

파일: WebScraper.py 프로젝트: campellcl/ATS

def extract_entry_start_loc(entry_source):
    """Return the 'Starting Location' text of a journal entry, or None.

    ``entry_source`` is passed to ``Selector(text=...)``. The first span
    following the span whose text contains 'Starting Location' is selected,
    and its extracted string is trimmed to the text between its first '>'
    and the next '<'.

    Returns None when nothing matches or when the trimmed text is empty.
    """
    row_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]//table[1]//tr[3]"
    matches = Selector(text=entry_source).xpath(
        row_xpath + "//td//span[contains(text(), 'Starting Location')]/following::span[1]"
    )
    if not matches:
        return None

    markup = matches.extract()[0]
    # Drop everything up to and including the first '>' (the opening tag).
    after_tag = markup[markup.find(">") + len(">"):]
    # Keep only what precedes the next '<' (start of the closing tag).
    text = after_tag[:after_tag.find("<")]
    return text if text != '' else None

예제 #3

파일 보기

파일: WebScraper.py 프로젝트: campellcl/ATS

def extract_entry_destination(entry_source):
    """Return the 'Destination' text of a journal entry, or None.

    ``entry_source`` is passed to ``Selector(text=...)``. The first span
    following the span whose text contains 'Destination' is selected, and
    its extracted string is trimmed to the text between its first '>' and
    the next '<'.

    Returns None when nothing matches or when the trimmed text is empty.
    """
    row_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]//table[1]//tr[3]"
    matches = Selector(text=entry_source).xpath(
        row_xpath + "//td//span[contains(text(), 'Destination')]/following::span[1]"
    )
    if not matches:
        return None

    markup = matches.extract()[0]
    # Drop everything up to and including the first '>' (the opening tag).
    after_tag = markup[markup.find(">") + len(">"):]
    # Keep only what precedes the next '<' (start of the closing tag).
    text = after_tag[:after_tag.find("<")]
    return text if text != '' else None

예제 #4

파일 보기

파일: WebScraper.py 프로젝트: campellcl/ATS

def extract_first_journal_url(journal_url):
    """Return the URL of the first entry of the journal at ``journal_url``.

    The page at ``journal_url`` is fetched with ``urlopen`` and searched for
    an anchor whose text contains 'First'. When one is found, the value
    between the quotes of its ``href`` attribute is appended to the site
    domain and returned. When none is found, ``journal_url`` itself is
    returned unchanged.
    """
    domain = "http://www.trailjournals.com/"
    with contextlib.closing(urlopen(journal_url)) as page:
        source = page.read()

    nav_row_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]/table//tr[1]"
    first_links = Selector(text=source).xpath(nav_row_xpath + "//a[contains(text(), 'First')]")
    if not first_links:
        # Already on the first journal page.
        return journal_url

    # Not on the first journal page. Record the first entry url.
    anchor_markup = first_links.extract()[0]
    # Skip past 'href="' so the slice starts at the first character of the URL.
    href_start = anchor_markup.find("href=") + len("href=\"")
    href_tail = anchor_markup[href_start:]
    # The URL ends at the closing double quote of the attribute.
    relative_url = href_tail[:href_tail.find("\"")]
    return domain + relative_url

예제 #5

파일 보기

파일: user.py 프로젝트: liushahe/tieba-crawler

    def _parse_user_posts_num(self, response):
        """Parse the post count shown in the user-info block of ``response``.

        Reads the text of the 4th child span under ``.userinfo_userdata``
        and strips its first three characters and its last character.

        :response: object handed to ``Selector`` to build the selector
        :returns: 0 when the span is missing or the stripped text is empty;
            the stripped text itself when it contains a '.'; otherwise
            ``float(text) * 10000``

        """
        raw = Selector(response).css('.userinfo_userdata span:nth-child(4)::text').extract_first()
        if raw is None:
            # extract_first() gives None when the selector matches nothing;
            # slicing None would raise TypeError, so fall back to 0 here.
            return 0
        # The text has the form "发贴:(X)X.X万" ("posts: N.N ten-thousands");
        # [3:-1] drops the 3-character label and the final character.
        num = raw[3:-1]
        logging.debug('posts num: %s', num)
        if not num:
            return 0
        # NOTE(review): the '.' branch returns the raw string while the other
        # branch returns a scaled float -- confirm this is the intended split.
        return num if num.find('.') != -1 else float(num) * 10000

예제 #6

파일 보기

파일: WebScraper.py 프로젝트: campellcl/ATS

def extract_entry_day_mileage(entry_source):
    """Return the 'Today' mileage of a journal entry as a float, or None.

    ``entry_source`` is passed to ``Selector(text=...)``. The span(s) that
    follow the span whose text contains 'Today' are selected, and the first
    extracted string is trimmed to the text between its first '>' and the
    next '<'.

    Returns None when nothing matches or when the trimmed text is empty;
    otherwise the trimmed text converted with ``float``.
    """
    trip_info_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]/table[1]"
    day_mileage = Selector(text=entry_source).xpath(trip_info_xpath + "//td//span[contains(text(), 'Today')]/following::span")
    if not day_mileage:
        return None

    markup = day_mileage.extract()[0]
    # Use the bound ``find`` method, like the other extractors in this file,
    # instead of the unbound ``str.find(...)`` which rejects non-``str`` text.
    after_tag = markup[markup.find(">") + len(">"):]
    # Keep only what precedes the next '<' (start of the closing tag).
    text = after_tag[:after_tag.find("<")]
    return float(text) if text != '' else None

예제 #7

파일 보기

파일: user.py 프로젝트: xunux/tieba-crawler

    def _parse_user_posts_num(self, response):
        """Parse the post count shown in the user-info block of ``response``.

        Reads the text of the 4th child span under ``.userinfo_userdata``
        and strips its first three characters and its last character.

        :response: object handed to ``Selector`` to build the selector
        :returns: 0 when the span is missing or the stripped text is empty;
            the stripped text itself when it contains a '.'; otherwise
            ``float(text) * 10000``

        """
        raw = Selector(response).css(
            '.userinfo_userdata span:nth-child(4)::text').extract_first()
        if raw is None:
            # extract_first() gives None when the selector matches nothing;
            # slicing None would raise TypeError, so fall back to 0 here.
            return 0
        # The text has the form "发贴:(X)X.X万" ("posts: N.N ten-thousands");
        # [3:-1] drops the 3-character label and the final character.
        num = raw[3:-1]
        logging.debug('posts num: %s', num)
        if not num:
            return 0
        # NOTE(review): the '.' branch returns the raw string while the other
        # branch returns a scaled float -- confirm this is the intended split.
        return num if num.find('.') != -1 else float(num) * 10000