コード例 #1
0
ファイル: WebScraper.py プロジェクト: campellcl/ATS
def extract_entry_trip_mileage(entry_source):
    """Return the trip mileage found in a journal entry page, or None.

    Locates the span whose text contains 'Trip Miles' inside the trip-info
    table row, takes the first ``<span>`` that follows it, and converts that
    span's text to a float.

    :param entry_source: HTML source of the entry page.
    :returns: The mileage as a float, or None when the label span, the value
        span, or the value text is missing or blank.
    :raises ValueError: If the value text is present but is not a number.
    """
    trip_info_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]//table[1]//tr[3]"
    # Only the first following span is ever used, so select just that one.
    value_spans = Selector(text=entry_source).xpath(
        trip_info_xpath + "//td//span[contains(text(), 'Trip Miles')]/following::span[1]")
    if not value_spans:
        return None
    # Read the span's text node instead of slicing the serialized
    # "<span ...>...</span>" markup by hand.
    text_nodes = value_spans[0].xpath("text()").extract()
    mileage_text = text_nodes[0].strip() if text_nodes else ''
    if not mileage_text:
        # Blank or whitespace-only values are treated as "no mileage".
        return None
    return float(mileage_text)
コード例 #2
0
ファイル: WebScraper.py プロジェクト: campellcl/ATS
def extract_entry_start_loc(entry_source):
    """Return the starting location found in a journal entry page, or None.

    Locates the span whose text contains 'Starting Location' inside the
    trip-info table row and returns the text of the first ``<span>`` that
    follows it.

    :param entry_source: HTML source of the entry page.
    :returns: The location text as a string, or None when the label span, the
        value span, or the value text is missing or empty.
    """
    trip_info_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]//table[1]//tr[3]"
    value_spans = Selector(text=entry_source).xpath(
        trip_info_xpath + "//td//span[contains(text(), 'Starting Location')]/following::span[1]")
    if not value_spans:
        return None
    # Read the text node rather than slicing the serialized element:
    # serialized markup carries HTML escapes (e.g. "&amp;" for "&"), which
    # would otherwise leak into the returned location.
    text_nodes = value_spans[0].xpath("text()").extract()
    if not text_nodes:
        return None
    return text_nodes[0] or None
コード例 #3
0
ファイル: WebScraper.py プロジェクト: campellcl/ATS
def extract_entry_destination(entry_source):
    """Return the destination found in a journal entry page, or None.

    Locates the span whose text contains 'Destination' inside the trip-info
    table row and returns the text of the first ``<span>`` that follows it.

    :param entry_source: HTML source of the entry page.
    :returns: The destination text as a string, or None when the label span,
        the value span, or the value text is missing or empty.
    """
    trip_info_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]//table[1]//tr[3]"
    value_spans = Selector(text=entry_source).xpath(
        trip_info_xpath + "//td//span[contains(text(), 'Destination')]/following::span[1]")
    if not value_spans:
        return None
    # Read the text node rather than slicing the serialized element:
    # serialized markup carries HTML escapes (e.g. "&amp;" for "&"), which
    # would otherwise leak into the returned destination.
    text_nodes = value_spans[0].xpath("text()").extract()
    if not text_nodes:
        return None
    return text_nodes[0] or None
コード例 #4
0
ファイル: WebScraper.py プロジェクト: campellcl/ATS
def extract_first_journal_url(journal_url):
    """Return the URL of the first entry of the journal at ``journal_url``.

    Downloads the page and looks in the first row of the navigation table
    for an anchor whose text contains 'First'. When such a link exists, its
    href is appended to the site domain; otherwise the page is taken to be
    the first entry already and ``journal_url`` is returned unchanged.

    :param journal_url: URL of any entry page of the journal.
    :returns: URL of the first entry page as a string.
    """
    domain = "http://www.trailjournals.com/"
    with contextlib.closing(urlopen(journal_url)) as fp:
        source = fp.read()
    first_entry_url_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]/table//tr[1]"
    first_links = Selector(text=source).xpath(first_entry_url_xpath + "//a[contains(text(), 'First')]")
    if not first_links:
        # Already on the first journal page.
        return journal_url
    # Not on the first journal page. Read the attribute value directly
    # instead of cutting it out of the serialized "<a href=...>" markup, so
    # that escaped characters (e.g. "&amp;" in a query string) come back
    # decoded.
    hrefs = first_links[0].xpath("@href").extract()
    if not hrefs:
        # A 'First' anchor without an href gives nothing to follow; stay on
        # the current page rather than building a URL from markup debris.
        return journal_url
    return domain + hrefs[0]
コード例 #5
0
ファイル: user.py プロジェクト: liushahe/tieba-crawler
    def _parse_user_posts_num(self, response):
        """Parse the user's post count from the user-info block of the page.

        Reads the text of the span that is the fourth child inside
        ``.userinfo_userdata``, drops its first three characters and its last
        character, and interprets what is left.

        :response: response wrapped in a ``Selector`` to query the page
        :returns: 0 when the span is absent or the sliced text is empty; the
            sliced text itself (a string) when it contains a '.'; otherwise
            the sliced text converted to float and multiplied by 10000

        """
        text = Selector(response).css('.userinfo_userdata span:nth-child(4)::text').extract_first()
        if text is None:
            # extract_first() gives None when nothing matches; slicing None
            # would raise TypeError before the fallback below could run.
            return 0
        # Expected text shape: '发贴:(X)X.X万' -- a three-character label,
        # the number, and a one-character unit suffix.
        num = text[3:-1]
        logging.debug('posts num: %s', num)
        if not num:
            return 0
        # NOTE(review): a value with a decimal point is returned as an
        # unscaled string while one without is scaled to a float -- this
        # looks inverted/inconsistent; confirm against callers before changing.
        return num if '.' in num else float(num) * 10000
コード例 #6
0
ファイル: WebScraper.py プロジェクト: campellcl/ATS
def extract_entry_day_mileage(entry_source):
    """Return the day's mileage found in a journal entry page, or None.

    Locates the span whose text contains 'Today' inside the trip-info table,
    takes the first ``<span>`` that follows it, and converts that span's text
    to a float.

    :param entry_source: HTML source of the entry page.
    :returns: The mileage as a float, or None when the label span, the value
        span, or the value text is missing or blank.
    :raises ValueError: If the value text is present but is not a number.
    """
    trip_info_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]/table[1]"
    # Only the first following span is ever used, so select just that one.
    value_spans = Selector(text=entry_source).xpath(
        trip_info_xpath + "//td//span[contains(text(), 'Today')]/following::span[1]")
    if not value_spans:
        return None
    # Read the span's text node instead of slicing the serialized
    # "<span ...>...</span>" markup by hand.
    text_nodes = value_spans[0].xpath("text()").extract()
    mileage_text = text_nodes[0].strip() if text_nodes else ''
    if not mileage_text:
        # Blank or whitespace-only values are treated as "no mileage".
        return None
    return float(mileage_text)
コード例 #7
0
ファイル: user.py プロジェクト: xunux/tieba-crawler
    def _parse_user_posts_num(self, response):
        """Parse the user's post count from the user-info block of the page.

        Reads the text of the span that is the fourth child inside
        ``.userinfo_userdata``, drops its first three characters and its last
        character, and interprets what is left.

        :response: response wrapped in a ``Selector`` to query the page
        :returns: 0 when the span is absent or the sliced text is empty; the
            sliced text itself (a string) when it contains a '.'; otherwise
            the sliced text converted to float and multiplied by 10000

        """
        text = Selector(response).css(
            '.userinfo_userdata span:nth-child(4)::text').extract_first()
        if text is None:
            # extract_first() gives None when nothing matches; slicing None
            # would raise TypeError before the fallback below could run.
            return 0
        # Expected text shape: '发贴:(X)X.X万' -- a three-character label,
        # the number, and a one-character unit suffix.
        num = text[3:-1]
        logging.debug('posts num: %s', num)
        if not num:
            return 0
        # NOTE(review): a value with a decimal point is returned as an
        # unscaled string while one without is scaled to a float -- this
        # looks inverted/inconsistent; confirm against callers before changing.
        return num if '.' in num else float(num) * 10000