def extract_entry_trip_mileage(entry_source):
    """Return the 'Trip Miles' value found in an entry page, as a float.

    The value is taken from the first ``span`` that follows the span whose
    text contains 'Trip Miles' inside the trip-info table row.

    :param entry_source: HTML text of the entry page, handed to
        ``Selector(text=...)``.
    :returns: the mileage as a ``float``, or ``None`` when no such span is
        found or its leading text is empty or whitespace-only.
    :raises ValueError: when the text is non-blank but ``float()`` cannot
        parse it.
    """
    trip_info_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]//table[1]//tr[3]"
    value_spans = Selector(text=entry_source).xpath(
        trip_info_xpath
        + "//td//span[contains(text(), 'Trip Miles')]/following::span")
    if not value_spans:
        return None

    # Serialized markup of the first matching span.
    markup = value_spans.extract()[0]
    # Keep only what sits between the end of the opening tag and the start
    # of the next tag.
    inner = markup[markup.find(">") + 1:]
    inner = inner[:inner.find("<")]

    # A whitespace-only value is treated as missing; without the strip it
    # would pass the emptiness check and make float() raise ValueError.
    mileage_text = inner.strip()
    if not mileage_text:
        return None
    return float(mileage_text)
def extract_entry_start_loc(entry_source):
    """Return the 'Starting Location' text found in an entry page.

    The value is the leading text of the first ``span`` that follows the
    span whose text contains 'Starting Location' inside the trip-info
    table row.

    :param entry_source: HTML text of the entry page, handed to
        ``Selector(text=...)``.
    :returns: the location text, or ``None`` when no such span is found or
        the span does not start with text.
    """
    trip_info_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]//table[1]//tr[3]"
    value_spans = Selector(text=entry_source).xpath(
        trip_info_xpath
        + "//td//span[contains(text(), 'Starting Location')]/following::span[1]")
    if not value_spans:
        return None

    # Select the span's first child only when it is a text node. This is the
    # same "text right after the opening tag" that slicing the serialized
    # markup targeted, but read from the parsed tree, so character entities
    # such as &amp; come back decoded instead of HTML-escaped.
    leading_text = value_spans[0].xpath("node()[1][self::text()]").extract()
    if leading_text and leading_text[0] != '':
        return leading_text[0]
    return None
def extract_entry_destination(entry_source):
    """Return the 'Destination' text found in an entry page.

    The value is the leading text of the first ``span`` that follows the
    span whose text contains 'Destination' inside the trip-info table row.

    :param entry_source: HTML text of the entry page, handed to
        ``Selector(text=...)``.
    :returns: the destination text, or ``None`` when no such span is found
        or the span does not start with text.
    """
    trip_info_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]//table[1]//tr[3]"
    label_to_value = "//td//span[contains(text(), 'Destination')]/following::span[1]"
    matches = Selector(text=entry_source).xpath(trip_info_xpath + label_to_value)
    if not matches:
        return None

    # Read the first child node of the span, and only if it is a text node.
    # Taking the text from the parsed tree rather than cutting it out of the
    # serialized markup means entities such as &amp; are returned decoded.
    text_nodes = matches[0].xpath("node()[1][self::text()]").extract()
    if not text_nodes:
        return None
    destination = text_nodes[0]
    return destination if destination != '' else None
def extract_first_journal_url(journal_url):
    """Return the URL of the first entry of the journal at ``journal_url``.

    Downloads the page and looks, in the first row of the table addressed
    by the XPath below, for a link whose text contains 'First'.

    :param journal_url: URL of a journal page; it is opened with
        ``urlopen``.
    :returns: the site domain joined with the link's ``href`` when such a
        link exists; otherwise ``journal_url`` itself, since the page is
        then taken to be the first one already.
    """
    domain = "http://www.trailjournals.com/"
    with contextlib.closing(urlopen(journal_url)) as fp:
        source = fp.read()

    first_entry_url_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]/table//tr[1]"
    # Select the href attribute itself instead of slicing it out of the
    # serialized <a> markup: the attribute value comes back decoded (a '&'
    # in the query string is not turned into '&amp;'), and an anchor that
    # has no href simply produces no match.
    hrefs = Selector(text=source).xpath(
        first_entry_url_xpath + "//a[contains(text(), 'First')]/@href").extract()
    if hrefs:
        # Not on the first journal page. Record the first entry url.
        return domain + hrefs[0]

    # Already on the first journal page.
    return journal_url
def _parse_user_posts_num(self, response):
    """Parse the post count shown in the user-info block of a response.

    The text of the 4th child ``span`` under ``.userinfo_userdata`` is
    read, and its first three characters and last character are dropped.

    :param response: response object handed to ``Selector``.
    :returns: ``0`` when the span is missing or the trimmed text is empty;
        the trimmed text itself (a string) when it contains a '.';
        otherwise the trimmed text converted to ``float`` and multiplied
        by 10000.
    """
    raw = Selector(response).css(
        '.userinfo_userdata span:nth-child(4)::text').extract_first()
    # extract_first() yields None when nothing matches; treat that like an
    # empty value so the "return 0" branch is reached instead of a
    # TypeError from slicing None.
    # Per the original note the text is shaped like "<label>:(X)X.X<unit>":
    # the slice removes a three-character label prefix and a one-character
    # unit suffix (presumably the ten-thousand unit, given the * 10000).
    num = raw[3:-1] if raw else ''
    logging.debug('posts num: %s', num)
    if not num:
        return 0
    # NOTE(review): a value containing '.' is returned as the unscaled
    # string while other values are scaled to a float; this looks
    # inconsistent, but it is kept as-is because callers may rely on it.
    if num.find('.') != -1:
        return num
    return float(num) * 10000
def extract_entry_day_mileage(entry_source):
    """Return the 'Today' mileage value found in an entry page, as a float.

    The value is taken from the first ``span`` that follows the span whose
    text contains 'Today' inside the trip-info table.

    :param entry_source: HTML text of the entry page, handed to
        ``Selector(text=...)``.
    :returns: the mileage as a ``float``, or ``None`` when no such span is
        found or its leading text is empty or whitespace-only.
    :raises ValueError: when the text is non-blank but ``float()`` cannot
        parse it.
    """
    trip_info_xpath = "/html/body/table//tr[4]/td/table/tr//td[2]/table[1]"
    candidates = Selector(text=entry_source).xpath(
        trip_info_xpath + "//td//span[contains(text(), 'Today')]/following::span")
    if not candidates:
        return None

    # Serialized markup of the first matching span.
    span_markup = candidates.extract()[0]
    # Use the bound .find() rather than str.find(obj, ...): the unbound form
    # rejects text objects that are not exactly ``str``.
    after_open_tag = span_markup[span_markup.find(">") + 1:]
    before_next_tag = after_open_tag[:after_open_tag.find("<")]

    # Treat a whitespace-only value as missing; otherwise it would slip
    # past the emptiness check and make float() raise ValueError.
    day_mileage = before_next_tag.strip()
    if day_mileage == '':
        return None
    return float(day_mileage)
def _parse_user_posts_num(self, response):
    """Read the user's post count from the user-info section of a response.

    Takes the text of the 4th child ``span`` under ``.userinfo_userdata``
    and drops its first three characters and its last character.

    :param response: response object handed to ``Selector``.
    :returns: ``0`` if the span is absent or nothing remains after
        trimming; the trimmed string unchanged if it contains a '.';
        otherwise ``float(trimmed) * 10000``.
    """
    span_text = Selector(response).css(
        '.userinfo_userdata span:nth-child(4)::text').extract_first()
    if span_text is None:
        # No matching span: fall through to the "no value" result instead
        # of failing with a TypeError when slicing None.
        span_text = ''

    # Per the original note the text is shaped like "<label>:(X)X.X<unit>":
    # three leading label characters and one trailing unit character are
    # cut off (the unit is presumably ten thousand, given the * 10000).
    num = span_text[3:-1]
    logging.debug('posts num: %s', num)

    if num:
        # NOTE(review): values with a decimal point are returned as the
        # raw string, values without one as a scaled float. Kept unchanged;
        # confirm against callers whether this is intended.
        return num if num.find('.') != -1 else float(num) * 10000
    return 0