コード例 #1
0
    def parse_well_data(self, response):
        """Merge well details found in ``response`` into the current item.

        Bumps the ``2_welldata_response_count`` stat, takes the item stored
        under ``response.meta['current_item']``, looks up lat/long, status
        and spud date among the page's ``<td>`` cells, updates the item, and
        then yields everything ``self.process_permit_item(item)`` produces.
        """
        self.crawler.stats.inc_value('2_welldata_response_count', spider=self)
        item = response.meta['current_item']
        cells = HtmlXPathSelector(response).select('//td')

        # A record may carry lat/lng twice: first 'as planned', then
        # 'as built'.  Taking the last match prefers 'as built' when present.
        try:
            lat, lng = find_well_data(
                cells, parse_well_latlng, "Lat/Long:", all=True)[-1]
        except IndexError:
            # No lat/long entry was found at all.
            lat = lng = None

        status = find_well_data(cells, parse_text, "Status:", embedded=True)
        spud_date = find_well_data(
            cells, parse_date, "Spud Date:", nexttd=True)

        if lat:
            item['well_lat'] = lat
            item['well_lng'] = lng

        # Only a non-empty status that differs from the stored one counts as
        # a change; the change is stamped with today's date.
        status_changed = bool(status) and status != item.get('well_status')
        if status_changed:
            item['well_status'] = status
            item['well_status_date'] = convert_fuzzy_date(str(date.today()))
        elif 'well_status' not in item:
            # The feed generator expects these keys, so create them empty
            # rather than risk a KeyError later.
            item['well_status'] = None
            item['well_status_date'] = None

        # Keep a spud date already on the item unless a new one was parsed;
        # when the key is missing it is always created (possibly as None).
        keep_existing_spud = not spud_date and 'well_spud_date' in item
        if not keep_existing_spud:
            item['well_spud_date'] = spud_date

        for produced in self.process_permit_item(item):
            yield produced
コード例 #2
0
ファイル: CogisScraper.py プロジェクト: SkyTruth/scraper
def extract_date(td):
    """Return the date portion of the text extracted from ``td``.

    Yields an empty string when ``extract_text(td)`` gives ``None``, an
    empty string or the placeholder ``'N/A'``.  Otherwise the text is passed
    through ``convert_fuzzy_date`` and only the first whitespace-separated
    token of the result is returned.
    """
    text = extract_text(td)
    if text in (None, '', 'N/A'):
        return ""
    # Drop everything after the first whitespace in the converted value.
    tokens = convert_fuzzy_date(text).split()
    return tokens[0]
コード例 #3
0
def extract_date(td):
    """Return the date portion of the text extracted from ``td``.

    Yields an empty string when ``extract_text(td)`` gives ``None``, an
    empty string or the placeholder ``'N/A'``.  Otherwise the text is passed
    through ``convert_fuzzy_date`` and only the first whitespace-separated
    token of the result is returned.
    """
    text = extract_text(td)
    if text in (None, '', 'N/A'):
        return ""
    # Drop everything after the first whitespace in the converted value.
    tokens = convert_fuzzy_date(text).split()
    return tokens[0]
コード例 #4
0
def parse_date(text):
    """Return the first token of ``convert_fuzzy_date(text)``.

    Falsy input (``None``, empty string, ...) short-circuits to ``None``
    without calling ``convert_fuzzy_date``.
    """
    if not text:
        return None
    # Keep only the part before the first whitespace of the converted value.
    return convert_fuzzy_date(text).split()[0]