def exemption_records_since_summary( since_date ):
    """Yield a summary dict for every exemption record updated since a date.

    ``since_date`` is passed through ``numerate_date`` once, and the result
    is handed to each record's ``has_updated_since`` check.  Records that
    fail the check are skipped.

    Each yielded dict has two keys:
      * ``'exemption_record'`` - the record itself
      * ``'entity_record'``    - the value of the record's ``get_entity()``

    This is a generator: nothing (including the date conversion) runs until
    the caller starts iterating.
    """
    threshold = numerate_date(since_date)
    for candidate in exemption_record.iter_records():
        if candidate.has_updated_since(threshold):
            yield {
                'exemption_record': candidate,
                'entity_record': candidate.get_entity(),
            }
def __init__( self, updated_since, *p, **d ):
    """Set the scrape cutoff, then initialise the base publisher scraper.

    ``updated_since`` is either one of the symbolic keywords ``'yesterday'``
    / ``'last_week'`` or a value that ``numerate_date`` accepts directly.
    Keywords are first turned into a ``'%d/%m/%y'`` string relative to
    ``datetime.now()``.  The converted cutoff is stored on
    ``self.scrape_updated_since``.

    All remaining positional and keyword arguments are forwarded untouched
    to ``exemption_tables_scraper.publisher_scraper.__init__``.
    """
    # Keyword -> number of days to look back from now.
    # NOTE(review): each keyword reaches one day further back than its name
    # suggests (2 for 'yesterday', 8 for 'last_week'); presumably a
    # deliberate overlap margin - confirm before changing.
    keyword_lookbacks = (
        ('yesterday', 2),
        ('last_week', 8),
    )
    for keyword, days_back in keyword_lookbacks:
        if updated_since == keyword:
            cutoff = datetime.now() - timedelta(days=days_back)
            updated_since = cutoff.strftime('%d/%m/%y')
            break

    self.scrape_updated_since = numerate_date( updated_since )
    exemption_tables_scraper.publisher_scraper.__init__( self, *p, **d )
def extract_records( self ):
    """Collect listing records, enriched with their detail-page data.

    For every record returned by ``self.web_page.extract_page_data()`` the
    extended page is navigated to the record's ``'url'`` and the data it
    extracts is merged into the record with ``update``.

    As soon as a record's ``'last_update_date'`` (after ``numerate_date``)
    is earlier than ``self.scrape_updated_since``, ``self.state['done']`` is
    set to ``True`` and processing stops; that record and everything after
    it are left out of the result.

    Returns the list of records gathered before the cutoff was hit.
    """
    collected = []
    for entry in self.web_page.extract_page_data():
        # Pull the detail page for this entry and fold its fields in.
        self.extended_web_page.go_to_url( entry['url'] )
        entry.update( self.extended_web_page.extract_page_data() )

        # presumably the listing is ordered newest-first, so the first
        # too-old entry means nothing later is of interest - verify.
        too_old = numerate_date(entry['last_update_date']) < self.scrape_updated_since
        if too_old:
            self.state['done'] = True
            break

        collected.append( entry )
    return collected