コード例 #1
0
 def parse(self, response):
     """
     Parse the starting page and yield one request per newly found event.

     Every event row on the listing page is inspected for its title link.
     Rows whose resolved URL is already present in the collection returned
     by ``load_existing_urls(self.name)`` are logged and skipped. Rows
     without a usable link are logged and skipped as well.

     :param response: response of the event listing page
     :return: generator of ``Request`` objects handled by
         ``self.parse_one_event``; the event category text (or ``None``
         when absent) is passed along in ``meta["category"]``
     """

     # load existing events for filtering
     existing_urls = load_existing_urls(self.name)

     # extract all events related content
     events_element = response.xpath('//div[@class="wrapper clearfix "]//div[contains(@class, "views-row")]')
     self.logger.info('Found %d events on url %s', len(events_element), response.url)

     for event in events_element:
         title = event.xpath('.//div[@class="views-field-title"]//a/text()').extract_first()
         href = event.xpath('.//div[@class="views-field-title"]//a/@href').extract_first()

         if not href:
             # urljoin() returns the base URL unchanged for a missing/empty
             # href, which would make the listing page itself be requested
             # and handed to parse_one_event as if it were an event.
             self.logger.warning('Event %s on url %s has no link, skipping', title, response.url)
             continue

         url = urlparse.urljoin(response.url, href)

         if url in existing_urls:
             self.logger.info('Event %s with url %s has already been parsed', title, url)
             continue

         category = event.xpath('.//div[@class="views-field-field-event-category-value-1"]//a/text()').extract_first()

         # yield request for newly found event
         yield Request(url, meta={"category": category}, callback=self.parse_one_event)
コード例 #2
0
 def __init__(self):
     """Load the existing urls for ``self.name`` and keep them for filtering."""
     source_name = self.name
     self.existing_urls = load_existing_urls(source_name)
コード例 #3
0
 def __init__(self):
     """Store the already known urls (whatshappen) on the instance."""
     # the lookup is keyed by this object's ``name`` attribute
     known_urls = load_existing_urls(self.name)
     self.existing_urls = known_urls
コード例 #4
0
 def __init__(self):
     """
     Initialise ``self.existing_urls`` for source eventfinda.

     The urls are obtained by calling ``load_existing_urls`` with
     ``self.name``.
     """
     name = self.name
     self.existing_urls = load_existing_urls(name)