def parse_event_page(self, response): hxs = HtmlXPathSelector(response) loader = FiveTenFamiliesLoader(item=EventItem(), response=response) #<a href="http://www.510families.com/events/index.php?com=series&sID=12caf9a7138216ad" class="series">View All Dates</a> loader.add_xpath(u"name", "//h1[@itemprop='summary']/text()") loader.add_xpath(u"description", "//div[@itemprop='description']") loader.add_xpath(u"place_name", "normalize-space(//span[@itemprop='name']/text())") loader.add_xpath(u"place_street_addr", "normalize-space(//div[@itemprop='address']/span[@itemprop='street-address']/text())") loader.add_xpath(u"place_city", "//div[@itemprop='address']/span[@itemprop='locality']/text()") loader.add_xpath(u"place_state", "//div[@itemprop='address']/span[@itemprop='region']/text()") loader.add_xpath(u"place_zip", "//div[@itemprop='address']/span[@itemprop='postal-code']/text()") loader.add_xpath(u"place_lat", "//meta[@itemprop='latitude']/@content") loader.add_xpath(u"place_long", "//meta[@itemprop='longitude']/@content") loader.add_value(u"link", response.url) loader.add_value(u"rr_identifier", u"7NYZ") loader.add_value(u"rr_publisher_market", u"SF") # taking the start/end times from event page. # for other dates assuming that the start/end times are the same # TODO check the start/end times for the rest of the dates loader.add_xpath(u"start_time", "//time[@itemprop='startDate']/text()") loader.add_xpath(u"end_time", "//time[@itemprop='startDate']/text()") start_time = loader.get_collected_values(u"start_time") end_time = loader.get_collected_values(u"end_time") name = loader.get_collected_values(u"name") description = loader.get_collected_values(u"description") place_name = loader.get_collected_values(u"place_name") place_street_addr = loader.get_collected_values(u"place_street_addr") place_city = loader.get_collected_values(u"place_city") place_state = loader.get_collected_values(u"place_state") place_zip = loader.get_collected_values(u"place_zip") place_lat = loader.get_collected_values(u"place_lat") place_long = loader.get_collected_values(u"place_long") if "View All Dates" not in response.body: #2013-10-20T03:00:00.0-07:00 try: loader.add_xpath("start_date", "//h2[@class='date']/text()") except Exception, e: raise CloseSpider("error processing start date. %s" % str(e)) start_date = loader.get_collected_values("start_date") loader.replace_value(u"end_freq", start_date) yield loader.load_item()
if "View All Dates" not in response.body: #2013-10-20T03:00:00.0-07:00 try: loader.add_xpath("start_date", "//h2[@class='date']/text()") except Exception, e: raise CloseSpider("error processing start date. %s" % str(e)) start_date = loader.get_collected_values("start_date") loader.replace_value(u"end_freq", start_date) yield loader.load_item() else: for i, dt in enumerate(hxs.select("//ul[@class='series']/li")): new_event_loader = FiveTenFamiliesLoader(item=EventItem(), response=response) new_event_loader.add_value(u"name", name) new_event_loader.add_value(u"description", description) new_event_loader.add_value(u"start_time", start_time) new_event_loader.add_value(u"end_time", end_time) new_event_loader.add_value(u"place_name", place_name) new_event_loader.add_value(u"place_street_addr", place_street_addr) new_event_loader.add_value(u"place_city", place_city) new_event_loader.add_value(u"place_state", place_state) new_event_loader.add_value(u"place_zip", place_zip) new_event_loader.add_value(u"place_lat", place_lat) new_event_loader.add_value(u"place_long", place_long) new_event_loader.add_xpath(