Example #1
0
    def parse_event_page(self, response):
        """Build an event item from a single event detail page.

        Collects name, description, venue/address, coordinates, link and
        start/end time from the page via XPath. When the raw page body does
        not contain the text "View All Dates", the ``start_date`` is read
        from the ``h2.date`` heading, ``end_freq`` is set to the same
        collected value, and one loaded item is yielded.

        Pages that do contain "View All Dates" yield nothing from the code
        in this block.

        Raises:
            CloseSpider: if collecting ``start_date`` raises any exception.
        """
        # NOTE(review): not used in this block; presumably needed by the
        # multi-date ("series") handling, which is not part of this block.
        hxs = HtmlXPathSelector(response)

        loader = FiveTenFamiliesLoader(item=EventItem(), response=response)
        # Sample of the link whose text is tested for further down:
        #<a href="http://www.510families.com/events/index.php?com=series&amp;sID=12caf9a7138216ad" class="series">View All Dates</a>

        loader.add_xpath(u"name", "//h1[@itemprop='summary']/text()")

        loader.add_xpath(u"description", "//div[@itemprop='description']")
        loader.add_xpath(u"place_name", "normalize-space(//span[@itemprop='name']/text())")
        loader.add_xpath(u"place_street_addr", "normalize-space(//div[@itemprop='address']/span[@itemprop='street-address']/text())")
        loader.add_xpath(u"place_city", "//div[@itemprop='address']/span[@itemprop='locality']/text()")
        loader.add_xpath(u"place_state", "//div[@itemprop='address']/span[@itemprop='region']/text()")
        loader.add_xpath(u"place_zip", "//div[@itemprop='address']/span[@itemprop='postal-code']/text()")
        loader.add_xpath(u"place_lat", "//meta[@itemprop='latitude']/@content")
        loader.add_xpath(u"place_long", "//meta[@itemprop='longitude']/@content")
        loader.add_value(u"link", response.url)
        loader.add_value(u"rr_identifier", u"7NYZ")
        loader.add_value(u"rr_publisher_market", u"SF")

        # taking the start/end times from event page.
        # for other dates assuming that the start/end times are the same
        # TODO check the start/end times for the rest of the dates
        loader.add_xpath(u"start_time", "//time[@itemprop='startDate']/text()")
        # NOTE(review): end_time is read from the same 'startDate' element as
        # start_time -- confirm whether the page exposes a separate end date.
        loader.add_xpath(u"end_time", "//time[@itemprop='startDate']/text()")

        # Snapshot of the values collected so far.
        # NOTE(review): presumably reused to build one item per date for
        # series events; that code is not part of this block.
        start_time = loader.get_collected_values(u"start_time")
        end_time = loader.get_collected_values(u"end_time")

        name = loader.get_collected_values(u"name")
        description = loader.get_collected_values(u"description")
        place_name = loader.get_collected_values(u"place_name")
        place_street_addr = loader.get_collected_values(u"place_street_addr")
        place_city = loader.get_collected_values(u"place_city")
        place_state = loader.get_collected_values(u"place_state")
        place_zip = loader.get_collected_values(u"place_zip")
        place_lat = loader.get_collected_values(u"place_lat")
        place_long = loader.get_collected_values(u"place_long")

        # Bytes literal: identical to a plain str on Python 2 and still a
        # valid membership test where response.body is a bytes object.
        if b"View All Dates" not in response.body:
            #2013-10-20T03:00:00.0-07:00
            try:
                loader.add_xpath("start_date", "//h2[@class='date']/text()")
            except Exception as e:
                # Any failure while reading the date stops the whole crawl.
                raise CloseSpider("error processing start date. %s" % str(e))

            # No "View All Dates" marker: end_freq takes the same collected
            # value as start_date.
            start_date = loader.get_collected_values("start_date")
            loader.replace_value(u"end_freq", start_date)
            yield loader.load_item()
Example #2
0
        if "View All Dates" not in response.body:
            #2013-10-20T03:00:00.0-07:00
            try:
                loader.add_xpath("start_date", "//h2[@class='date']/text()")
            except Exception, e:
                raise CloseSpider("error processing start date. %s" % str(e))

            start_date = loader.get_collected_values("start_date")
            loader.replace_value(u"end_freq", start_date)
            yield loader.load_item()

        else:

            for i, dt in enumerate(hxs.select("//ul[@class='series']/li")):
                new_event_loader = FiveTenFamiliesLoader(item=EventItem(), response=response)

                new_event_loader.add_value(u"name", name)
                new_event_loader.add_value(u"description", description)
                new_event_loader.add_value(u"start_time", start_time)
                new_event_loader.add_value(u"end_time", end_time)

                new_event_loader.add_value(u"place_name", place_name)
                new_event_loader.add_value(u"place_street_addr", place_street_addr)
                new_event_loader.add_value(u"place_city", place_city)
                new_event_loader.add_value(u"place_state", place_state)
                new_event_loader.add_value(u"place_zip", place_zip)
                new_event_loader.add_value(u"place_lat", place_lat)
                new_event_loader.add_value(u"place_long", place_long)

                new_event_loader.add_xpath(