class item(ItemElement):
    """One event entry of a Biplan listing.

    The concrete event class depends on ``env['is_concert']``; entries are
    kept only when they pass the city / category / date filters found in
    ``env``.
    """

    def klass(self):
        """Return a fresh event object of the type selected by ``env['is_concert']``."""
        if self.env['is_concert']:
            return BiplanCalendarEventConcert()
        return BiplanCalendarEventTheatre()

    def condition(self):
        """Tell whether this element should be parsed at all.

        The element needs at least one ``./div`` child, and the cleaned image
        ``src`` must not end with ``/``.
        """
        blocks = self.el.xpath('./div')
        if not blocks:
            # Hand back the (falsy) xpath result itself, as a short-circuit
            # ``and`` would.
            return blocks
        image_src = CleanText('./div/a/img/@src')(self)
        return image_src[-1] != '/'

    def validate(self, obj):
        """Accept ``obj`` only if it matches the filters stored in ``env``."""
        if not self.is_valid_event(obj, self.env['city'], self.env['categories']):
            return False
        return self.is_event_in_valid_period(obj.start_date,
                                             self.env['date_from'],
                                             self.env['date_to'])

    def is_valid_event(self, event, city, categories):
        """Check the optional city (case-insensitive) and category filters."""
        # A falsy ``city`` / ``categories`` means "no filter on that field".
        if city and city.upper() != event.city.upper():
            return False
        if categories and event.category not in categories:
            return False
        return True

    def is_event_in_valid_period(self, event_date, date_from, date_to):
        """Return True when ``event_date`` is within ``[date_from, date_to]``.

        A falsy ``date_to`` leaves the period open-ended.
        """
        if not event_date >= date_from:
            return False
        if not date_to:
            return True
        return True if event_date <= date_to else False

    # Identifier: the part of the link between the slash and ".html".
    obj_id = Regexp(Link('./div/a'), '/(.*?).html')
    # Date and start time are both read from the same <b> node.
    obj_start_date = CombineDate(BiplanDate('div/div/b'), StartTime('div/div/b'))
    obj_end_date = CombineDate(BiplanDate('div/div/b'), EndTime('.'))
    obj_price = BiplanPrice('div/div/b')
    obj_summary = CleanText("div/div/div/a/strong")
class get_event(ItemElement):
    """Build (or complete) a Biplan event from the popup of an event page."""

    def klass(self):
        """Return a fresh event object matching the popup's CSS class.

        This used to be the class-level expression
        ``BiplanCalendarEventConcert if Env('is_concert') else ...``, which is
        evaluated once while the class body runs and tests the ``Env`` filter
        object itself, so the choice could never depend on the page. The
        decision is now taken per page, with the same test ``parse`` uses to
        fill ``env['is_concert']``.
        """
        popup = self.el.xpath("//div/div/div[@id='popup']")[0]
        if popup.attrib['class'] != 'theatre-popup':
            return BiplanCalendarEventConcert()
        return BiplanCalendarEventTheatre()

    def parse(self, el):
        """Prepare ``env`` for the field filters, or complete an existing object.

        When ``self.obj`` already has an id, only its ``url`` and
        ``description`` are filled in and ``SkipItem`` is raised so the
        declarative fields below are not evaluated.

        Raises:
            IndexError: if the page has no ``popup`` div.
            SkipItem: after completing an object that already had an id.
        """
        _div = "//div/div/div[@id='popup']"
        div = el.xpath(_div)[0]

        if self.obj.id:
            event = self.obj
            event.url = self.page.url
            event.description = CleanHTML(
                "%s/div/div[@class='presentation-popup']" % _div)(self)
            raise SkipItem()

        # Anything that is not explicitly a theatre popup counts as a concert.
        self.env['is_concert'] = (div.attrib['class'] != 'theatre-popup')
        self.env['url'] = self.page.url

    obj_id = Env('_id')
    # Common XPath prefix of the popup used by the field filters below.
    base = "//div[@id='popup']"
    obj_price = BiplanPrice("%s/div/b" % base)
    # Date and start time are both read from the same <b> node.
    obj_start_date = CombineDate(BiplanDate("%s/div/b" % base),
                                 StartTime("%s/div/b" % base))
    obj_end_date = CombineDate(BiplanDate("%s/div/b" % base), EndTime("."))
    obj_url = Env('url')
    obj_summary = CleanText('%s/div/div/span' % base)
    obj_description = CleanHTML('%s/div/div[@class="presentation-popup"]' % base)
def obj_end_date(self):
    """Return the end date of the event, rolling over midnight when needed.

    The end time is the ``HH:MM`` that follows ``"- "`` in the first list item
    of the ``detail`` aside; it is combined with ``self._date``.

    The rollover test used to be ``end_date > start``, which pushed every
    ordinary event to the next day and left events crossing midnight ending
    before they started. A day is now added only when the end falls before
    the start.
    """
    end_time = Time(
        Regexp(CleanText('//aside[@id="detail"]/ul/li[1]'),
               r'- (\d{2}:\d{2})'))(self)
    end_date = CombineDate(self._date, end_time)(self)

    # An end time earlier than the start time means the event finishes on the
    # following day.
    if end_date < self.obj_start_date():
        end_date += timedelta(days=1)

    return end_date
class get_event(ItemElement):
    """Build an ``AgendaDuLibreCalendarEvent`` from an event detail page."""

    klass = AgendaDuLibreCalendarEvent

    # The DC:date meta tag feeds both the start date and the end date.
    _DC_DATE = '//meta[@property="DC:date"]/@content'

    def parse(self, el):
        """Expose the current page URL to the field filters through ``env``."""
        page_url = self.page.url
        self.env['url'] = page_url

    obj_id = Env('_id')
    obj_url = Env('url')
    obj_summary = CleanText('//meta[@property="DC:title"]/@content')
    obj_description = CleanHTML('//div[@class="description"]')
    obj_location = CleanText('//p[@class="full_address"]/span[1]')
    obj_city = CleanText('//meta[@property="geo:placename"]/@content')
    obj_start_date = DateTime(CleanText(_DC_DATE))
    # NOTE(review): EventEndDate is defined elsewhere; presumably it supplies
    # the end time that is combined with the DC:date day - confirm.
    obj_end_date = CombineDate(DateTime(CleanText(_DC_DATE)), EventEndDate('.'))
class get_event(ItemElement):
    """Build a ``RazibusCalendarEvent`` from an event detail page.

    Fields come from the page's ``itemprop`` microdata and ``og:`` meta tags.
    """

    klass = RazibusCalendarEvent

    # The startDate <time> attribute feeds both the start and the end date.
    _START_DATETIME = '//span[@itemprop="startDate"]/time/@datetime'

    obj_id = Env('_id')
    obj_summary = CleanText('//h2[@itemprop="name"]')
    obj_start_date = DateTime(CleanText(_START_DATETIME))
    obj_end_date = CombineDate(DateTime(CleanText(_START_DATETIME)),
                               EndTime('.'))
    obj_location = CleanText('//meta[@property="og:street-address"]/@content')
    obj_city = CleanText('//meta[@property="og:locality"]/@content')
    obj_url = CleanText('//meta[@property="og:url"]/@content')
    obj_description = CleanHTML('//div[@itemprop="description"]')
class item(ItemElement):
    """One ``RazibusCalendarEvent`` entry of a Razibus listing.

    Entries are kept only when they pass the city / category / date filters
    found in ``env``.
    """

    klass = RazibusCalendarEvent

    # XPaths shared by several field filters below.
    _EVENT_LINK = './p/strong/a[@itemprop="url"]'
    _START_CONTENT = './p/span[@itemprop="startDate"]/@content'

    def validate(self, obj):
        """Accept ``obj`` only if it matches the filters stored in ``env``."""
        if not self.is_valid_event(obj, self.env['city'], self.env['categories']):
            return False
        return self.is_event_in_valid_period(obj.start_date,
                                             self.env['date_from'],
                                             self.env['date_to'])

    def is_valid_event(self, event, city, categories):
        """Check the optional city (case-insensitive) and category filters."""
        # A falsy ``city`` / ``categories`` means "no filter on that field".
        if city and city.upper() != event.city.upper():
            return False
        if categories and event.category not in categories:
            return False
        return True

    def is_event_in_valid_period(self, event_date, date_from, date_to):
        """Return True when ``event_date`` is within ``[date_from, date_to]``.

        A falsy ``date_to`` leaves the period open-ended.
        """
        if not event_date >= date_from:
            return False
        if not date_to:
            return True
        return True if event_date <= date_to else False

    # Identifier: the part of the event link between the site root and ".html".
    obj_id = Regexp(Link(_EVENT_LINK), 'http://razibus.net/(.*).html')
    obj_summary = CleanText(_EVENT_LINK)
    obj_start_date = DateTime(CleanText(_START_CONTENT))
    obj_end_date = CombineDate(DateTime(CleanText(_START_CONTENT)),
                               EndTime('.'))
    # The location span carries the venue in @content and the city as text.
    obj_location = CleanText('./p/span[@itemprop="location"]/@content')
    obj_city = CleanText('./p/span[@itemprop="location"]')
def obj_start_date(self):
    """Return the start date of the event.

    The start time is the ``HH:MM`` that precedes ``" -"`` in the first list
    item of the ``detail`` aside; it is combined with ``self._date``.
    """
    schedule_text = CleanText('//aside[@id="detail"]/ul/li[1]')
    start_time_filter = Time(Regexp(schedule_text, r'(\d{2}:\d{2}) -'))
    start_time = start_time_filter(self)
    combined = CombineDate(self._date, start_time)
    return combined(self)