def get_events(self):
    """Yield one all-day ``Event`` per dated paragraph of the programme.

    The paragraphs are the ``<p>`` elements inside the ``<td class="main">``
    cell of ``self.doc``.  The first non-empty paragraph is treated as a
    heading whose last space-separated word is the year; every following
    non-empty paragraph is expected to look like ``"DD/MM description"``.

    Paragraphs that cannot be parsed (or whose event cannot be built) are
    skipped on purpose: the scrape is best-effort.

    Yields:
        Event: with ``location``, ``dtstart;value=date`` (``YYYYMMDD``),
        ``summary`` and a ``uid`` derived from the MD5 of the description.
    """
    from hashlib import md5

    paragraphs = self.doc.find('td', {"class": "main"}).findAll('p')
    year = 0
    for paragraph in paragraphs:
        raw = paragraph.string
        if raw is None:
            # NOTE(review): presumably a paragraph with nested markup, for
            # which the parser exposes no single string; nothing to parse.
            continue
        p = decode_htmlentities(raw).strip()
        if not p:
            continue
        if not year:
            # Heading paragraph: remember the year and move on.
            year = p.split(' ')[-1]
            continue

        words = p.split(' ')
        try:
            date = words[0].split('/')
            desc = ' '.join(words[1:])

            event = Event()
            event.add('location', LOCATION)
            event.add('dtstart;value=date',
                      "%s%.2d%.2d" % (int(year), int(date[1]), int(date[0])))
            event.add('summary', desc)
            # TODO: add other info like the date!!

            # Hash bytes, not text: hashing a unicode description with
            # non-ASCII characters used to raise and silently drop the event.
            # ASCII-only descriptions keep exactly the same uid as before.
            md5text = desc
            if not isinstance(md5text, bytes):
                md5text = md5text.encode('utf-8')
            event['uid'] = md5(md5text).hexdigest() + '@supercinemarovereto.it'
        except Exception:
            # Best-effort scraping: skip paragraphs that do not match the
            # expected layout.  ``Exception`` (not a bare ``except``) so that
            # GeneratorExit and KeyboardInterrupt are never swallowed.
            continue

        # Yield outside the ``try`` so closing the generator works cleanly.
        yield event
def get_events(self):
    """Yield one ``Event`` per ``Eventi[...] = [...]`` entry in ``self.doc``.

    ``str(self.doc)`` is split on ``'];'``; chunks that do not start with
    ``'Eventi['`` are ignored.  The right-hand side of each remaining chunk
    is cleaned with ``decode_htmlentities`` / ``remove_html_tags`` and split
    on ``"',"`` into fields, which are read by position:

    * ``0``  -> uid
    * ``1``  -> summary
    * ``2``  -> date as ``DD/MM/YYYY``
    * ``4``  -> free text searched for ``"ore H"`` to get the start hour
      (hour 0 when absent)
    * ``-2`` -> description
    * ``-1`` -> location

    If the cleaned text contains a ``/IT/.../?s=<digits>`` path, the absolute
    URL is appended to the description.  The end time is the start plus
    ``default_event_duration`` hours.

    Entries that cannot be parsed are reported on stdout and skipped; the
    scrape is best-effort.  Progress diagnostics are also printed to stdout.

    Yields:
        Event: with ``dtstart``, ``dtstamp``, ``dtend``, ``location``,
        ``description``, ``summary`` and ``uid`` set.
    """
    import re

    # The local timezone is attached to each start time so that icalendar
    # will convert to UTC in the output.
    lt = LocalTimezone()
    base_url = 'http://www.isuonidelledolomiti.it'
    re_url = re.compile(r'/IT/.*/\?s=\d+')
    re_hours = re.compile(r'ore (\d{1,2})')

    for e in str(self.doc).split('];'):
        print(e)
        if not e.startswith('Eventi['):
            print('Not an ISDD event')
            continue

        # Split only on the first ' = ' so that an event whose own text
        # contains ' = ' is not truncated.
        parts = e.split(' = ', 1)
        if len(parts) < 2:
            continue
        p = remove_html_tags(decode_htmlentities(parts[1])).strip()
        if isinstance(p, bytes):
            p = p.decode("utf-8")
        if not p:
            continue

        # Drop the leading '[' and the trailing quote, then split into the
        # individual fields (each still carries its opening quote).
        fields = p[1:-1].split('\',')

        try:
            date_ = fields[2][1:].split('/')
            print(date_)
            if len(date_) < 3:
                # str.split never returns an empty list, so test for the
                # three expected day/month/year components instead.
                print("NON TROVO DATA %s" % (p,))
                continue

            try:
                print(fields[4][1:])
                hours_match = re_hours.search(fields[4][1:])
                if hours_match:
                    print('MATCH! %s' % (hours_match.group(),))
                    hour_ = hours_match.group(1)
                    print(hour_)
                else:
                    hour_ = 0
            except ValueError:
                print("ValueError while retrieving hour")
                hour_ = 0

            datestart = datetime(int(date_[2]), int(date_[1]), int(date_[0]),
                                 int(hour_), tzinfo=lt)
            dateend = datestart + timedelta(hours=default_event_duration)
            print("DateStart:  %s" % (datestart,))
            print("DateEnd:  %s" % (dateend,))

            location = fields[-1][1:]
            summary = fields[1][1:]
            description = fields[-2][2:]

            url_match = re_url.search(p)
            if url_match:
                url = base_url + url_match.group()
                description = description + " - " + url if description else url

            event = Event()
            event.add('dtstart', datestart)
            event.add('dtstamp', datestart)  # maybe it's better to use NOW()
            event.add('dtend', dateend)
            event.add('location', location)
            event.add('description', description)
            event.add('summary', summary)
            # TODO: add other info like the date!!
            event['uid'] = fields[0][1:]
        except Exception as exc:
            # Best-effort scraping: report and skip the malformed entry.
            # ``Exception`` (not a bare ``except``) so that GeneratorExit and
            # KeyboardInterrupt are never swallowed.
            print('ERRORE %s' % (type(exc),))
            print(p)
            print('')
            continue

        # Yield outside the ``try`` so closing the generator works cleanly.
        yield event