def _process_start_time(dt):
    '''
    Convert a datetime string such as 2016-03-15T13:00:00+08:00 to a timestamp.

    Only the first ``YYYY-MM-DDTHH:MM:SS`` portion found in the string is
    parsed. Anything after the seconds (for example a ``+08:00`` offset) is
    not part of the match, so ``totimestamp`` receives a naive datetime.

    :param dt: string containing the datetime text
    :return: the result of ``totimestamp`` for the parsed datetime, or None
             when parsing or conversion fails (the failure is logged)
    '''
    # NOTE(review): this file defines `_process_start_time` more than once;
    # the definition that appears last is the one callers actually get.
    try:
        match = re.search(r'\d{4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{1,2}:\d{1,2}', dt)
        if match is None:
            # Raise here so the no-match case is logged with a meaningful
            # message through the same handler as every other failure.
            raise ValueError('no datetime found in %r' % (dt,))
        time_raw = datetime.datetime.strptime(match.group(0), '%Y-%m-%dT%H:%M:%S')
        return totimestamp(time_raw)
    except Exception as e:
        # Deliberately broad: any failure here means "no start time".
        # `as e` is valid on Python 2.6+ and Python 3; logging the exception
        # itself avoids the deprecated `e.message` attribute.
        logger.error('Error while parsing start time: %s', e)
        return None
def _process_start_time(dt): """ Parse datetime format as 2016-03-15T13:00:00+08:00 to datetime object """ try: date_time = re.findall(r"\d{4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{1,2}:\d{1,2}", dt)[0] time_raw = datetime.datetime.strptime(date_time, "%Y-%m-%dT%H:%M:%S") return totimestamp(time_raw) except Exception, e: logging.error("Error while parsing start time: %s", e.message, exc_info=True) return None
def parse_one_event(url, sub_browser):
    '''
    Load an event detail page in the given web driver and extract its fields.

    The title, description, venue, address and date/time elements are
    required; a failure on any of them aborts the parse. The cover image,
    category tags and ticket block are optional and fall back to
    None / None / 0 when the element is absent.

    :param url: address of the event detail page
    :param sub_browser: web driver used to load and query the page
    :return: dict describing the event, or None if anything went wrong
             (the error is logged with its traceback)
    '''
    def text_at(xpath):
        # Text of the first element matching xpath; raises if it is missing.
        return sub_browser.find_element_by_xpath(xpath).text

    try:
        sub_browser.get(url)

        title = text_at('//div[@id="field-event-name"]')

        # Cover image is optional.
        try:
            banner = sub_browser.find_element_by_xpath('//div[@id="visual-banner-preview"]/img')
            cover_url = banner.get_attribute('src')
        except NoSuchElementException:
            cover_url = None

        detail = text_at('//div[@id="field-event-description"]')
        venue = text_at('//div[@id="field-event-venue_name"]')
        location = text_at('//div[@id="field-event-address"]')
        address = venue + ', ' + location

        # The date/time text is matched as "<Mon> <day> ... <year>" plus a
        # "<h>:<mm> <AM/PM>" part at the start of a later line; the pieces
        # are joined with '-' so a single strptime format can parse them.
        when_text = text_at('//div[@id="field-event-datetime"]')
        date_parts = re.search(r'(\w{3}) (\d{1,2}).*?(\d{4})', when_text).groups()
        clock_parts = re.search(r'\n(\d+):(\d{2}) (\w{2})', when_text).groups()
        stamp_source = '-'.join(date_parts + clock_parts)
        parsed_when = datetime.datetime.strptime(stamp_source, '%b-%d-%Y-%I-%M-%p')
        start_time = totimestamp(parsed_when)

        # Category tags are optional.
        try:
            category = text_at('//div[@class="tag-list"]')
        except NoSuchElementException:
            category = None

        # An event counts as free when its ticket block mentions "Free";
        # a missing ticket block is treated as not free.
        try:
            ticket_text = text_at('//dl[@class="tix-detail tickets tix-detail-wide cf"]')
        except NoSuchElementException:
            is_free = 0
        else:
            is_free = 1 if u'Free' in ticket_text else 0

        event_info = {
            'source': 'peatix',
            'source_url': url,
            'title': title,
            'cover_url': cover_url,
            'detail': detail,
            'location': address,
            'start_time': start_time,
            'event_website': None,  # not available on the page
            'is_free': is_free,
            'category': category,
        }
        logger.info('Parse event with url %s successfully.', url)
        return event_info
    except Exception:
        # Best-effort scraping: log the failure with traceback and give up
        # on this event rather than propagating the error.
        logger.error('Parse event with url %s Error.', url, exc_info=True)
        return None