def _parse_show(self, event_detail):
    """Build a Show from a single event-detail element.

    Args:
        event_detail: Element describing one event. It is searched for an
            ``h2`` (date text), a ``.caption`` (performer text) and ``img``
            tags.

    Returns:
        The populated Show, or None when the element contains no ``h2``.
    """
    # Look the heading up once and reuse it, instead of querying the
    # element a second time after the existence check.
    heading = html_util.get_first_element(event_detail, 'h2', optional=True)
    if heading is None:
        return None

    date_txt = heading.text_content()
    performers_txt = html_util.get_first_element(event_detail, '.caption').text_content()

    show = Show()
    show.venue = self.venue()
    show.performers = [Performer(p) for p in lang_util.parse_performers(performers_txt)]

    # Headings that begin with "every" (case-insensitive) are not parsed as
    # a date -- presumably recurring events; confirm against the venue page.
    if not date_txt.lower().startswith('every'):
        show.date = date_util.parse_date_and_time(date_txt, None)

    show.resources.resource_uris = self.resource_extractor.extract_resources(event_detail)

    # The first <img> under the element, if any, supplies the image URL.
    first_img = next(event_detail.iter(tag='img'), None)
    if first_img is not None:
        show.resources.image_url = first_img.get('src')

    date_util.adjust_fuzzy_years(show, self._parse_started)
    return show
def _parse_show(self, base_date, links):
    """Assemble a Show for ``base_date`` from a group of calendar links.

    Args:
        base_date: Date of the show; stored as ``show.date`` and passed to
            the date parser together with each performer's start time text.
        links: Iterable of link elements; links without text are skipped.

    Returns:
        A Show whose ``show_time`` is the earliest parsed start time among
        the performers (None if no performer had one).
    """
    lineup = []
    artist_els = []
    earliest = None

    for link in links:
        # Every other link on the calendar seems to have no text
        if not link.text_content():
            continue

        name, start_time_txt, artist_el = self._parse_artist(link)
        if artist_el is not None:
            artist_els.append(artist_el)

        if start_time_txt:
            parsed = date_util.parse_date_and_time(base_date, start_time_txt)
            if not earliest or parsed < earliest:
                earliest = parsed

        lineup.append(Performer(name, start_time=start_time_txt))

    # Performers are listed from first to last; flip both collections.
    lineup.reverse()
    artist_els.reverse()

    show = Show()
    show.venue = self.venue()
    show.performers = lineup
    show.date = base_date
    show.show_time = earliest
    show.resources.resource_uris = self.resource_extractor.extract_resources(*artist_els)

    # Take the first <img> src of each artist element in turn and stop at
    # the first one that is non-empty.
    image_url = None
    for el in artist_els:
        for img_tag in el.iter(tag='img'):
            image_url = img_tag.get('src')
            break
        if image_url:
            break
    show.resources.image_url = image_url

    return show
def _trans_record(self, record):
    """Translate one input record into a Show.

    Args:
        record: Mapping-like object read with ``.get``/``[]``. Keys used:
            ``venue-name``, ``venue-url``, ``title``, ``merge-key``,
            ``performers`` and ``tags`` (comma-separated strings),
            ``show-date`` (required), ``show-time``, ``door-time``,
            ``show-url`` and ``image-url``.

    Returns:
        The populated Show.

    Raises:
        Exception: If ``show-date`` is missing or empty.
    """
    show = Show()
    show.venue = Venue(record.get('venue-name'), record.get('venue-url'))
    show.title = record.get('title')
    show.merge_key = record.get('merge-key')

    performers = []
    if record.get('performers'):
        performers = [Performer(name.strip()) for name in record['performers'].split(',')]

    if record.get('tags'):
        show.tags = [t.strip() for t in record['tags'].split(',')]

    date_txt = record.get('show-date')
    if not date_txt:
        raise Exception('Show Date is required')
    # NOTE(review): this calls date_util.parse_date_time; confirm that helper
    # exists alongside date_util.parse_date_and_time used for the times below.
    show.date = date_util.parse_date_time(date_txt)

    # Leave show.performers untouched when the record names nobody.
    if performers:
        show.performers = performers

    if record.get('show-time'):
        show.show_time = date_util.parse_date_and_time(date_txt, record.get('show-time'))

    if record.get('door-time'):
        # Previously this assigned to show.show_time, clobbering the show
        # time parsed just above whenever both values were present.
        # Assumes Show exposes a door_time attribute -- confirm.
        show.door_time = date_util.parse_date_and_time(date_txt, record.get('door-time'))

    show.resources.show_url = record.get('show-url')
    show.resources.image_url = record.get('image-url')
    show.resources.resource_uris = self.resource_extractor.extract_resources(
        self._create_resource_doc(record))

    return show
def _parse_show(self, event_detail):
    """Build a Show from the displayed text of an event-detail element.

    The first non-empty text line is taken as the date text. Each later
    non-empty line is classified, in this order, as: a line containing a
    time (sets ``show_time``; the rest of the line may name a performer),
    a line matching ``NUM_RE`` (the rest may name a performer), or a bare
    performer name that is accepted only directly after an empty numbered
    or timed line.

    Args:
        event_detail: Element for one event; also scanned for resources
            and ``img`` tags.

    Returns:
        The populated Show.
    """
    show = Show()
    lineup = []
    text = html_util.get_displayed_text_content(event_detail).strip()
    date_txt = None

    # This flag is toggled so that either of the following layouts is accepted:
    #   1st: Ava Luna
    # or
    #   1st:
    #   Ava Luna
    expecting_name = True

    logger.debug("Parsing show content: %s" % text)

    for raw in text.split('\n'):
        if not raw:
            continue

        if not date_txt:
            # First non-empty line is the date.
            date_txt = raw
            continue

        time_match = date_util.STRICT_TIME_RE.search(raw)
        if time_match:
            show.show_time = date_util.parse_date_and_time(date_txt, time_match.group('time'))
            remainder = date_util.STRICT_TIME_RE.sub('', raw).strip(': ')
        elif self.NUM_RE.match(raw):
            remainder = self.NUM_RE.sub('', raw).strip()
        elif expecting_name:
            remainder = raw
        else:
            logger.error('Unknown line format: %s' % raw)
            continue

        # Anything left on the line is a performer; if nothing is left,
        # the name is expected on the following line.
        if remainder:
            lineup.append(Performer(remainder))
            expecting_name = False
        else:
            expecting_name = True

    show.venue = self.venue()
    show.performers = lineup
    show.date = date_util.parse_date_and_time(date_txt, None)
    show.resources.resource_uris = self.resource_extractor.extract_resources(event_detail)

    # The first <img> under the element, if any, supplies the image URL.
    first_img = next(event_detail.iter(tag='img'), None)
    if first_img is not None:
        show.resources.image_url = first_img.get('src')

    date_util.adjust_fuzzy_years(show, self._parse_started)
    return show