Ejemplo n.º 1
0
  def _get_parser(self):
    """Yield the shows found on the page at ``self.BASE_URL``.

    The displayed text of the first ``.defaultText`` element is split into
    lines. Every line accepted by ``self.SHOW_START_RE`` is handed to
    ``self._parse_show``; truthy results are yielded, falsy ones are dropped.
    """
    page      = html_util.fetch_and_parse(self.BASE_URL)
    container = html_util.get_first_element(page, '.defaultText')
    text      = html_util.get_displayed_text_content(container).strip()

    for row in text.split('\n'):
      # Only lines that look like the start of a show are worth parsing.
      if not self.SHOW_START_RE.match(row):
        continue

      parsed = self._parse_show(row)

      if parsed:
        yield parsed
Ejemplo n.º 2
0
  def _parse_show(self, date_txt, info_el):
    """Build a Show from a date string and the element describing the show.

    Each line of the element's displayed text is matched against
    ``self.PERFORMER_RE``; every match contributes one Performer built from
    its ``performer`` and ``time`` groups.

    Args:
      date_txt: date text, combined with the show time via
        ``date_util.parse_date_and_time``.
      info_el: element scanned for performers, resources and the image link.

    Returns:
      The populated Show, or None when no line matched a performer.
    """
    logger.debug('Parsing show in %s' % date_txt)

    lineup    = []
    last_time = None

    for row in html_util.get_displayed_text_content(info_el).split('\n'):
      found = self.PERFORMER_RE.match(row)

      if not found:
        continue

      # The time of the last matched performer line ends up as the show time.
      last_time = found.group('time')

      lineup.append(Performer(found.group('performer'), start_time = last_time))

    # Nothing recognisable as a performer: there is no show to report.
    if not lineup:
      return None

    show = Show()

    show.venue      = self.venue()
    show.performers = lineup
    show.show_time  = date_util.parse_date_and_time(date_txt, last_time)

    show.resources.resource_uris = self.resource_extractor.extract_resources(info_el)

    # The large image is stored in an anchor tag's href; when several anchors
    # match IMAGE_RE, the last one wins.
    for anchor in info_el.iter(tag = 'a'):
      if self.IMAGE_RE.search(anchor.get('href', '')):
        show.resources.image_url = anchor.get('href')

    date_util.adjust_fuzzy_years(show, self._parse_started)

    return show
Ejemplo n.º 3
0
  def _parse_show(self, event_detail):
    """Build a Show from the displayed text of ``event_detail``.

    The first non-empty line is kept as the date text. Each later non-empty
    line is treated as, in order of precedence:

      * a line containing a time (``date_util.STRICT_TIME_RE``): sets
        ``show.show_time``; whatever remains once the time is removed is a
        performer name;
      * a line accepted by ``self.NUM_RE``: whatever remains once the
        pattern is removed is a performer name;
      * a bare performer name, if the previous line left one pending;
      * otherwise an unknown format, which is logged as an error.

    Returns:
      The populated Show.
    """
    show   = Show()
    lineup = []

    text     = html_util.get_displayed_text_content(event_detail).strip()
    date_txt = None

    # True while a bare line may be accepted as a performer name. This allows
    # both of these layouts:
    #   1st: Ava Luna
    # and
    #   1st:
    #   Ava Luna
    expect_name = True

    logger.debug("Parsing show content: %s" % text)

    for raw in text.split('\n'):
      if not raw:
        continue

      time_match = date_util.STRICT_TIME_RE.search(raw)

      # The first non-empty line is always the date.
      if not date_txt:
        date_txt = raw
        continue

      if time_match:
        show.show_time = date_util.parse_date_and_time(date_txt, time_match.group('time'))

        remainder = date_util.STRICT_TIME_RE.sub('', raw).strip(': ')
      elif self.NUM_RE.match(raw):
        remainder = self.NUM_RE.sub('', raw).strip()
      elif expect_name:
        lineup.append(Performer(raw))
        expect_name = False
        continue
      else:
        logger.error('Unknown line format: %s' % raw)
        continue

      # Shared tail of the time/number branches: a non-empty remainder is the
      # performer itself, an empty one means the name follows on the next line.
      if remainder:
        lineup.append(Performer(remainder))

      expect_name = not remainder

    show.venue      = self.venue()
    show.performers = lineup
    show.date       = date_util.parse_date_and_time(date_txt, None)

    show.resources.resource_uris = self.resource_extractor.extract_resources(event_detail)

    # Only the first <img> found, if any, supplies the image URL.
    first_img = next(iter(event_detail.iter(tag = 'img')), None)

    if first_img is not None:
      show.resources.image_url = first_img.get('src')

    date_util.adjust_fuzzy_years(show, self._parse_started)

    return show
Ejemplo n.º 4
0
def TextMatcher(node, match_exp):
  """Yield each match of ``match_exp`` found in the displayed text of ``node``.

  This is a generator: the text is not extracted until iteration begins.
  """
  for found in match_exp.finditer(html_util.get_displayed_text_content(node)):
    yield found