Ejemplo n.º 1
0
  def _parse_show(self, link):
    """Fetch the event page at ``link`` and turn it into a ``Show``.

    Headliners and supporting acts are read from the ``.headliners`` and
    ``.supports`` elements of the whole page; dates, times, the image and
    the resource links come from the ``.event-detail`` element (resources
    additionally from ``.artist-boxes``).

    Returns the populated ``Show``.
    """
    # NOTE(review): parse_500 presumably lets the fetcher parse pages served
    # with an HTTP 500 status -- confirm against html_util.
    page = html_util.fetch_and_parse(link, parse_500 = True)

    detail_el  = html_util.get_first_element(page, ".event-detail")
    artists_el = html_util.get_first_element(page, ".artist-boxes")

    date_txt = html_util.get_first_element(detail_el, ".dates").text_content()

    # Headliners first, then supports, each tagged with the matching flag.
    lineup = []

    for selector, is_headliner in (('.headliners', True), ('.supports', False)):
      for block in html_util.get_elements(page, selector):
        lineup.extend(
          Performer(name, headliner = is_headliner)
          for name in lang_util.parse_performers(block.text_content())
        )

    show = Show()

    show.merge_key  = link
    show.venue      = self.venue()
    show.performers = lineup

    time_txt = html_util.get_first_element(detail_el, ".times").text_content()

    show.show_time = date_util.parse_date_and_time(date_txt, time_txt)

    show.resources.show_url      = link
    show.resources.resource_uris = self.resource_extractor.extract_resources(detail_el, artists_el)

    # The image is optional; only record a URL when an <img> is present.
    picture = html_util.get_first_element(detail_el, "img", optional = True)

    if picture is not None:
      show.resources.image_url = picture.get('src')

    return show
Ejemplo n.º 2
0
  def _get_parser(self):
    """Yield the shows found on the calendar page at ``self.CALENDAR_URL``.

    The page is downloaded, stray ``</html>`` tags are removed, and the
    result is parsed with lxml. Inside the table whose ``width`` is
    ``'756'``, every table with ``width`` ``'700'`` and ``height`` ``'80'``
    is handed to ``self._parse_show``; truthy results are yielded.

    Raises:
      Exception: if no table with ``width == '756'`` exists in the page.
    """
    calendar_file = urllib2.urlopen(self.CALENDAR_URL)

    try:
      # Read the body in one call instead of concatenating line by line.
      calendar = calendar_file.read()
    finally:
      # Always release the connection, even when the read fails.
      calendar_file.close()

    # Europa has an errant closing html tag at the top of the document that messes up lxml
    calendar = calendar.replace('</html>', '')

    doc = lxml.html.document_fromstring(calendar)

    doc.make_links_absolute(self.CALENDAR_URL)

    main_section = None

    # The first table that is 756 wide is the container for all listings.
    for el in doc.iter(tag = 'table'):
      if el.get('width') == '756':
        main_section = el

        break

    if main_section is None:
      raise Exception('Unable to find main section')

    # Each listing is a 700x80 table nested inside the main section.
    for el in html_util.get_elements(main_section, 'table'):
      if el.get('width') == '700' and el.get('height') == '80':
        show = self._parse_show(el)

        if show:
          yield show
Ejemplo n.º 3
0
  def _parse_show(self, link):
    """Fetch the event page at ``link`` and build a ``Show`` from it.

    Performers are taken from every non-blank ``h1`` inside the element
    with id ``mainColumn``. The merge key is the ``page_id`` group of
    ``self.EVENT_URL`` matched against ``link``. The show time is the
    parsed ``.date`` text with its hour replaced by 21.

    Returns the populated ``Show``.
    """
    LOG.debug("Fetching show: %s" % link)

    event_doc    = html_util.fetch_and_parse(link)

    event_detail = event_doc.get_element_by_id("mainColumn")

    show = Show()

    for performer in html_util.get_elements(event_detail, 'h1'):
      name = performer.text_content().strip(' \n\r\t')
      # Skip headings that contain only whitespace.
      if name:
        show.performers.append(Performer(name))

    date_txt = html_util.get_first_element(event_detail, '.date').text_content()

    # NOTE(review): a link that does not match EVENT_URL leaves event_match
    # as None and fails on .group below -- presumably callers only pass
    # matching links; confirm.
    event_match = self.EVENT_URL.match(link)

    show.merge_key = event_match.group('page_id')
    show.venue     = self.venue()
    # The hour is always forced to 21, whatever the parsed date contains.
    show.show_time = date_util.parse_date_time(date_txt).replace(hour = 21)

    LOG.debug('Date: %s' % show.date)

    show.resources.show_url      = link
    show.resources.resource_uris = self.resource_extractor.extract_resources(event_detail)

    # Use the first image whose src contains 'main'. An <img> without a src
    # attribute yields None from get(), so it is skipped rather than tested
    # with `in`, which would raise TypeError.
    for img_tag in event_detail.iter(tag = 'img'):
      src = img_tag.get('src')

      if src and 'main' in src:
        show.resources.image_url = src

        break

    return show
Ejemplo n.º 4
0
  def _parse_v2(self, doc):
    """Extract resources from the ``.htmlBoxModule`` elements of ``doc``.

    Only modules inside the first ``.content.contentMid`` element are
    considered. Returns an ``ArtistProfileParserResult`` wrapping the
    extracted resources.
    """
    content_root = parsing.get_first_element(doc, '.content.contentMid')
    box_modules  = list(parsing.get_elements(content_root, '.htmlBoxModule'))

    # Every module is passed to the extractor as its own positional argument.
    return ArtistProfileParserResult(
      self.resource_extractor.extract_resources(*box_modules)
    )
Ejemplo n.º 5
0
 def _parse_shows(self, base_date, td):
   """Build the shows listed in one calendar cell.

   The day of the month is read from the cell's ``.day`` element and
   substituted into ``base_date``. Links under ``.lr_color`` and
   ``.googie_color`` are each passed, as a group, to ``self._parse_show``.

   Returns a list with zero, one or two parsed shows.
   """
   day = int(html_util.get_first_element(td, '.day').text_content())

   date = base_date.replace(day = day)

   # '%F' is a C-library extension that is not available on every platform;
   # '%Y-%m-%d' is the portable spelling of the same format.
   logger.debug('Parsing shows on %s' % date.strftime('%Y-%m-%d'))

   lr_shows     = html_util.get_elements(td, '.lr_color a')
   googie_shows = html_util.get_elements(td, '.googie_color a')

   shows = []

   # NOTE(review): these truthiness checks assume get_elements returns a
   # sequence (empty when nothing matches); a generator would always be
   # truthy -- confirm against html_util.
   if lr_shows:
     shows.append(self._parse_show(date, lr_shows))

   if googie_shows:
     shows.append(self._parse_show(date, googie_shows))

   return shows
Ejemplo n.º 6
0
  def _month_parser(self, request_date):
    """Yield every show on the calendar page for ``request_date``'s month.

    The page URL is ``self.BASE_URL`` followed by ``calendar/<year>-<month>``.
    Each ``td.has-events`` cell of the ``.month-view table`` element is
    passed to ``self._parse_shows`` and its results are yielded one by one.
    """
    url = '%scalendar/%d-%d' % (self.BASE_URL, request_date.year, request_date.month)

    logger.debug('Parsing: %s' % url)

    month_table = html_util.get_first_element(html_util.fetch_and_parse(url), '.month-view table')

    event_cells = html_util.get_elements(month_table, 'td.has-events')

    for cell in event_cells:
      day_shows = self._parse_shows(request_date, cell)

      for parsed in day_shows:
        yield parsed
Ejemplo n.º 7
0
    def _parse_show(self, link):
        """Fetch the event page at ``link`` and build a ``Show`` from it.

        The numeric ``event_id`` group of ``self.IS_EVENT`` matched against
        ``link`` selects the ``.tfly-event-id-<id>`` element, which supplies
        the performers, resource links and image. Date and time text are
        read from the ``.dates`` and ``.times`` elements of the whole page.

        Returns the populated ``Show`` with both show time and door time set.
        """
        event_doc = html_util.fetch_and_parse(link)
        # NOTE(review): a link that does not match IS_EVENT leaves match as
        # None and fails on .group below -- presumably callers pre-filter
        # links; confirm.
        match = self.IS_EVENT.match(link)

        event_id = int(match.group("event_id"))
        event_detail = html_util.get_first_element(event_doc, ".tfly-event-id-%d" % event_id)

        date_txt = html_util.get_first_element(event_doc, ".dates").text_content()
        time_txt = html_util.get_first_element(event_doc, ".times").text_content()

        # The image is optional: request it as such so that an event without
        # an <img> reaches the None check below.
        img = html_util.get_first_element(event_detail, "img", optional=True)

        performers = []

        # Each headliner element is taken verbatim as one performer name.
        for p in html_util.get_elements(event_detail, ".headliners"):
            performers.append(Performer(p.text_content(), headliner=True))

        # Support text is split into individual names by parse_performers.
        for p in html_util.get_elements(event_detail, ".supports"):
            for pi in lang_util.parse_performers(p.text_content()):
                performers.append(Performer(pi, headliner=False))

        show = Show()

        show.merge_key = link
        show.venue = self.venue()
        show.performers = performers
        show.show_time = date_util.parse_show_time(date_txt, time_txt)
        show.door_time = date_util.parse_door_time(date_txt, time_txt)

        show.resources.show_url = link
        show.resources.resource_uris = self.resource_extractor.extract_resources(event_detail)

        if img is not None:
            show.resources.image_url = img.get("src")

        return show