Ejemplo n.º 1
0
  def _parse_show(self, show_date, show_detail, show_time):
    """Build a Show from one listing entry.

    Args:
      show_date:   date value handed unchanged to the date_util parsers.
      show_detail: element holding the performers, links and images.
      show_time:   element whose text content holds the time information.

    Returns:
      The populated Show. show_time and door_time hold whatever the
      date_util parsers return for the filtered time text.
    """
    show = Show()

    raw_time_txt = show_time.text_content()

    # Keep only the comma-separated fragments that self.PRICE_OR_AGE does not
    # match, so the date parsers are not handed those fragments.
    time_txt = ','.join(p for p in raw_time_txt.split(',') if not self.PRICE_OR_AGE.search(p))

    logger.debug('Show: %s - %s', time_txt, raw_time_txt.strip(' \n'))
    show.venue      = self.venue()
    show.performers = self._parse_performers(show_detail)
    show.show_time  = date_util.parse_show_time(show_date, time_txt)
    show.door_time  = date_util.parse_door_time(show_date, time_txt)

    # TODO right now the below parsing doesn't work, so just skip these shows for now
    if not show.show_time and not show.door_time:
      # Fallback: look for a bare time with TIME_RE and record it as the door time
      time_match = self.TIME_RE.search(time_txt)

      if time_match:
        show.door_time = date_util.parse_date_and_time(show_date, time_match.group('time'))

    show.resources.resource_uris = self.resource_extractor.extract_resources(show_detail, show_time)

    # TODO work could be done here to find larger images (sometimes the img's are enclosed in an anchor tag)
    for img_tag in show_detail.iter(tag = 'img'):
      src = img_tag.get('src')

      # An <img> without a src attribute yields None; the substring checks
      # below would raise TypeError on it, so move on to the next image.
      if src is None:
        continue

      # Skip the images that show the early shows, later shows, and the 5 years logo
      if not ('early' in src or 'later' in src or '5years' in src):
        show.resources.image_url = src

        break

    return show
Ejemplo n.º 2
0
  def _parse_show(self, el):
    """Build a Show from a single event element.

    Reads the '.event-details' child of `el` for the date (its first
    <strong>), the time text, the performer names ('.event-name') and the
    resource links, and takes the optional image from '.event-image img'.

    Returns:
      The populated Show, after date_util.adjust_fuzzy_years has been
      applied to it with self._parse_started.
    """
    details = html_util.get_first_element(el, '.event-details')

    date_el  = html_util.get_first_element(details, 'strong')
    date_txt = date_el.text
    time_txt = details.text_content()

    show = Show()
    show.venue = self.venue()

    # The event name is split into individual performer names by lang_util
    name_el    = html_util.get_first_element(details, '.event-name')
    performers = []

    for name in lang_util.parse_performers(name_el.text_content()):
      performers.append(Performer(name))

    show.performers = performers
    show.show_time  = date_util.parse_show_time(date_txt, time_txt)
    show.door_time  = date_util.parse_door_time(date_txt, time_txt)

    show.resources.resource_uris = self.resource_extractor.extract_resources(details)

    # The image is looked up as optional, so it may be absent
    image_el = html_util.get_first_element(el, ".event-image img", optional = True)

    if image_el is not None:
      show.resources.image_url = image_el.get('src')

    date_util.adjust_fuzzy_years(show, self._parse_started)

    return show
Ejemplo n.º 3
0
    def _parse_show(self, link):
        """Fetch the event page at ``link`` and build a Show from it.

        The title is assembled from the leading run of h1/h2 children of the
        ``eventDetail`` element; date and time text come from the
        ``timeDetail`` element.

        Args:
            link: URL of the event page; also used as the show's merge key
                and show URL.

        Returns:
            The populated Show.
        """
        event_doc = html_util.fetch_and_parse(link)

        event_detail = event_doc.get_element_by_id("eventDetail")

        title_txt = []

        found_h_el = False

        # Start parsing when we find the first h* element
        # Stop parsing if we found an h* element, but then encounter anything else
        for el in event_detail.getchildren():
            if el.tag in ("h1", "h2"):
                found_h_el = True

                heading_txt = el.text_content()

                if heading_txt:
                    title_txt.append(heading_txt)
            elif found_h_el:
                break

        # Sample of the markup being parsed:
        #
        #   <span id="timeDetail">
        #     Apr 24, 2010<br />
        #     upstairs<br />
        #     Doors @ 7 PM<br/>
        #     $15.00 Adv. / $20 at the Door<br />
        #     <a href="http://www.deadcellentertainment.tix.com/Schedule.asp?OrganizationNumber=2690" target="_blank">
        #       <img src="/images/buyticket.png" alt="Purchase Tickets" />
        #     </a>
        #   </span>
        time_el = event_detail.get_element_by_id("timeDetail")

        # .text is only the text before the first child element (the date in
        # the sample above); text_content() is the element's full text.
        date_txt = time_el.text
        time_txt = time_el.text_content()

        show = Show()

        show.merge_key = link
        show.venue = self.venue()
        show.performers = [Performer(p) for p in lang_util.parse_performers("/".join(title_txt))]

        # Each field is filled by its matching parser (these were previously
        # crossed, storing the show time as the door time and vice versa).
        show.show_time = date_util.parse_show_time(date_txt, time_txt)
        show.door_time = date_util.parse_door_time(date_txt, time_txt)

        show.resources.show_url = link
        show.resources.resource_uris = self.resource_extractor.extract_resources(event_detail)

        img = html_util.get_first_element(event_detail, "img")

        if img is not None:
            show.resources.image_url = img.get("src")

        return show
Ejemplo n.º 4
0
  def _parse_show(self, link):
    """Fetch the event page at `link` and build a Show from it.

    Date and time text come from the '.info' element inside '#details';
    performers are gathered from every h1-h4 heading inside '#details'.

    Args:
      link: URL of the event page; also stored as merge key and show URL.

    Returns:
      The populated Show.
    """
    doc     = html_util.fetch_and_parse(link)
    column  = html_util.get_first_element(doc, '#tfly-center-column-wide')
    details = html_util.get_first_element(column, '#details')

    # Sample of the markup being parsed:
    #
    #   <div class="info">
    #     Sat, May 22, 2010<br />
    #     Doors: 6:00 PM / Show: 7:00 PM&nbsp;<br />
    #     $5.00<br />
    #   </div>
    info = html_util.get_first_element(details, ".info")

    # The date is the text before the first child; the time line is the text
    # that trails that first child element.
    date_txt    = info.text
    first_child = info.getchildren()[0]
    time_txt    = first_child.tail

    image = html_util.get_first_element(details, "img")

    # Visit headings grouped by level: all h1, then all h2, and so on
    performers = []

    for heading_tag in ('h1', 'h2', 'h3', 'h4'):
      for heading in details.iter(tag = heading_tag):
        performers += self._parse_performers(heading)

    show = Show()

    show.merge_key  = link
    show.venue      = self.venue()
    show.performers = performers
    show.show_time  = date_util.parse_show_time(date_txt, time_txt)
    show.door_time  = date_util.parse_door_time(date_txt, time_txt)

    show.resources.show_url      = link
    show.resources.resource_uris = self.resource_extractor.extract_resources(column)

    if image is not None:
      show.resources.image_url = image.get('src')

    return show
Ejemplo n.º 5
0
  def _parse_show(self, link, show_section):
    """Fetch the show page at `link` and build a Show from it.

    Args:
      link:         URL of the show page; also stored as merge key and show URL.
      show_section: not read by this method.

    Returns:
      The populated Show; `soldout` is True when a '.sold-out' element was
      found in the show detail.
    """
    doc    = html_util.fetch_and_parse(link)
    detail = html_util.get_first_element(doc, "#content .event-detail")

    date_el  = html_util.get_first_element(detail, ".dates")
    date_txt = date_el.text_content()
    time_el  = html_util.get_first_element(detail, ".times")
    time_txt = time_el.text_content()

    sold_out_el = html_util.get_first_element(detail, '.sold-out', optional = True)
    image_el    = html_util.get_first_element(detail, 'img',       optional = True)

    # The image we want is generally the first one, but if the layout changes this may break
    image_url = image_el.get('src') if image_el is not None else None

    # Visit headings grouped by level, ignoring those with no text
    performers = []

    for heading_tag in ('h1', 'h2', 'h3'):
      for heading in detail.iter(tag = heading_tag):
        if not heading.text_content():
          continue

        performers += self._parse_performers(heading)

    show = Show()

    show.merge_key  = link
    show.venue      = self.venue()
    show.performers = performers
    show.door_time  = date_util.parse_door_time(date_txt, time_txt)
    show.show_time  = date_util.parse_show_time(date_txt, time_txt)
    show.soldout    = sold_out_el is not None

    show.resources.show_url      = link
    show.resources.image_url     = image_url
    show.resources.resource_uris = self.resource_extractor.extract_resources(detail)

    return show
Ejemplo n.º 6
0
    def _parse_show(self, link):
        """Fetch the event page at ``link`` and build a Show from it.

        The event id is taken from the ``event_id`` group of ``self.IS_EVENT``
        matched against ``link`` and used to locate the event's element.

        Args:
            link: URL of the event page; also stored as merge key and show
                URL. NOTE(review): a link that IS_EVENT does not match would
                fail on ``.group`` -- presumably callers pre-filter links.

        Returns:
            The populated Show, with headliners listed before supports.
        """
        doc = html_util.fetch_and_parse(link)

        link_match = self.IS_EVENT.match(link)
        event_id = int(link_match.group("event_id"))

        detail = html_util.get_first_element(doc, ".tfly-event-id-%d" % event_id)

        # Dates and times are looked up on the whole document, not the detail element
        date_txt = html_util.get_first_element(doc, ".dates").text_content()
        time_txt = html_util.get_first_element(doc, ".times").text_content()

        image = html_util.get_first_element(detail, "img")

        # Each headliner element is one performer, taken verbatim
        performers = [
            Performer(headliner_el.text_content(), headliner=True)
            for headliner_el in html_util.get_elements(detail, ".headliners")
        ]

        # A supports element may name several performers, so split it
        for support_el in html_util.get_elements(detail, ".supports"):
            performers.extend(
                Performer(name, headliner=False)
                for name in lang_util.parse_performers(support_el.text_content())
            )

        show = Show()

        show.merge_key = link
        show.venue = self.venue()
        show.performers = performers
        show.show_time = date_util.parse_show_time(date_txt, time_txt)
        show.door_time = date_util.parse_door_time(date_txt, time_txt)

        show.resources.show_url = link
        show.resources.resource_uris = self.resource_extractor.extract_resources(detail)

        if image is not None:
            show.resources.image_url = image.get("src")

        return show