Esempio n. 1
0
  def _festival(self, rows):
    if len(rows) == 0:
      return None
      
    info = DictForNameValue(rows)
    
    for r in ('name', 'merge-key', 'season-merge-key', 'season-start-date', 'season-end-date'):
      if info.get(r) == None:
        raise Exception("Festival lacks required field: %s" % r)

    festival_query  = Q(merge_key = info['merge-key'])
    festival_kwargs = {'name': info['name'], 'merge_key': info['merge-key']}
    
    festival, created = Festival.objects.get_or_create(festival_query, defaults = festival_kwargs)
    
    season_query  = Q(festival_id = festival.id, merge_key = info['season-merge-key'])
    season_kwargs = {'festival_id': festival.id,
                     'merge_key':   info['season-merge-key'],
                     'start_date':  date_util.parse_date_time(info['season-start-date']),
                     'end_date':    date_util.parse_date_time(info['season-end-date'])
                    }

    season, created = FestivalSeason.objects.get_or_create(season_query, defaults = season_kwargs)
      
    return festival, season
Esempio n. 2
0
  def _process_entry_group(self, start_date, entries):
    """Fold a group of calendar entries into a single Show.

    Every entry that is not skipped contributes one Performer (named after
    the entry title); the show time is the earliest start time among the
    entries that were kept. start_date is accepted but not used here.

    entries is sorted in place, latest start time first.

    Returns a one-element list containing the Show.
    """
    show = Show()

    show.venue      = self.venue()
    show.performers = []

    def by_start_time(e):
      return e.when[0].start_time

    entries.sort(key = by_start_time, reverse = True)

    for entry in entries:
      logger.debug("Processing entry: %s, starting on: %s" % (entry.title.text, entry.when[0].start_time))

      # A start time without a 'T' carries no time-of-day part: full day
      # events usually denote a title, which is currently simply skipped
      if 'T' not in entry.when[0].start_time:
        logger.debug('Entry "%s" is an all day event, skipping' % entry.title.text)
        continue

      if 'pub side' in entry.title.text.lower():
        logger.debug('Entry "%s" is on the Pub Side of Spike Hill, skipping' % entry.title.text)
        continue

      start_time = date_util.parse_date_time(entry.when[0].start_time)

      # Keep whichever is earlier: this entry or what has been seen so far
      earliest_so_far = show.show_time or start_time
      show.show_time  = min(start_time, earliest_so_far)

      show.performers.append(Performer(entry.title.text))

    return [show]
Esempio n. 3
0
  def _trans_show(self, show_data):
    """Translate one event record into a Show.

    Returns None for records that are skipped: those whose 'MajorGenre'
    does not contain "Music", those flagged 'Canceled', and those whose
    'EventName' contains 'VIP Packages'.

    The first performer parsed out of 'EventName' is marked as headliner.
    """
    LOG.debug("Checking event: %s" % show_data['EventName'])

    if "Music" not in show_data['MajorGenre']:
      LOG.debug("Skipping non music show")
      return None

    if show_data.get('Canceled'):
      LOG.debug("Skipping cancelled show")
      return None

    if 'VIP Packages' in show_data['EventName']:
      LOG.debug("Skipping VIP package")
      return None

    show = Show()

    names   = lang_util.parse_performers(show_data['EventName'])
    lineup  = [Performer(name, headliner = (position == 0)) for position, name in enumerate(names)]

    show.merge_key  = show_data['EventId']
    show.venue      = self.venue()
    show.performers = lineup
    show.show_time  = date_util.parse_date_time(show_data['EventDate'])

    # Localizing show_time with show_data['Timezone'] is currently disabled.

    images = show_data['AttractionImage']

    if images:
      show.resources.image_url = self._image_url(show_data, images[0])

    return show
Esempio n. 4
0
  def _parse_show(self, link):
    """Fetch the event page at link and build a Show from it.

    Each non-empty <h1> inside the "mainColumn" element becomes a
    Performer. The show time is the date parsed from the '.date' element
    with the hour forced to 21. The merge key is the 'page_id' group of
    EVENT_URL matched against link.

    Returns the populated Show.
    """
    LOG.debug("Fetching show: %s" % link)

    event_doc    = html_util.fetch_and_parse(link)

    event_detail = event_doc.get_element_by_id("mainColumn")

    show = Show()

    for performer in html_util.get_elements(event_detail, 'h1'):
      name = performer.text_content().strip(' \n\r\t')
      if name:
        show.performers.append(Performer(name))

    date_txt = html_util.get_first_element(event_detail, '.date').text_content()

    # NOTE(review): if link does not match EVENT_URL, match() presumably
    # returns None and the group() call below raises AttributeError.
    event_match = self.EVENT_URL.match(link)

    show.merge_key = event_match.group('page_id')
    show.venue     = self.venue()
    show.show_time = date_util.parse_date_time(date_txt).replace(hour = 21)

    LOG.debug('Date: %s' % show.date)

    show.resources.show_url      = link
    show.resources.resource_uris = self.resource_extractor.extract_resources(event_detail)

    # Use the first image whose src mentions 'main'. An <img> without a src
    # attribute yields None from get() and must be skipped rather than
    # tested with 'in', which would raise TypeError.
    for img_tag in event_detail.iter(tag = 'img'):
      src = img_tag.get('src')

      if src and 'main' in src:
        show.resources.image_url = src

        break

    return show
Esempio n. 5
0
  def _process_entry(self, entry):
    """Turn a single calendar entry into a Show.

    Only entries whose title matches BACK_ROOM_RE are used; for those, the
    matched part is stripped from the title and the remainder is parsed
    into performers.

    Returns the Show, or None when the title does not match.
    """
    logger.debug("Processing entry: %s, starting on: %s" % (entry.title.text, entry.when[0].start_time))

    if self.BACK_ROOM_RE.match(entry.title.text):
      title_txt = self.BACK_ROOM_RE.sub('', entry.title.text)

      show = Show()

      show.venue      = self.venue()
      show.performers = [Performer(name) for name in lang_util.parse_performers(title_txt)]
      show.show_time  = date_util.parse_date_time(entry.when[0].start_time)

      return show

    return None
Esempio n. 6
0
  def _get_parser(self):
    """Yield the shows found in the calendar feed for the next ~90 days.

    The feed is queried from yesterday up to 90 days after that, capped at
    500 results. Entries with more than one `when` are treated as
    recurring and handed to _process_recurring_entries. The remaining
    entries are either grouped by start date and passed to
    _process_entry_group (when group_by_date() is true) or passed one by
    one to _process_entry.

    Falsy results from any of the processors are dropped.
    """
    calendar_service = CalendarService()

    yesterday    = datetime.today() - timedelta(days = 1)
    three_months = yesterday + timedelta(days = 90)

    query = CalendarEventQuery(self.calendar_id(), 'public', 'full')

    # Spelled out instead of '%F': that directive is a platform extension
    # of the C library and is not available everywhere.
    query.start_min      = yesterday.strftime('%Y-%m-%d')
    query.start_max      = three_months.strftime('%Y-%m-%d')

    query['max-results'] = '500'

    feed = calendar_service.CalendarQuery(query)

    def start_date(e):
      return date_util.parse_date_time(e.when[0].start_time).date()

    single    = []
    recurring = []

    for e in feed.entry:
      if len(e.when) > 1:
        recurring.append(e)
      else:
        single.append(e)

    for show in self._process_recurring_entries(recurring):
      if show:
        yield show

    if self.group_by_date():
      # groupby only merges adjacent items, so sort by the same key first
      single.sort(key = start_date)

      for batch_date, date_entries in groupby(single, start_date):
        for show in self._process_entry_group(batch_date, list(date_entries)):
          # Previously the show was yielded even when falsy; skip those,
          # matching the other two branches.
          if show:
            yield show
    else:
      for entry in single:
        show = self._process_entry(entry)

        if show:
          yield show
Esempio n. 7
0
    def _parse_show(self, api, event):
        """Build a Show from one event dict.

        Reads the "id", "name", "start_time" and (optional) "description"
        keys of event. The description is HTML-escaped, wrapped in a
        minimal HTML document and handed to the resource extractor. The
        api argument is accepted but not used here.

        Returns the populated Show.
        """
        LOG.debug("Parsing event: %s" % event["id"])

        show = Show()

        show.merge_key = event["id"]
        show.venue = self.venue()

        performer_names = lang_util.parse_performers(event["name"])
        show.performers = [Performer(name) for name in performer_names]

        show.show_time = date_util.parse_date_time(event["start_time"])

        # Escape the free-text description before embedding it in markup
        description = cgi.escape(event.get("description", ""))
        doc = lxml.html.document_fromstring(u"<html><body>%s</body></html>" % description)

        resources = show.resources
        resources.show_url = self.EVENT_URL % event["id"]
        resources.image_url = self.PICTURE_URL % event["id"]
        resources.resource_uris = self.resource_extractor.extract_resources(doc)

        return show
Esempio n. 8
0
    def _get_parser(self):
        """Yield a Show for every event of the profile that has not started yet.

        The profile's "events" connection is listed first; the ids of
        events whose start time is at or after the current time are then
        fetched in one get_objects call and each result is passed to
        _parse_show.
        """
        api = GraphAPI(self.settings["facebook_access_token"])
        events = api.get_connections(self.profile_id(), "events")

        today = datetime.today()

        upcoming_ids = [
            info["id"]
            for info in events["data"]
            if date_util.parse_date_time(info["start_time"]) >= today
        ]

        # Nothing upcoming: skip the second API round trip entirely
        if not upcoming_ids:
            return

        for event in api.get_objects(upcoming_ids).values():
            yield self._parse_show(api, event)
Esempio n. 9
0
  def _parse_show(self, url, section):
    """Build a Show from a listing section and the detail page at url.

    The date comes from the 'datetime' attribute of the 'time.dtstart'
    element inside '#detailPage' on the fetched page; the performers come
    from the text of the first 'h4' in section. url doubles as merge key
    and show URL.

    Returns the populated Show.
    """
    detail_doc = html_util.fetch_and_parse(url)
    detail_el  = html_util.get_first_element(detail_doc, '#detailPage')

    time_el  = html_util.get_first_element(detail_el, 'time.dtstart')
    date_txt = time_el.get('datetime')

    heading = html_util.get_first_element(section, 'h4')
    title   = heading.text_content()

    show = Show()

    show.merge_key  = url
    show.venue      = self.venue()
    show.performers = [Performer(name) for name in lang_util.parse_performers(title)]
    show.show_time  = date_util.parse_date_time(date_txt)

    # Resources are collected from both the listing section and the detail page
    show.resources.show_url      = url
    show.resources.resource_uris = self.resource_extractor.extract_resources(section, detail_el)

    return show
Esempio n. 10
0
  def _trans_show(self, event):
    """Translate an event object into a Show.

    Every artist of the event becomes a Performer, the first one flagged
    as headliner. The show image is the COVER_MEGA cover image of the
    last artist that has one (later artists overwrite earlier ones).

    Returns the populated Show.
    """
    LOG.debug("Transforming show: %s" % event.get_title())

    show = Show()

    performers = []

    for i, artist in enumerate(event.get_artists()):
      performers.append(Performer(artist.get_name(), headliner = (i == 0)))

      # Look the cover up once per artist instead of twice.
      # NOTE(review): get_cover_image presumably hits the remote API on
      # each call - confirm; either way one call is sufficient.
      cover_url = artist.get_cover_image(size = pylast.COVER_MEGA)

      if cover_url:
        show.resources.image_url = cover_url

    show.merge_key  = event.get_id()
    show.venue      = self.venue()
    show.performers = performers
    show.show_time  = date_util.parse_date_time(event.get_start_date())

    show.resources.show_url = event.get_url()

    return show
Esempio n. 11
0
  def _parse_show(self, link, show_section):
    """Build a Show from a listing section and the show page at link.

    The date is read from the '.date' element of show_section; the image
    and performers are read from the "content" element of the fetched
    page. Every anchor inside '.performers' is passed to
    _parse_performers, and anchors whose href matches IS_ARTIST_URL_RE
    are additionally fetched via fetch_performer_content so their content
    takes part in resource extraction.

    Returns the populated Show.
    """
    show_doc    = html_util.fetch_and_parse(link)

    show_detail = show_doc.get_element_by_id("content")

    date_txt    = html_util.get_first_element(show_section, '.date').text
    image_url   = html_util.get_first_element(show_detail,  '.left-view-header img').get('src')

    performers = []

    performer_detail = html_util.get_first_element(show_detail, '.performers')
    performer_urls   = []

    for anchor in performer_detail.iter(tag = 'a'):
      performers.extend(self._parse_performers(anchor))

      # An anchor without an href yields None, which must not be passed
      # to the regex match.
      href = anchor.get('href')

      if href and self.IS_ARTIST_URL_RE.match(href):
        performer_urls.append(href)

    resource_sections = [show_section, show_detail]

    for url in performer_urls:
      resource_sections.extend(self.fetch_performer_content(url))

    show = Show()

    show.merge_key               = link
    show.venue                   = self.venue()
    show.performers              = performers
    show.show_time               = date_util.parse_date_time(date_txt)
    show.resources.show_url      = link
    show.resources.image_url     = image_url
    show.resources.resource_uris = self.resource_extractor.extract_resources(*resource_sections)

    return show
Esempio n. 12
0
  def _trans_record(self, record):
    """Translate one record (a mapping of field name to text) into a Show.

    Fields read: 'venue-name', 'venue-url', 'title', 'merge-key',
    'performers' and 'tags' (both comma separated), 'show-date'
    (required), 'show-time', 'door-time', 'show-url' and 'image-url'.

    Returns the populated Show.
    Raises Exception when 'show-date' is missing or empty.
    """
    show = Show()

    show.venue     = Venue(record.get('venue-name'), record.get('venue-url'))
    show.title     = record.get('title')
    show.merge_key = record.get('merge-key')

    performers = []

    if record.get('performers'):
      performers = [Performer(name.strip()) for name in record['performers'].split(',')]

    if record.get('tags'):
      show.tags = [t.strip() for t in record['tags'].split(',')]

    date_txt = record.get('show-date')

    if not date_txt:
      raise Exception('Show Date is required')

    show.date = date_util.parse_date_time(date_txt)

    # Only assign when something was parsed, leaving Show's own default
    # in place otherwise
    if performers:
      show.performers = performers

    # The explicit show time wins; the door time is only a fallback.
    # Previously a door time silently overwrote a given show time.
    # NOTE(review): if Show has a dedicated door-time field, the door
    # time should probably be stored there instead - confirm.
    if record.get('show-time'):
      show.show_time = date_util.parse_date_and_time(date_txt, record.get('show-time'))
    elif record.get('door-time'):
      show.show_time = date_util.parse_date_and_time(date_txt, record.get('door-time'))

    show.resources.show_url      = record.get('show-url')
    show.resources.image_url     = record.get('image-url')
    show.resources.resource_uris = self.resource_extractor.extract_resources(self._create_resource_doc(record))

    return show
Esempio n. 13
0
  def _parse_shows(self, entry):
    """Parse the shows listed in the HTML content of one feed entry.

    The entry's 'text/html' content is cleaned of inline formatting tags,
    run through REPLACEMENTS and split on SHOW_DIVIDER_RE. Each resulting
    part is expected to start with a header line (date, then further
    fields separated by HEADER_SEP_RE, one of which holds the time)
    followed by a body listing the performers.

    Entries published outside the current year are ignored, except those
    published in November or December of the previous year. Parts with no
    recognizable time, or whose date/time cannot be parsed, are logged
    and skipped.

    Returns a list of Show objects (possibly empty).
    """
    content = None
    shows   = []
    today   = datetime.now()

    entry_date = date_util.parse_date_time(entry.published)

    # Only parse shows for the current year, or at the tail end of last year.
    # (The previous condition rejected every entry not from the current
    # year, so the tail-of-last-year case could never be accepted.)
    is_current_year   = entry_date.year == today.year
    is_last_year_tail = entry_date.year == today.year - 1 and entry_date.month > 10

    if not (is_current_year or is_last_year_tail):
      return []

    # When several html items are present the last one wins
    for item in entry.content:
      if item.type in ('text/html',):
        content = item.value

    if not content:
      logging.error('Unable to extract content from entry: %s' % entry.id)

      return []

    # This next part is technically pretty evil
    entry_doc = lxml.html.fromstring(content)

    tags = ['span', 'b', 'i', 'strong', 'em']

    cleaner = Cleaner(remove_tags = tags, links = False)

    clean_content = cleaner.clean_html(entry_doc)

    # FIXME patch lxml to handle this while calling text_content()
    # http://codespeak.net/pipermail/lxml-dev/2008-August/004009.html
    content_str = lxml.html.tostring(clean_content)

    for regexp, replacement in self.REPLACEMENTS:
      content_str = regexp.sub(replacement, content_str)

    for part in self.SHOW_DIVIDER_RE.split(content_str):
      part = part.strip(' \t\n')

      # First line is the header, everything after it is the body
      lines = part.split('\n')

      header = lines.pop(0)
      body   = '\n'.join(lines)

      header_parts = self.HEADER_SEP_RE.split(header)

      date_txt = header_parts.pop(0)
      time_txt = None

      # The first remaining header field that looks like a time is used
      for header_part in header_parts:
        if date_util.STRICT_TIME_RE.search(header_part):
          time_txt = date_util.sanitize_time(header_part)

          break

      if not time_txt:
        logging.error('Unable to find time in header: %s' % header)

        continue

      # For a range such as "8-10" only the start is kept
      if '-' in time_txt:
        time_txt = time_txt.split('-')[0].strip()

      # Times without an am/pm suffix are taken to be in the evening
      if not time_txt.endswith(('am', 'pm')):
        time_txt = time_txt + 'pm'

      show_doc = lxml.html.fromstring(body)

      use_all         = False
      performer_parts = []
      all_parts       = []

      for el in show_doc.iter():
        # Performer text is only collected up to the first image
        if self._is_img(el):
          break

        text = el.text or ''
        tail = el.tail or ''

        for regexp in self.BODY_SKIP:
          if regexp.search(text):
            text = ''

          if regexp.search(tail):
            tail = ''

        for p in (text, tail):
          if p:
            all_parts.append(p)

        # Performer names found outside of links, or links joined by
        # anything other than a plain separator, mean the link texts
        # alone are not the full lineup: fall back to parsing all text.
        if text and el.tag != 'a':
          use_all = True

        if el.tag == 'a' and tail.strip() not in (',', '&', 'w/', ''):
          use_all = True

        if el.tag == 'a':
          performer_parts.append(text)

      img_url = None

      for img in show_doc.iter(tag = 'img'):
        if img.get('src'):
          img_url = img.get('src')

          break

      show = Show()

      show.venue = self.venue()

      if use_all:
        performers_str  = ' '.join(all_parts).replace(' ,', ',').replace('  ', ' ')

        show.performers = [Performer(name) for name in lang_util.parse_performers(performers_str)]
      else:
        show.performers = [Performer(name) for name in performer_parts if name]

      try:
        show.show_time = date_util.parse_date_and_time(date_txt, time_txt)
      except Exception:
        # Best effort: one unparseable show must not abort the whole
        # entry, but KeyboardInterrupt / SystemExit still propagate.
        logging.exception('Unable to parse: %s - %s' % (date_txt, time_txt))
        continue

      show.resources.image_url     = img_url
      show.resources.resource_uris = self.resource_extractor.extract_resources(show_doc)

      date_util.adjust_fuzzy_years(show, entry_date)

      shows.append(show)

    return shows