Code Example #1
File: atlas.py Project: adamsutton/PyEPG
def process_episode ( data ):
  e = Episode()
  e.uri   = data['uri']
  if 'title'          in data: e.title   = data['title']
  if 'description'    in data: e.summary = data['description']
  if 'episode_number' in data: e.number  = data['episode_number']
  if 'genres'         in data: e.genres  = get_genres(data['genres'])

  # Media type (ignore virtual entries)
  if 'schedule_only' not in data or not data['schedule_only']:
    if 'media_type'     in data: e.media   = data['media_type']

  # Brand/Series
  c_uri = None
  s_uri = None
  if 'container' in data and 'uri' in data['container']:
    c_uri = data['container']['uri']
  if 'series_summary' in data and 'uri' in data['series_summary']:
    s_uri = data['series_summary']['uri']
  if c_uri and c_uri != s_uri:
    e.brand  = get_brand(c_uri, data['container'])
  if s_uri:
    e.series = get_series(s_uri, data['series_summary'])
    # complete the link
    if e.series and e.brand:
      e.series.brand = e.brand

  # Film?
  if 'specialization' in data:
    e.film = data['specialization'] == 'film'
    if 'year' in data:
      e.year = int(data['year'])

  # Black and White?
  if 'black_and_white' in data:
    e.baw = data['black_and_white']

  # People
  if 'people' in data:
    e.credits = process_people(data['people'])

  # Title
  if e.title:
    r = re.search(r'^Episode (\d+)$', e.title)
    if r:
      e.title = None
      if e.number is None:
        e.number = util.str2num(r.group(1))
    elif re.search(r'^\d+/\d+/\d+$', e.title):
      e.title = None

  # OK
  log.debug('episode = %s' % e, 5)
  return e
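
The title cleanup at the end of process_episode is worth seeing in isolation: generic titles such as 'Episode 4' are turned into an episode number and bare dates are dropped. A minimal standalone sketch of the same two regexes (using int() in place of util.str2num):

import re

for title in ['Episode 4', '12/03/2011', 'The Real Title']:
  r = re.search(r'^Episode (\d+)$', title)
  if r:
    print('%s -> number %d' % (title, int(r.group(1))))
  elif re.search(r'^\d+/\d+/\d+$', title):
    print('%s -> dropped' % title)
  else:
    print('%s -> kept' % title)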
Code Example #2
File: atlas.py Project: adamsutton/PyEPG
def get_channel ( uri, data ):
  log.debug('get_channel(%s)' % uri, 4)

  # Check cache
  ret = cache.get_channel(uri)

  # Process
  if ret is None:
    ret = process_channel(data)

    # Cache
    if ret: cache.put_channel(uri, ret)

  return ret
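
get_channel, and get_brand/get_series below, all follow the same cache-aside shape: try the cache, compute on a miss, store any truthy result. Reduced to a self-contained sketch (the module-level dict and the compute callable are hypothetical stand-ins for the cache module):

_cache = {}

def cache_aside(key, compute):
  ret = _cache.get(key)
  if ret is None:
    ret = compute(key)
    if ret:
      _cache[key] = ret
  return ret

print(cache_aside('bbc1', lambda k: k.upper()))  # computed: 'BBC1'
print(cache_aside('bbc1', lambda k: k.upper()))  # served from _cache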
Code Example #3
File: epg.py Project: adamsutton/PyEPG
  def finish ( self ):
    def tsort ( x, y ):
      r = cmp(x.start, y.start)
      if not r:
        r = cmp(x.stop, y.stop)
      return r
    for c in self.schedule:
      self.schedule[c].sort(cmp=tsort)
      p = None
      for i in self.schedule[c]:
        if p and p.stop > i.start:
          log.debug('epg - schedule overlap detected')
          log.debug('epg - assume multi-provider discrepancy, will correct')
          p.stop = i.start
        p = i
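
The cmp-based comparator is Python 2 only: both cmp() and list.sort(cmp=...) were removed in Python 3. The same (start, stop) ordering can be expressed with a key function, which also works on Python 2; a standalone sketch:

from collections import namedtuple

Prog  = namedtuple('Prog', 'start stop')
sched = [Prog(10, 20), Prog(5, 15), Prog(5, 12)]
sched.sort(key=lambda p: (p.start, p.stop))  # primary key start, tie-break on stop
print(sched)  # [Prog(start=5, stop=12), Prog(start=5, stop=15), Prog(start=10, stop=20)]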
Code Example #4
File: atlas.py Project: adamsutton/PyEPG
def get_brand ( uri, data = None ):
  log.debug('get_brand(%s)' % uri, 4)

  # Check the cache
  ret = cache.get_brand(uri)

  # Get remote
  if ret is None:
    try:
      if not data or data.keys() == ['uri']:
        data = get_content(uri, 'brand')
      if data:
        ret = process_brand(data)
    except: pass

    # Put in cache
    if ret: cache.put_brand(uri, ret)
  return ret
Code Example #5
File: atlas.py Project: adamsutton/PyEPG
def get_series ( uri, data = None ):
  log.debug('get_series(%s)' % uri, 4)

  # Check cache
  ret = cache.get_series(uri)

  # Get remote
  if ret is None:
    try:
      if not data or data.keys() == [ 'uri' ]:
        data = get_content(uri, 'series')
      if data:
        ret = process_series(data)
    except: pass

    # Cache
    if ret: cache.put_series(uri, ret)

  return ret
Code Example #6
File: cache.py Project: adamsutton/PyEPG
def put_file ( name, data, imeta = {} ):
  log.debug('cache: put file %s' % name, 3)

  # Fix meta (use lower case keys)
  meta = {}
  for k in imeta: meta[k.lower()] = imeta[k]

  # Add MD5
  if 'md5' not in meta:
    meta['md5'] = md5(data)

  # Store
  path = CACHE_PATH + os.path.sep + name
  if not os.path.exists(os.path.dirname(path)):
    os.makedirs(os.path.dirname(path))
  with open(path, 'w') as f:
    f.write(data)
  with open(path + '.meta', 'w') as f:
    f.write(repr(meta))
  log.debug('cache: file %s stored' % name)
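
The md5 helper called above is defined elsewhere in cache.py and not shown in this listing; a plausible definition, assuming it is a thin wrapper over hashlib (on Python 3 the data argument would need to be bytes):

import hashlib

def md5(data):
  # Hex digest of the file body; _get_file below compares this
  # against the stored 'md5' meta key to validate the cache entry.
  return hashlib.md5(data).hexdigest()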
Code Example #7
File: atlas.py Project: adamsutton/PyEPG
def publisher_overlay ( a, b, pubs ):
  ignore_keys = conf.get('atlas_overlay_ignore', [ 'uri' ])#, 'transmission_end_time' ])
  pa   = a['publisher']['key']
  pb   = b['publisher']['key']
  ia   = -1
  ib   = -1
  try:
    ia = pubs.index(pa)
  except: pass
  try:
    ib = pubs.index(pb)
  except: pass
  def _overlay ( a, b ):
    if type(b) == dict:
      for k in b:
        if k not in a:
          a[k] = b[k]
        elif k not in ignore_keys:
          a[k] = _overlay(a[k], b[k])
      return a
    elif type(b) == list:
      for i in range(len(b)):
        if i < len(a):
          a[i] = _overlay(a[i], b[i])
        else:
          a.append(b[i])
      return a
    else:
      return b
  if ib < ia:
    a, b = b, a
  args = (a['uri'],
          a['broadcasts'][0]['transmission_time'].strftime('%H:%M'),
          a['broadcasts'][0]['transmission_end_time'].strftime('%H:%M'),
          b['uri'],
          b['broadcasts'][0]['transmission_time'].strftime('%H:%M'),
          b['broadcasts'][0]['transmission_end_time'].strftime('%H:%M'))
  log.debug('overlay %s @ %s-%s with %s @ %s-%s' % args, 6)
  return _overlay(a, b)
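
The merge rules of the inner _overlay are easier to see on plain data: keys missing from the first argument are filled in from the second, scalar conflicts are won by the second argument, and keys in ignore_keys keep the first argument's value. The same logic as a standalone demo:

def overlay(a, b, ignore_keys=('uri',)):
  if type(b) == dict:
    for k in b:
      if k not in a:
        a[k] = b[k]
      elif k not in ignore_keys:
        a[k] = overlay(a[k], b[k])
    return a
  elif type(b) == list:
    for i in range(len(b)):
      if i < len(a):
        a[i] = overlay(a[i], b[i])
      else:
        a.append(b[i])
    return a
  return b

a = {'uri': 'a://1', 'title': 'News'}
b = {'uri': 'b://2', 'title': 'News at Ten', 'year': 2012}
print(overlay(a, b))  # uri kept, title overridden, year filled in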
Code Example #8
File: atlas.py Project: adamsutton/PyEPG
def atlas_fetch ( url, conn ):
  jdata = None
  url   = ('http://%s/3.0/' % ATLAS_API_HOST) + url
  log.debug('fetch %s' % url, 2)
  
  # Can fail occasionally - give more than 1 attempt
  t = 2.0
  for i in range(5):
    try:
      data = cache.get_url(url, cache=False, conn=conn)
      if data:
        log.debug('decode json', 3)
        jdata = json.loads(data)
        log.debug(jdata, 5, pprint=True)
        break
    except Exception, e:
      import traceback
      traceback.print_exc()
      log.warn('failed to fetch %s [e=%s]' % (url, e))
    time.sleep(t)
    t *= 2
  return jdata
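
atlas_fetch wraps the request in a retry loop with exponential backoff: up to five attempts, doubling the delay after each failure. The bare pattern as a self-contained sketch (fn stands in for the cache.get_url/json.loads step):

import time

def with_backoff(fn, attempts=5, delay=2.0):
  for i in range(attempts):
    try:
      ret = fn()
      if ret:
        return ret
    except Exception:
      pass  # atlas_fetch logs the failure at this point
    time.sleep(delay)
    delay *= 2
  return None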
Code Example #9
File: atlas.py Project: adamsutton/PyEPG
  def run ( self ):
    log.debug('atlas - data thread %3d started' % self._idx, 0)
    while True:
      c = sched = None
      try:
        (c, pubs, sched) = self._inq.get()
      except Empty:
        break
      log.debug('atlas - data thread %3d process %s' % (self._idx, c.title), 0)

      # Process times
      for s in sched:
        for i in range(len(s['broadcasts'])):
          for k in s['broadcasts'][i]:
            if 'time' in k:
              try:
                s['broadcasts'][i][k] = atlas_p_time(s['broadcasts'][i][k])
              except: pass

      # Process overlays
      log.debug('atlas - data thread %3d overlay %s' % (self._idx, c.title), 1)
      log.debug('atlas - publishers %s' % pubs, 2)
      sched = process_publisher_overlay(sched, pubs)

      # Process into EPG
      log.debug('atlas - data thread %3d store   %s' % (self._idx, c.title), 1)
      process_schedule(self._epg, c, sched)

      # Done
      self._inq.task_done()

    log.debug('atlas - data thread %3d complete' % self._idx, 0)
Code Example #10
File: atlas.py Project: adamsutton/PyEPG
  def run ( self ):
    conn = None
    log.debug('atlas - grab thread %3d started' % self._idx, 0)

    # Create connection
    import httplib
    retry = conf.get('atlas_conn_retry_limit', 5)
    while not conn and retry:
      try:
        conn  = httplib.HTTPConnection(ATLAS_API_HOST)
        log.debug('atlas - grab thread %3d conn created' % self._idx, 1)
      except:
        retry = retry - 1
        time.sleep(conf.get('atlas_conn_retry_period', 2.0))
    if not conn:
      log.error('atlas - grab thread %3d failed to connect' % self._idx)
      return

    # Config
    key    = conf.get('atlas_apikey', None)
    p_pubs = conf.get('atlas_primary_publishers',\
                      [ 'bbc.co.uk', 'itv.com', 'tvblob.com',\
                        'channel4.com' ])
    s_pubs = conf.get('atlas_secondary_publishers',\
                      [ 'pressassociation.com' ])
    anno   = [ 'broadcasts', 'extended_description', 'series_summary',\
               'brand_summary', 'people' ]
    tsize  = conf.get('atlas_time_chunk',
                      (self._stop - self._start).total_seconds())

    # Time
    tm_from = time.mktime(self._start.timetuple())
    tm_to   = time.mktime(self._stop.timetuple())

    # URL base
    url = 'schedule.json?'
    url = url + 'annotations=' + ','.join(anno)
    if key:  url = url + '&apiKey=' + key

    # Until queue exhausted
    while True:
    
      # Get next entry
      c = None
      try:
        c = self._inq.get_nowait()
      except Empty:
        break
      log.debug('atlas - grab thread %3d fetch   %s' % (self._idx, c.title), 0)
      sched = []

      # By time
      tf = tm_from
      while tf < tm_to:
        tt = min(tf + tsize, tm_to)
        a  = (time.strftime('%Y-%m-%d %H:%M', time.localtime(tf)),\
              time.strftime('%Y-%m-%d %H:%M', time.localtime(tt)))
        #log.info('atlas -     period %s to %s' % a)

        # Process each publisher
        pubs = []
        for p in s_pubs: pubs.append(p)
        for p in p_pubs:
          if p in c.publisher: pubs.append(p)
        log.debug('PUBS: %s' % pubs, 0)
        for p in pubs:
          #log.info('atlas -       publisher %s' % p)
          u = url + '&from=%d&to=%d' % (tf, tt)
          u = u + '&publisher=' + p
          u = u + '&channel_id=' + c.shortid

          # Fetch data
          data  = atlas_fetch(u, conn)

          # Process
          if data and 'schedule' in data:
            for s in data['schedule']:
              if 'items' in s:
                sched.extend(s['items'])

        # Update
        tf = tf + tsize

      # Put into the output queue
      log.debug('atlas - grab thread %3d fetched %s' % (self._idx, c.title), 1)
      self._outq.put((c, pubs, sched))
      self._inq.task_done()

    # Done
    if conn: conn.close()
    log.debug('atlas - grab thread %3d complete' % self._idx, 0)
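
The inner while loop above walks the requested period in atlas_time_chunk-sized windows, clamping the final window at the stop time. Isolated as a generator sketch:

def windows(start, stop, size):
  tf = start
  while tf < stop:
    yield (tf, min(tf + size, stop))
    tf += size

print(list(windows(0, 10, 4)))  # [(0, 4), (4, 8), (8, 10)]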
Code Example #11
File: cache.py Project: adamsutton/PyEPG
def get_url ( url, cache = True, ttl = 0, conn = None ):
  import urllib2, urlparse
  log.debug('cache: get url %s' % url, 3)
  ret = None

  # Create directories
  urlp = urlparse.urlparse(url)
  path = urlp.netloc + os.path.sep + urlp.path[1:]
  http = urlp.scheme in [ 'http', 'https' ]

  # Don't cache dynamic requests
  if urlp.params or urlp.query: cache = False

  # Create request
  req  = urllib2.Request(url)
  req.add_header('User-Agent', PYEPG_USER_AGENT)

  # Check cache
  if cache:
    (data, meta, fresh, valid) = _get_file(path, ttl=ttl)

    # OK
    if data and meta and valid:

      # Check remote headers
      if not fresh:
        head = {}

        # Fetch remote headers
        if http and conn:
          log.debug('cache: use persistent connection', 5)
          conn.request('GET', url, None, {'User-Agent':PYEPG_USER_AGENT})
          h = conn.getresponse().getheaders()
          for (k,v) in h: head[k.lower()] = v
        else:
          req.get_method = lambda: 'HEAD'
          up   = urllib2.urlopen(req, timeout=60.0)
          head = up.headers

        # Static page unmodded
        if 'last-modified' in head and 'last-modified' in meta and\
          head['last-modified'] == meta['last-modified']:
          log.debug('cache: last-modified matches', 4)
          ret = data

          # Update timestamp
          touch_file(path)

      # OK
      else:
        ret = data

  # Fetch
  if not ret:
    log.debug('cache: fetch remote', 1)
    head = {}
    if http and conn:
      log.debug('cache: use persistent connection', 5)
      conn.request('GET', url, None, {'User-Agent':PYEPG_USER_AGENT})
      r   = conn.getresponse()
      for (k,v) in r.getheaders(): head[k.lower()] = v
      ret = r.read()
    else:
      req.get_method = lambda: 'GET'
      up   = urllib2.urlopen(req, timeout=60.0)
      ret  = up.read()
      head = up.headers

    # Store
    if cache:
      put_file(path, ret, head)
  
  return ret
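
When a cached copy exists but its TTL has expired, get_url revalidates it by fetching only the headers and comparing Last-Modified against the stored meta. That check in isolation, in the same urllib2 style as above (USER_AGENT is a stand-in for the module's PYEPG_USER_AGENT):

import urllib2

USER_AGENT = 'PyEPG'  # stand-in for PYEPG_USER_AGENT

def is_unmodified(url, meta, timeout=60.0):
  # HEAD request: headers only, no body transfer
  req = urllib2.Request(url)
  req.add_header('User-Agent', USER_AGENT)
  req.get_method = lambda: 'HEAD'
  head = urllib2.urlopen(req, timeout=timeout).headers
  return 'last-modified' in head and \
         head['last-modified'] == meta.get('last-modified')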
Code Example #12
File: cache.py Project: adamsutton/PyEPG
def touch_file ( name ):
  log.debug('cache: touch %s' % name)
  path = CACHE_PATH + os.path.sep + name
  if os.path.exists(path):
    os.utime(path, None)
Code Example #13
File: cache.py Project: adamsutton/PyEPG
def _get_file ( name, ttl = None ):
  import time
  log.debug('cache: get file %s' % name, 3)
  ok    = False
  data  = None
  meta  = None
  valid = False
  path  = CACHE_PATH + os.path.sep + name

  # Default TTL
  if ttl is None: ttl = conf.get('default_cache_ttl', 7*86400)

  # Check age
  if os.path.exists(path) and os.path.exists(path + '.meta'):
    log.debug('cache: %s in cache' % name, 4)
    st   = os.stat(path)
    meta = eval(open(path + '.meta').read())
    data = open(path).read()

    # OK
    if (st.st_mtime + ttl) > time.time():
      log.debug('cache: %s ttl ok' % name, 4)
      ok = True
    
    # TTL passed
    else:
      log.debug('cache: %s ttl expired' % name, 4)

    # Validate
    if 'md5' in meta and meta['md5'] == md5(data):
      log.debug('cache: %s md5 ok' % name, 4)
      valid = True
    else:
      log.debug('cache: %s md5 mismatch' % name)

  # Return data
  return (data, meta, ok, valid)
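
The .meta sidecar files are written with repr() in put_file and read back with eval() here. Since the meta dict only ever holds string literals, ast.literal_eval would be a safer drop-in reader for the same format; a round-trip demo:

import ast

meta = {'md5': 'abc123', 'last-modified': 'Tue, 01 Jan 2013 00:00:00 GMT'}
text = repr(meta)                       # what put_file writes
print(ast.literal_eval(text) == meta)   # True: what _get_file reads back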