# Fetch the first entry of the given type from an Atlas content URI
def get_content(uri, type):
    ret = None
    try:
        data = atlas_fetch_content(uri)
        if data and 'contents' in data:
            for c in data['contents']:
                if 'type' in c and c['type'] == type:
                    ret = c
                    break
    except Exception, e:
        log.error(str(e))
    return ret
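# Illustrative use of the helper above; the URI and the 'brand' type value
# are hypothetical examples, not taken from the Atlas API docs.
#
#   data = get_content('http://atlas.metabroadcast.com/3.0/content/xyz', 'brand')
#   if data:
#       brand = process_brand(data)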
# Map an Atlas brand record onto a Brand object
def process_brand(data):
    ret = None
    try:
        b = Brand()
        b.uri = data['uri']
        if 'title' in data:
            b.title = data['title']
        if 'description' in data:
            b.summary = data['description']
        if 'genres' in data:
            b.genres = get_genres(data['genres'])
        ret = b
    except Exception, e:
        log.error(str(e))
    return ret
def _channels():
    # Fetch remote data
    log.info('fetch free to air channel info')
    chn_data = cache.get_data('uk_satellite_channels.csv', ttl=86400 * 7)
    reg_data = cache.get_data('uk_satellite_regions.csv', ttl=86400 * 7)

    # Channels list
    log.info('processing channel list')
    regional = []
    chns = []
    for l in chn_data.splitlines()[1:]:
        p = l.strip().split(',')
        if len(p) < 11:  # fields p[0]..p[10] are used below
            continue
        try:
            c = Channel()
            c.extra['stream'] = [(int(p[0]), p[1])]
            c.uri = p[2]
            c.title = p[3]
            c.extra['freesat_number'] = int(p[4])
            c.number = c.extra['sky_number'] = int(p[5])
            c.hd = p[6] == '1'
            c.radio = p[8] == '1'
            c.image = p[10] if p[10] else p[9]

            # Skip entries without a URI
            if not c.uri:
                continue

            # Already included: merge the stream details
            if c in chns:
                for t in chns:
                    if t == c:
                        t.extra['stream'].extend(c.extra['stream'])
                        break
                continue

            # Regional channel
            if p[7] == '1':
                regional.append(c)

            # Store
            elif c.extra['stream'][0][0]:
                chns.append(c)
        except Exception, e:
            log.error('failed to process [%s] [e=%s]' % (l, str(e)))
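# Column layout assumed for uk_satellite_channels.csv, inferred from the
# indexing above (header names are illustrative, not authoritative):
#
#    0: stream id        1: stream name      2: uri
#    3: title            4: freesat number   5: sky number
#    6: hd flag          7: regional flag    8: radio flag
#    9: fallback image  10: preferred image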
def db_init(path):
    # Check version
    db_conn = None
    db_ver = None
    db_path = os.path.join(path, 'cache.db')
    try:
        if os.path.exists(db_path):
            db_conn = sqlite.connect(db_path)
            sql = 'select value from metadata where key="version"'
            cur = db_conn.cursor()
            cur.execute(sql)
            res = cur.fetchall()
            if res:
                db_ver = int(res[0][0])
    except Exception, e:
        log.error('failed to check DB version: %s' % str(e))
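# A minimal sketch of initialising a fresh cache DB so that the version
# check above succeeds; the schema here is assumed, not taken from the
# real module.
def db_create(db_path, version):
    conn = sqlite.connect(db_path)
    cur = conn.cursor()
    cur.execute('create table if not exists metadata (key text, value text)')
    cur.execute('insert into metadata (key, value) values (?, ?)',
                ('version', str(version)))
    conn.commit()
    return conn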
# Map an Atlas series record onto a Series object
def process_series(data):
    ret = None
    try:
        s = Series()
        s.uri = data['uri']
        if 'title' in data:
            s.title = data['title']
        if 'description' in data:
            s.summary = data['description']
        if 'series_number' in data:
            s.number = data['series_number']
        if 'genres' in data:
            s.genres = get_genres(data['genres'])
        # Generic "Series N" titles carry no real name, only a number
        if s.title:
            r = re.search('Series (.*)', s.title)
            if r:
                s.title = None
                if s.number is None:
                    s.number = util.str2num(r.group(1))
        ret = s
    except Exception, e:
        log.error(str(e))
    return ret
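# Behaviour of the title handling above (illustrative; assumes
# util.str2num('4') returns 4):
#
#   s = process_series({'uri': 'x', 'title': 'Series 4'})
#   # s.title is None, s.number == 4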
# Map an Atlas channel record onto a Channel object
def process_channel(data):
    ret = None
    try:
        c = Channel()
        if 'title' in data:
            c.title = data['title'].encode('utf8')
        elif 'channel_title' in data:
            c.title = data['channel_title'].encode('utf8')
        if 'uri' in data:
            c.uri = data['uri']
        elif 'channel_uri' in data:
            c.uri = data['channel_uri']
        if 'channel_key' in data:
            c.shortid = data['channel_key']
        elif 'id' in data:
            c.shortid = data['id']
        if 'broadcaster' in data and 'key' in data['broadcaster']:
            c.publisher.append(data['broadcaster']['key'])
        if 'media_type' in data:
            c.radio = data['media_type'] == 'audio'
        c.hd = 'HD' in c.title
        ret = c
    except Exception, e:
        log.error(str(e))
    return ret
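# Shape of an Atlas channel record the mapper above accepts (field names
# taken from the code, values illustrative):
#
#   {'title': 'BBC One HD', 'uri': 'http://...', 'channel_key': 'bbcone',
#    'broadcaster': {'key': 'bbc.co.uk'}, 'media_type': 'video'}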
        # (tail of atlas_fetch: retry loop with exponential backoff)
        try:
            data = cache.get_url(url, cache=False, conn=conn)
            if data:
                log.debug('decode json', 3)
                jdata = json.loads(data)
                log.debug(jdata, 5, pprint=True)
                break
        except Exception, e:
            import traceback
            traceback.print_exc()
            log.warn('failed to fetch %s [e=%s]' % (url, e))
        time.sleep(t)
        t *= 2
    if not jdata:
        log.error('failed to fetch %s, giving up' % url)
    return jdata

# Get content data
def atlas_fetch_content(uri, key=None):
    url = 'content.json?uri=%s' % uri
    if key:
        url = url + '&apiKey=%s' % key
    return atlas_fetch(url)

# Parse time
def atlas_p_time(tm):
    ret = datetime.datetime.strptime(tm, '%Y-%m-%dT%H:%M:%SZ')
    ret = ret.replace(tzinfo=util.TimeZoneSimple(0))
    return ret
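# A standalone sketch of the retry pattern above (exponential backoff);
# fetch() is a hypothetical stand-in for cache.get_url().
def fetch_with_backoff(fetch, url, attempts=5, delay=1.0):
    import time
    for _ in range(attempts):
        try:
            data = fetch(url)
            if data:
                return data
        except Exception:
            pass
        time.sleep(delay)
        delay *= 2  # double the wait after each failure
    return None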
def configure(opts, args, conf_path=None):

    #
    # Global
    #
    print 'System Configuration'
    print '-' * 60

    # Number of days to grab
    days = conf.get('days', 7)
    while True:
        print 'Days to grab [%d]: ' % days,
        t = sys.stdin.readline().strip()
        if not t:
            break
        try:
            days = int(t)
            break
        except ValueError:
            pass
    conf.set('days', days)

    # Postcode
    print '\nPostcode (for regional TV) [%s]: ' % conf.get('postcode', ''),
    pc = sys.stdin.readline().strip()
    if pc:
        conf.set('postcode', pc)

    #
    # Grabber
    #
    grabbers = get_grabbers()
    if not grabbers:
        log.error('no grabbers available')
        sys.exit(1)
    options = map(lambda x: x[0], grabbers)
    idx = get_select('\nSelect grabber:', options)
    grabber = grabbers[idx][1]
    conf.set('grabber', grabbers[idx][0])
    print ''
    print 'Grabber: %s' % grabbers[idx][0]

    #
    # Formatter
    #
    formatters = get_formatters()
    if not formatters:
        log.error('no formatters available')
        sys.exit(1)
    options = map(lambda x: x[0], formatters)
    idx = get_select('\nSelect formatter:', options)
    formatter = formatters[idx][1]
    conf.set('formatter', formatters[idx][0])
    print ''
    print 'Formatter: %s' % formatters[idx][0]

    #
    # Grabber/Formatter config
    #
    if hasattr(grabber, 'configure'):
        grabber.configure()
    if hasattr(formatter, 'configure'):
        formatter.configure()

    #
    # Channels
    #
    channels = []
    print ''
    print 'Channel Configuration'
    print '-' * 60

    # Get packages
    packages = grabber.packages()
    options = ['Skip']
    options.extend(map(lambda x: x.title(), packages))
    idx = get_select('Select Platform:', options)

    # Platform
    if idx:
        package = packages[idx - 1]
        conf.set('package', package.id())

        # Exclusions
        exclude = []
        a = None
        while a not in ['y', 'n', 'yes', 'no']:
            print '\nWould you like to add exclusions (y/n)? ',
            a = sys.stdin.readline().strip().lower()
        if a in ['y', 'yes']:
            for c in package.channels():
                a = None
                while a not in ['y', 'n', 'yes', 'no']:
                    print '\n  %s (y/n)? ' % c.title,
                    a = sys.stdin.readline().strip().lower()
                if a in ['y', 'yes']:
                    exclude.append(c.title)

        # Store (skipping any excluded channels)
        channels = []
        for c in package.channels():
            if c.title not in exclude:
                channels.append(c.uri)
        conf.set('channel[]', channels)

    #
    # Output summary and get confirmation
    #
    # TODO

    #
    # Save
    #
    conf.save()
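# get_select() is used above but defined elsewhere; a minimal sketch of the
# assumed behaviour (print numbered options, return the chosen 0-based index):
def get_select(prompt, options):
    print prompt
    for i in range(len(options)):
        print '  %d. %s' % (i + 1, options[i])
    while True:
        print 'Select (1-%d): ' % len(options),
        t = sys.stdin.readline().strip()
        try:
            idx = int(t) - 1
            if 0 <= idx < len(options):
                return idx
        except ValueError:
            pass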
def run(self):
    conn = None
    log.debug('atlas - grab thread %3d started' % self._idx, 0)

    # Create connection
    import httplib
    retry = conf.get('atlas_conn_retry_limit', 5)
    while not conn and retry:
        try:
            conn = httplib.HTTPConnection(ATLAS_API_HOST)
            log.debug('atlas - grab thread %3d conn created' % self._idx, 1)
        except Exception:
            retry = retry - 1
            time.sleep(conf.get('atlas_conn_retry_period', 2.0))
    if not conn:
        log.error('atlas - grab thread %3d failed to connect' % self._idx)
        return

    # Config
    key = conf.get('atlas_apikey', None)
    p_pubs = conf.get('atlas_primary_publishers',
                      ['bbc.co.uk', 'itv.com', 'tvblob.com', 'channel4.com'])
    s_pubs = conf.get('atlas_secondary_publishers',
                      ['pressassociation.com'])
    anno = ['broadcasts', 'extended_description', 'series_summary',
            'brand_summary', 'people']
    # Time chunk in seconds (the default covers the whole window)
    tsize = conf.get('atlas_time_chunk',
                     util.total_seconds(self._stop - self._start))

    # Time
    tm_from = time.mktime(self._start.timetuple())
    tm_to = time.mktime(self._stop.timetuple())

    # URL base
    url = 'schedule.json?annotations=' + ','.join(anno)
    if key:
        url = url + '&apiKey=' + key

    # Until queue exhausted
    while True:

        # Get next entry
        try:
            c = self._inq.get_nowait()
        except Empty:
            break
        log.debug('atlas - grab thread %3d fetch %s' % (self._idx, c.title), 0)
        sched = []

        # By time
        tf = tm_from
        while tf < tm_to:
            tt = min(tf + tsize, tm_to)
            a = (time.strftime('%Y-%m-%d %H:%M', time.localtime(tf)),
                 time.strftime('%Y-%m-%d %H:%M', time.localtime(tt)))
            #log.info('atlas - period %s to %s' % a)

            # Process each publisher (all secondary publishers, primary
            # only if the channel lists them)
            pubs = []
            for p in s_pubs:
                pubs.append(p)
            for p in p_pubs:
                if p in c.publisher:
                    pubs.append(p)
            log.debug('PUBS: %s' % pubs, 0)
            for p in pubs:
                #log.info('atlas - publisher %s' % p)
                u = url + '&from=%d&to=%d' % (tf, tt)
                u = u + '&publisher=' + p
                u = u + '&channel_id=' + c.shortid

                # Fetch data
                data = atlas_fetch(u, conn)

                # Process
                if data and 'schedule' in data:
                    for s in data['schedule']:
                        if 'items' in s:
                            sched.extend(s['items'])

            # Update
            tf = tf + tsize

        # Put into the output queue
        log.debug('atlas - grab thread %3d fetched %s' % (self._idx, c.title), 1)
        self._outq.put((c, pubs, sched))
        self._inq.task_done()

    # Done
    if conn:
        conn.close()
    log.debug('atlas - grab thread %3d complete' % self._idx, 0)
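# Shape of the schedule request built above (values illustrative):
#
#   schedule.json?annotations=broadcasts,extended_description,series_summary,brand_summary,people
#       &apiKey=KEY&from=1354320000&to=1354406400
#       &publisher=pressassociation.com&channel_id=cbbh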
def grab(epg, channels, start, stop):
    import multiprocessing as mp

    # Filter the channel list (only include those we have listings for)
    channels = filter_channels(channels)
    days = util.total_seconds(stop - start) / 86400
    channels = sorted(channels, cmp=lambda a, b: cmp(a.number, b.number))
    log.info('atlas - epg grab %d channels for %d days' % (len(channels), days))

    # Config
    grab_thread_cnt = conf.get('atlas_grab_threads', 32)
    data_thread_cnt = conf.get('atlas_data_threads', 0)
    if grab_thread_cnt <= 0:
        grab_thread_cnt = len(channels)
    if data_thread_cnt <= 0:
        data_thread_cnt = mp.cpu_count() * 2
    data_thread_cnt = min(data_thread_cnt, len(channels))
    grab_thread_cnt = min(grab_thread_cnt, len(channels))

    # Create input/output queues
    inq = ChannelQueue(channels)
    outq = DataQueue(len(channels))

    # Create grab threads
    grab_threads = []
    for i in range(grab_thread_cnt):
        t = GrabThread(i, inq, outq, start, stop)
        grab_threads.append(t)

    # Create data threads
    data_threads = []
    for i in range(data_thread_cnt):
        t = DataThread(i, outq, epg)
        data_threads.append(t)

    # Start threads
    for t in grab_threads:
        t.start()
    for t in data_threads:
        t.start()

    # Wait for completion (inq first)
    ins = outs = len(channels)
    while True:
        s = inq.remain()
        if s != ins:
            ins = s
            log.info('atlas - grab %3d/%3d channels remain' % (s, len(channels)))
        s = outq.remain()
        if s != outs:
            outs = s
            log.info('atlas - proc %3d/%3d channels remain' % (s, len(channels)))
        if not ins and not outs:
            break

        # Safety checks
        i = 0
        for t in grab_threads:
            if t.isAlive():
                i = i + 1
        if not i and ins:
            log.error('atlas - grab threads have died prematurely')
            break
        i = 0
        for t in data_threads:
            if t.isAlive():
                i = i + 1
        if not i and outs:
            log.error('atlas - proc threads have died prematurely')
            break
        time.sleep(1.0)
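# ChannelQueue and DataQueue are not shown in this file; a minimal sketch
# of the interface assumed above, built on the standard Queue module
# (remain() reports how many entries are still unfinished; DataQueue is
# analogous):
from Queue import Queue

class ChannelQueue(Queue):
    def __init__(self, channels):
        Queue.__init__(self)
        for c in channels:
            self.put(c)

    def remain(self):
        return self.unfinished_tasks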