def samp_guess(samp):
  if DB.get('samp'):
    return True

  global samp_distribution

  # first to this amount is our winner
  cutoff = 10

  if samp not in samp_distribution:
    samp_distribution[samp] = 0

  samp_distribution[samp] += 1

  if samp_distribution[samp] > cutoff:
    DB.set('samp', samp)
    globals()['_FRAME_LENGTH'] = (1152.0 / samp)
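
# Worked example (editor's sketch, not part of the original module): an MPEG-1
# Layer III frame always carries 1152 PCM samples, so the per-frame duration is
# 1152 / sample_rate seconds. This is the arithmetic samp_guess performs once a
# sample rate wins the vote above.
def _frame_length_example():
  # 44100 Hz -> 1152 / 44100 ~ 0.02612 s (about 26 ms per frame);
  # 22050 Hz -> twice that, ~ 0.05224 s.
  assert abs(1152.0 / 44100 - 0.02612) < 1e-4
  assert abs(1152.0 / 22050 - 0.05224) < 1e-4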
def get_offset(force=False):
  # Contacts the goog, giving a longitude and latitude, and gets the time
  # offset with regard to UTC. There's a sqlite cache entry for the offset.
  # Returns the offset as an int number of minutes (the API reports seconds).
  import lib.misc as misc

  # If we are testing this from an API level, then we don't
  # have a database.
  if misc.IS_TEST:
    return 0

  offset_backup = DB.get('offset')
  offset = DB.get('offset', expiry=ONE_HOUR_SECOND * 4)

  if not offset or force:
    from urllib.request import urlopen

    when = int(unixtime())
    api_key = 'AIzaSyBkyEMoXrSYTtIi8bevEIrSxh1Iig5V_to'
    url = "https://maps.googleapis.com/maps/api/timezone/json?location=%s,%s&timestamp=%d&key=%s" % (
      misc.config['lat'], misc.config['long'], when, api_key)

    try:
      stream = urlopen(url)
      data = stream.read().decode('utf8')
      opts = json.loads(data)

    except Exception:
      opts = {'status': None}

    if opts['status'] == 'OK':
      logging.info("Location: %s | offset: %s | dst: %s" % (opts['timeZoneId'], opts['rawOffset'], opts['dstOffset']))
      offset = (int(opts['rawOffset']) + int(opts['dstOffset'])) / 60
      DB.set('offset', offset)

    else:
      # Use the old one.
      DB.set('offset', offset_backup)
      offset = offset_backup

  return int(float(offset))
def get_offset(force=False):
  # Contacts timezonedb, giving a longitude and latitude, and gets the time
  # offset with regard to UTC. There's a sqlite cache entry for the offset.
  # Returns the offset as an int number of minutes (the API reports seconds).
  import lib.misc as misc

  # If we are testing this from an API level, then we don't
  # have a database.
  if misc.IS_TEST:
    return 0

  offset_backup = DB.get('offset')
  offset = DB.get('offset', expiry=ONE_HOUR_SECOND * 4)

  if not offset or force:
    from urllib.request import urlopen

    api_key = misc.config['_private']['misc']['timezonedb_key']
    url = "http://api.timezonedb.com/v2.1/get-time-zone?key={}&by=position&lat={}&lng={}".format(api_key, misc.config['lat'], misc.config['long'])

    try:
      stream = urlopen(url)
      # The first line of the response is the XML declaration; discard it so
      # etree gets the bare document element.
      data = stream.read().decode('utf8').split("\n")[1]
      xml = etree.fromstring(data)
      offset = xml.xpath('gmtOffset')
      opts = {'status': 'OK', 'offset': int(offset[0].text)}

    except Exception as exc:
      print(exc)
      opts = {'status': None}

    if opts['status'] == 'OK':
      offset = opts['offset'] / 60
      logging.info("Found Offset: {}".format(offset))
      DB.set('offset', offset)

    else:
      # Use the old one.
      DB.set('offset', offset_backup)
      offset = offset_backup

  return int(float(offset))
def get_offset(force=False):
  # Contacts the goog, giving a longitude and latitude, and gets the time
  # offset with regard to UTC. There's a sqlite cache entry for the offset.
  # Returns the offset as an int number of minutes (the API reports seconds).
  import lib.misc as misc

  # If we are testing this from an API level, then we don't
  # have a database.
  if misc.IS_TEST:
    return 0

  offset_backup = DB.get('offset')
  offset = DB.get('offset', expiry=ONE_HOUR_SECOND * 4)

  if not offset or force:
    from urllib.request import urlopen

    when = int(unixtime())
    api_key = 'AIzaSyBkyEMoXrSYTtIi8bevEIrSxh1Iig5V_to'
    url = "https://maps.googleapis.com/maps/api/timezone/json?location=%s,%s&timestamp=%d&key=%s" % (misc.config['lat'], misc.config['long'], when, api_key)

    try:
      stream = urlopen(url)
      data = stream.read().decode('utf8')
      opts = json.loads(data)

    except Exception:
      opts = {'status': None}

    if opts['status'] == 'OK':
      logging.info("Location: %s | offset: %s" % (opts['timeZoneId'], opts['rawOffset']))
      offset = (int(opts['rawOffset']) + int(opts['dstOffset'])) / 60
      DB.set('offset', offset)

    else:
      # Use the old one.
      DB.set('offset', offset_backup)
      offset = offset_backup

  return int(float(offset))
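
# Worked example (editor's sketch): both timezone APIs above report offsets in
# seconds, which the code converts to minutes with the / 60. For US Pacific
# time during DST, Google returns rawOffset = -28800 and dstOffset = 3600, so
# the stored value is (-28800 + 3600) / 60 = -420 minutes, i.e. UTC-7.
def _offset_math_example():
  raw_offset, dst_offset = -28800, 3600   # seconds, per the API response
  assert (raw_offset + dst_offset) / 60 == -420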
def signature(fname, blockcount=-1, depth=1):
  global _LASTFORMAT

  audio_format = DB.get('format')

  if not audio_format:
    audio_format, start = get_audio_format(fname)

    if audio_format:
      logging.info("Setting this stream's audio format as %s" % audio_format)
      DB.set('format', audio_format)

    else:
      logging.warn("Can't determine type of file for %s." % fname)
      return None, None

  block = None
  if audio_format == _FORMAT_AAC:
    sig, block = aac_signature(fname, blockcount)

  # We permit the idea that a file can be a false positive. But we do not
  # permit the idea that a file can be a false positive and correctly register
  # over some number of sequential blocks (currently set at whatever the
  # constant is below).
  if audio_format == _FORMAT_MP3 or not block or len(block) < 5:
    sig, block = mp3_signature(fname, blockcount)

    if len(block) > 0 and audio_format == _FORMAT_AAC:
      DB.set('format', _FORMAT_MP3)
      DB.clear_cache()

  # Stream formats can actually change.
  if len(block) < 5:
    tryformat = _FORMAT_AAC

    if audio_format == _FORMAT_AAC:
      tryformat = _FORMAT_MP3

    DB.set('format', tryformat)
    DB.clear_cache()

    # Make sure we don't foolishly recurse.
    if depth == 1:
      return signature(fname, blockcount, depth + 1)

    else:
      # Otherwise, if we fail to find anything upon our change-format desperation
      # move, we should return the None type to be handled appropriately.
      return None, None

  _LASTFORMAT = audio_format
  return sig, block
def signature(fname, blockcount=-1, depth=1):
  global _LASTFORMAT

  audio_format = DB.get('format')

  if not audio_format:
    audio_format, start = get_audio_format(fname)

    if audio_format:
      logging.info("Setting this stream's audio format as %s" % audio_format)
      DB.set('format', audio_format)

    else:
      logging.warn("Can't determine type of file for %s." % fname)
      return False

  block = None
  if audio_format == _FORMAT_AAC:
    sig, block = aac_signature(fname, blockcount)

  # We permit the idea that a file can be a false positive. But we do not
  # permit the idea that a file can be a false positive and correctly register
  # over some number of sequential blocks (currently set at whatever the
  # constant is below).
  if audio_format == _FORMAT_MP3 or not block or len(block) < 5:
    sig, block = mp3_signature(fname, blockcount)

    if len(block) > 0 and audio_format == _FORMAT_AAC:
      DB.set('format', _FORMAT_MP3)
      DB.clear_cache()

  # Stream formats can actually change.
  if len(block) < 5:
    tryformat = _FORMAT_AAC

    if audio_format == _FORMAT_AAC:
      tryformat = _FORMAT_MP3

    DB.set('format', tryformat)
    DB.clear_cache()

    # Make sure we don't foolishly recurse.
    if depth == 1:
      return signature(fname, blockcount, depth + 1)

  _LASTFORMAT = audio_format
  return sig, block
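
# Usage sketch (editor's note; the path and the process() call below are
# hypothetical). The cached 'format' key means the expensive probe happens only
# once per stream, and the depth argument caps the change-format retry at a
# single level of recursion:
#
#   sig, block = signature('/tmp/streams/KPCC-chunk.mp3')
#   if block and len(block) >= 5:
#     # At least 5 sequential frames registered, so we trust the signature.
#     process(sig, block)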
def stream_manager():
  global g_download_kill_pid
  import random

  # Manager process which makes sure that the
  # streams are running appropriately.
  callsign = misc.config['callsign']

  #
  # AAC bitrate is some non-trivial thing that even ffprobe doesn't
  # do a great job at. This solution looks at the number of bits that
  # transit over the wire given a duration of time, and then uses
  # that to compute the bitrate, since in practice, that's what
  # bitrate effectively means, and why it's such an important metric.
  #
  # This is to compute a format-agnostic bitrate
  # (see heartbeat for more information).
  #
  has_bitrate = DB.get('bitrate')
  if has_bitrate and int(has_bitrate) == 0:
    has_bitrate = False

  first_time = 0
  total_bytes = 0
  normalize_delay = 6
  cycle_count = 0

  cascade_time = misc.config['cascade_time']
  cascade_buffer = misc.config['cascade_buffer']
  cascade_margin = cascade_time - cascade_buffer

  last_prune = 0
  last_success = 0
  last_heartbeat = None

  change_state = None
  SHUTDOWN = 1
  RESTART = 2
  shutdown_time = None
  misc.download_ipc = Queue()

  # Number of seconds to be cycling
  cycle_time = misc.config['cycle_time']

  process = None
  process_next = None

  # The manager will be the one that starts this.
  #server.manager(misc.config)
  webserver = Thread(target=server.manager, name='Webserver', args=(misc.config,))
  webserver.start()

  file_name = None

  # A wrapper function to start a download process.
  def download_start(file_name):
    """ Starts a process that manages the downloading of a stream. """
    global g_download_pid
    g_download_pid += 1

    #
    # There may be a multi-second lapse time from the naming of the file to
    # the actual start of the download, so we should err on that side by
    # putting it in the future by some margin.
    #
    file_name = '%s/%s-%s.mp3' % (misc.DIR_STREAMS, callsign, TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
    logging.info('Starting download #%d (%s). Next up in %ds' % (g_download_pid, file_name, cascade_margin))

    process = Thread(
      target=stream_download,
      name='Download-%d:%s' % (g_download_pid, TS.ts_to_name()),
      args=(callsign, misc.config['stream'], g_download_pid, file_name))
    process.daemon = True
    process.start()
    return [file_name, process]

  # See https://github.com/kristopolous/DRR/issues/91:
  # Randomize prune to offload disk peaks.
  prune_duration = misc.config['prune_every'] * (1.10 - random.random() / 5.0)
  misc.prune_duration = prune_duration

  last_heartbeat_tid = -1

  while True:
    if last_prune < (TS.unixtime('prune') - prune_duration):
      prune_duration = misc.config['prune_every'] * (1.10 - random.random() / 5.0)
      misc.prune_duration = prune_duration
      # We just assume it can do its business in under a day.
      prune = cloud.prune()
      last_prune = TS.unixtime('prune')
      misc.last_prune = last_prune

    if cycle_count % 30 == 0:
      # We only do these things occasionally; they
      # are either not very important or are not
      # expected to change that often.
      TS.get_offset()

    # Increment the amount of time this has been running.
    cycle_count += 1

    lr_set = False
    expired_heartbeat = last_heartbeat and time.time() - last_heartbeat > cycle_time * 2

    while not misc.queue.empty():
      what, value = misc.queue.get(False)

      # The curl process discovered a new stream to be
      # used instead.
      if what == 'stream':
        misc.config['stream'] = value
        logging.info("Using %s as the stream now" % value)
        # We expire our heartbeat in order to force a new stream
        # to start.
        expired_heartbeat = True

      elif what == 'db-debug':
        DB.debug()

      elif what == 'shutdown':
        change_state = SHUTDOWN

      elif what == 'restart':
        logging.info(DB.get('runcount', use_cache=False))
        cwd = os.getcwd()
        os.chdir(misc.PROCESS_PATH)
        Popen(sys.argv)
        os.chdir(cwd)

        change_state = RESTART

        # Try to record for another restart_overlap seconds - make sure that
        # we don't perpetually put this in the future due to some bug.
        if not shutdown_time:
          shutdown_time = TS.unixtime('dl') + misc.config['restart_overlap']
          logging.info("Restart requested ... shutting down download at %s" % TS.ts_to_name(shutdown_time, with_seconds=True))

          #misc.shutdown_real(do_restart=False)
          #misc.download_ipc.put(('shutdown_time', shutdown_time))

          while True:
            time.sleep(5)

            with open(misc.PIDFILE_MANAGER, 'r') as f:
              manager_pid = f.read()

            #print(manager_pid, os.getpid(), manager_pid == os.getpid())
            #logging.info(DB.get('runcount', use_cache=False))
            ps_out = int(os.popen('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:])).read().strip())

            if ps_out > 1:
              logging.info("Found %d potential candidates (need at least 2)" % ps_out)
              # This makes it a restricted soft shutdown.
              misc.shutdown_real(do_restart=True)
              misc.download_ipc.put(('shutdown_time', shutdown_time))
              break

            else:
              Popen(sys.argv)
              logging.warn("Couldn't find a replacement process ... not going anywhere.")

      elif what == 'heartbeat':
        if not lr_set:
          lr_set = True
          last_heartbeat = time.time()
          last_heartbeat_tid = value[1]

          if last_heartbeat_tid < g_download_kill_pid:
            logging.warn("hb: Got a heartbeat for #%d but everything below #%d should be gone!" % (last_heartbeat_tid, g_download_kill_pid))

          DB.set('last_recorded', time.time())

        if not has_bitrate:
          margin = 60

          # Keep track of the first time this stream started (this is where our
          # total byte count is derived from).
          if not first_time:
            first_time = value[0]

          #
          # Otherwise we give a large (in computer time) margin of time to confidently
          # guess the bitrate. I didn't do great at stats in college, but in my experiments,
          # the estimation falls within 98% of the destination. I'm pretty sure it's really
          # unlikely this will come out erroneous, but I really can't do the math; it's probably
          # a T value, but I don't know. Anyway, whatevs.
          #
          # The normalize_delay here is both for he-aac+ streams, which need to put in some
          # frames before the quantizing pushes itself up, and for other stations which
          # sometimes put a canned message at the beginning of the stream, like
          # "Live streaming supported by ..."
          #
          # When we discount the first half-dozen seconds as not being part of the total,
          # we get a stabilizing convergence far quicker.
          #
          elif (value[0] - first_time > normalize_delay):
            # If we haven't determined this stream's bitrate (which we use to estimate
            # the amount of content in a given archived stream), then we compute it
            # here instead of asking the parameters of a given block and then presuming.
            total_bytes += value[2]

            # We still give it a time period after the normalizing delay in order to
            # build enough samples to make a solid guess at what this number should be.
            if (value[0] - first_time > (normalize_delay + margin)):
              # We take the total bytes and average them over the elapsed time.
              est = total_bytes / (value[0] - first_time - normalize_delay)

              # Round to the nearest kilobyte per second, then multiply by 8 to
              # express it in kilobits per second.
              bitrate = int(round(est / 1000) * 8)
              #print("Estimated bitrate:%d total:%d est:%d denom:%d" % (bitrate, total_bytes, est, value[0] - first_time - normalize_delay))
              if bitrate > 0:
                DB.set('bitrate', bitrate)
                has_bitrate = DB.get('bitrate')

    #if last_heartbeat:
    #  logging.info("%d heartbeat %d" % (last_heartbeat, last_heartbeat_tid))

    # Check for our management process.
    if not misc.manager_is_running():
      logging.info("Manager isn't running")
      change_state = SHUTDOWN

    # We get here if we should NOT be recording. So we make sure we aren't.
    if change_state == SHUTDOWN or (change_state == RESTART and TS.unixtime('dl') > shutdown_time):
      misc.shutdown_real()

    else:
      if not process and not change_state:
        logging.info("Failed to find downloader, starting new one")
        file_name, process = download_start(file_name)
        last_success = TS.unixtime('dl')

      # If we've hit the time when we ought to cascade - that is, if our
      # last_success stream was more than cascade_time - cascade_buffer ago -
      # then we start our process_next.
      elif TS.unixtime('dl') - last_success > cascade_margin or expired_heartbeat:
        #logging.info("heartbeat expired %s %s %d %d %d" % (type(process_next), type(process), last_success, cascade_time, TS.unixtime('dl')))

        # And we haven't created the next process yet, then we start it now.
        if not process_next:
          logging.info("Failed to find downloader, starting new one")
          file_name, process_next = download_start(file_name)

    # If there is still no process then we should definitely bail.
    if not process:
      misc.shutdown_real()

    #
    # This needs to be on the outside loop in case we are doing a cascade
    # outside of a full mode. In this case, we will need to shut things down.
    #
    # If we are past the cascade_time and we have a process_next, then
    # we should shut down our previous process and move the pointers around.
    #
    if not change_state and (expired_heartbeat or (TS.unixtime('dl') - last_success > cascade_time and process)):
      g_download_kill_pid += 1
      #process.terminate()

      # If the process_next is running then we move our last_success forward
      # to the present.
      last_success = TS.unixtime('dl')

      # We rename our process_next AS OUR process...
      process = process_next

      # ...and then clear out the old process_next pointer.
      process_next = None

    time.sleep(cycle_time)
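
# Worked example (editor's sketch) of the bitrate estimate above: a 128 kbps
# stream delivers 16000 bytes/sec, so once the normalize_delay window is
# discounted, est ~ 16000, and round(16000 / 1000) * 8 == 128. The rounding
# snaps noisy measurements to the nearest 8 kbps step.
def _bitrate_estimate_example():
  est = 16000.0   # observed bytes/sec over the measurement window
  assert int(round(est / 1000) * 8) == 128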
def stream_manager():
  import random

  # Manager process which makes sure that the
  # streams are running appropriately.
  callsign = misc.config['callsign']

  #
  # AAC bitrate is some non-trivial thing that even ffprobe doesn't
  # do a great job at. This solution looks at the number of bits that
  # transit over the wire given a duration of time, and then uses
  # that to compute the bitrate, since in practice, that's what
  # bitrate effectively means, and why it's such an important metric.
  #
  # This is to compute a format-agnostic bitrate
  # (see heartbeat for more information).
  #
  has_bitrate = DB.get('bitrate')

  first_time = 0
  total_bytes = 0
  normalize_delay = 6

  cascade_time = misc.config['cascadetime']
  cascade_buffer = misc.config['cascadebuffer']
  cascade_margin = cascade_time - cascade_buffer

  last_prune = 0
  last_success = 0

  change_state = None
  SHUTDOWN = 1
  RESTART = 2
  shutdown_time = None
  misc.download_ipc = Queue()

  # Number of seconds to be cycling
  cycle_time = misc.config['cycletime']

  process = None
  process_next = None

  # The manager will be the one that starts this.
  misc.pid_map['webserver'] = Process(target=server.manager, args=(misc.config,))
  misc.pid_map['webserver'].start()

  file_name = None

  # A wrapper function to start a download process.
  def download_start(file_name):
    """ Starts a process that manages the downloading of a stream. """
    global g_download_pid
    g_download_pid += 1
    logging.info('Starting cascaded downloader #%d. Next up in %ds' % (g_download_pid, cascade_margin))

    #
    # There may be a multi-second lapse time from the naming of the file to
    # the actual start of the download, so we should err on that side by
    # putting it in the future by some margin.
    #
    file_name = '%s/%s-%s.mp3' % (misc.DIR_STREAMS, callsign, TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
    process = Process(target=stream_download, args=(callsign, misc.config['stream'], g_download_pid, file_name))
    process.start()
    return [file_name, process]

  # See https://github.com/kristopolous/DRR/issues/91:
  # Randomize prune to offload disk peaks.
  prune_duration = misc.config['pruneevery'] + (1 / 8.0 - random.random() / 4.0)

  while True:
    #
    # We cycle this to off for every run. By the time we go through the queue,
    # so long as we aren't supposed to be shutting down, this should be
    # toggled to true.
    #
    flag = False

    if last_prune < (TS.unixtime('prune') - TS.ONE_DAY_SECOND * prune_duration):
      prune_duration = misc.config['pruneevery'] + (1 / 8.0 - random.random() / 4.0)
      # We just assume it can do its business in under a day.
      misc.pid_map['prune'] = cloud.prune()
      last_prune = TS.unixtime('prune')

    TS.get_offset()

    lr_set = False

    while not misc.queue.empty():
      flag = True
      what, value = misc.queue.get(False)

      # The curl process discovered a new stream to be
      # used instead.
      if what == 'stream':
        misc.config['stream'] = value
        logging.info("Using %s as the stream now" % value)
        # We intentionally don't toggle the flag here, in order to shut down
        # the old process and start a new one.

      elif what == 'db-debug':
        DB.debug()

      elif what == 'shutdown':
        change_state = SHUTDOWN

      elif what == 'restart':
        logging.info(DB.get('runcount', use_cache=False))
        cwd = os.getcwd()
        os.chdir(misc.PROCESS_PATH)
        Popen(sys.argv)
        os.chdir(cwd)

        change_state = RESTART

        # Try to record for another restart_overlap seconds - make sure that
        # we don't perpetually put this in the future due to some bug.
        if not shutdown_time:
          shutdown_time = TS.unixtime('dl') + misc.config['restart_overlap']
          logging.info("Restart requested ... shutting down downloader at %s" % TS.ts_to_name(shutdown_time, with_seconds=True))

          while True:
            time.sleep(20)

            #logging.info(DB.get('runcount', use_cache=False))
            ps_out = int(os.popen('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:])).read().strip())

            if ps_out > 1:
              logging.info("Found %d potential candidates (need at least 2)" % ps_out)
              # This makes it a restricted soft shutdown.
              misc.shutdown_real(do_restart=True)
              misc.download_ipc.put(('shutdown_time', shutdown_time))
              break

            else:
              Popen(sys.argv)
              logging.warn("Couldn't find a replacement process ... not going anywhere.")

      elif what == 'heartbeat':
        if not lr_set and value[1] > 100:
          lr_set = True
          DB.set('last_recorded', time.time())

        if not has_bitrate:
          # Keep track of the first time this stream started (this is where our
          # total byte count is derived from).
          if not first_time:
            first_time = value[0]

          #
          # Otherwise we give a large (in computer time) margin of time to confidently
          # guess the bitrate. I didn't do great at stats in college, but in my experiments,
          # the estimation falls within 98% of the destination. I'm pretty sure it's really
          # unlikely this will come out erroneous, but I really can't do the math; it's probably
          # a T value, but I don't know. Anyway, whatevs.
          #
          # The normalize_delay here is both for he-aac+ streams, which need to put in some
          # frames before the quantizing pushes itself up, and for other stations which
          # sometimes put a canned message at the beginning of the stream, like
          # "Live streaming supported by ..."
          #
          # When we discount the first half-dozen seconds as not being part of the total,
          # we get a stabilizing convergence far quicker.
          #
          elif (value[0] - first_time > normalize_delay):
            # If we haven't determined this stream's bitrate (which we use to estimate
            # the amount of content in a given archived stream), then we compute it
            # here instead of asking the parameters of a given block and then presuming.
            total_bytes += value[1]

            # We still give it a time period after the normalizing delay in order to
            # build enough samples to make a solid guess at what this number should be.
            if (value[0] - first_time > (normalize_delay + 60)):
              # We take the total bytes and average them over the elapsed time.
              est = total_bytes / (value[0] - first_time - normalize_delay)

              # Round to the nearest kilobyte per second, then multiply by 8 to
              # express it in kilobits per second.
              bitrate = int(round(est / 1000) * 8)
              DB.set('bitrate', bitrate)

    # Check for our management process.
    if not misc.manager_is_running():
      logging.info("Manager isn't running")
      change_state = SHUTDOWN

    # The only way for the flag to be toggled off is if we are not in full-mode ...
    # we get here if we should NOT be recording. So we make sure we aren't.
    if change_state == SHUTDOWN or (change_state == RESTART and TS.unixtime('dl') > shutdown_time):
      process = my_process_shutdown(process)
      process_next = my_process_shutdown(process_next)
      misc.shutdown_real()

    else:
      # Didn't respond in cycle_time seconds, so kill it.
      if not flag:
        process = my_process_shutdown(process)

      if not process and not change_state:
        file_name, process = download_start(file_name)
        last_success = TS.unixtime('dl')

      # If we've hit the time when we ought to cascade - that is, if our
      # last_success stream was more than cascade_time - cascade_buffer ago -
      # then we start our process_next.
      elif TS.unixtime('dl') - last_success > cascade_margin:
        # And we haven't created the next process yet, then we start it now.
        if not process_next:
          file_name, process_next = download_start(file_name)

    # If there is still no process then we should definitely bail.
    if not process:
      misc.shutdown_real()

    #
    # This needs to be on the outside loop in case we are doing a cascade
    # outside of a full mode. In this case, we will need to shut things down.
    #
    # If we are past the cascade_time and we have a process_next, then
    # we should shut down our previous process and move the pointers around.
    #
    if not change_state and TS.unixtime('dl') - last_success > cascade_time and process:
      logging.info("Stopping cascaded downloader")
      process.terminate()

      # If the process_next is running then we move our last_success forward
      # to the present.
      last_success = TS.unixtime('dl')

      # We rename our process_next AS OUR process...
      process = process_next

      # ...and then clear out the old process_next pointer.
      process_next = None

    # Increment the amount of time this has been running.
    DB.incr('uptime', cycle_time)

    time.sleep(cycle_time)
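
# Timeline sketch (editor's note; the config values here are illustrative, not
# the project's defaults). With cascadetime = 3600 and cascadebuffer = 15,
# cascade_margin = 3585 and one recording cycle looks like:
#
#   t = 0     downloader #1 starts, last_success = 0
#   t = 3585  margin reached: downloader #2 starts while #1 keeps recording
#   t = 3600  cascade_time reached: #1 is terminated, #2 becomes `process`,
#             and last_success resets - the 15 s overlap avoids a gap in audio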