def base_stats():
  # Reports base-level statistical information about the health of the server.
  # This is used for the /stats and /heartbeat call.
  try:
    # For some reason this can lead to a memory error
    load = [float(unit) for unit in os.popen("/usr/bin/uptime | awk -F : ' { print $NF } '").read().split(', ')]
  except Exception:
    load = 0

  uptime = TS.uptime()
  return {
    'human-uptime': "%dd %02d:%02d:%02d" % (
      uptime // TS.ONE_DAY_SECOND,
      (uptime // TS.ONE_HOUR_SECOND) % 24,
      (uptime // 60) % 60,
      uptime % 60
    ),
    'human-now': TS.ts_to_name(),
    'computer-uptime': uptime,
    'computer-now': time.time(),
    'last-recorded': float(DB.get('last_recorded', use_cache=False) or 0),
    'hits': DB.run('select sum(value) from kv where key like "%hit%"').fetchone()[0],
    'version': __version__,
    'uuid': config['uuid'],
    'next-prune': int(last_prune - (TS.unixtime('prune') - prune_duration)),
    'load': load,
    'files': [m.path for m in psutil.Process().open_files()],
    'connections': len(psutil.Process().connections()),
    'memory': [
      # Current memory footprint in MB
      psutil.Process(os.getpid()).memory_info().rss / (1024.0 * 1024),
      # Maximum lifetime memory footprint in MB
      resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
    ],
    'threads': [thread.name for thread in threading.enumerate()],
    'disk': cloud.size('.') / (1024.0 ** 3)
  }

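# A quick check of the 'human-uptime' math above, pulled out as a standalone
# sketch (constants inlined here as an assumption: one hour = 3600 s, one
# day = 86400 s). 90061 seconds is 1 day, 1 hour, 1 minute, 1 second.
ONE_HOUR_SECOND = 3600
ONE_DAY_SECOND = 86400

def human_uptime(uptime):
  # Mirrors the formatting in base_stats(); floor division keeps %d exact.
  return "%dd %02d:%02d:%02d" % (
    uptime // ONE_DAY_SECOND,
    (uptime // ONE_HOUR_SECOND) % 24,
    (uptime // 60) % 60,
    uptime % 60
  )

assert human_uptime(90061) == "1d 01:01:01"
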
def main():
  client = db.get_client("localhost", 27017)
  db_conn = db.connect_to_db(client, "medium")
  collection = db.get_collection(db_conn, "userinfo")
  users = db.get(collection, {})
  db.close(client)

  g = graph.create_graph(get_nodes(users), 200)

  graph_plt = graph.plot_graph(g, {
    "with_labels": False,
    "node_color": "blue",
    "width": 1.0,
    "node_width": 0.5
  })
  graph_plt.show()

  degree_histogram_plt = graph.plot_degree_distribution(g)
  degree_histogram_plt.show()

  betweenness_plt = graph.plot_betweenness(g)
  betweenness_plt.show()

  clustering_coefficient_plot = graph.plot_clustering_coefficient(g)
  clustering_coefficient_plot.show()

  print(graph.page_rank(g))
  print(graph.average_clustering_coefficient(g))

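# The graph module above is project-specific. As a point of reference, here is
# a minimal sketch of the same two summary metrics computed with networkx --
# an assumption for illustration, not necessarily what graph.py wraps:
import networkx as nx

def metrics_sketch(edges):
  g = nx.Graph(edges)
  return {
    'page_rank': nx.pagerank(g),
    'average_clustering_coefficient': nx.average_clustering(g)
  }

# Example: a triangle with a pendant node hanging off one corner.
print(metrics_sketch([(1, 2), (2, 3), (1, 3), (3, 4)]))
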
def our_mime():
  our_format = DB.get('format') or 'mp3'

  if our_format == 'aac':
    return 'audio/aac'

  # Default to mp3
  return 'audio/mpeg'

def get_size(fname):
  # Gets a file size or just plain guesses it if it doesn't exist yet.
  if os.path.exists(fname):
    return os.path.getsize(fname)

  # Otherwise we try to parse the magical file which doesn't exist yet.
  ts_re_duration = re.compile(r'_(\d*).{4}')
  ts = ts_re_duration.findall(fname)

  if len(ts):
    duration_min = int(ts[0])
    bitrate = int(DB.get('bitrate') or 128)

    #
    # Estimating mp3 length is actually pretty easy if you don't have ID3 headers.
    # MP3s are rated at things like 128kb/s ... well there you go.
    #
    # They consider a k to be 10^3, not 2^10
    #
    return int((bitrate / 8) * (duration_min * 60) * (10 ** 3))

  # If we can't find it based on the name, then we are kinda
  # SOL and just return 0
  return 0

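# A sanity check of the estimate above: at 128 kb/s the payload is
# (128 / 8) = 16 KB per second, so a 30-minute file should come out to
# roughly 16 * 1800 * 1000 = 28,800,000 bytes.
assert int((128 / 8) * (30 * 60) * (10 ** 3)) == 28800000
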
def get_offset(force=False):
  # Contacts timezonedb, giving a longitude and latitude, and gets the time
  # offset with regard to UTC. There's a sqlite cache entry for the offset.
  # Returns an int minute offset.
  import lib.misc as misc

  # If we are testing this from an API level, then we don't
  # have a database
  if misc.IS_TEST:
    return 0

  offset_backup = DB.get('offset')
  offset = DB.get('offset', expiry=ONE_HOUR_SECOND * 4)

  if not offset or force:
    from urllib.request import urlopen

    api_key = misc.config['_private']['misc']['timezonedb_key']
    url = "http://api.timezonedb.com/v2.1/get-time-zone?key={}&by=position&lat={}&lng={}".format(api_key, misc.config['lat'], misc.config['long'])

    try:
      stream = urlopen(url)
      data = stream.read().decode('utf8').split("\n")[1]
      xml = etree.fromstring(data)
      offset = xml.xpath('gmtOffset')
      opts = {'status': 'OK', 'offset': int(offset[0].text)}

    except Exception as exc:
      print(exc)
      opts = {'status': None}

    if opts['status'] == 'OK':
      offset = opts['offset'] / 60
      logging.info("Found Offset: {}".format(offset))
      DB.set('offset', offset)

    else:
      # Use the old one
      DB.set('offset', offset_backup)
      offset = offset_backup

  return int(float(offset))

def get_offset(force=False):
  # Contacts the goog, giving a longitude and latitude, and gets the time
  # offset with regard to UTC. There's a sqlite cache entry for the offset.
  # Returns an int minute offset.
  import lib.misc as misc

  # If we are testing this from an API level, then we don't
  # have a database
  if misc.IS_TEST:
    return 0

  offset_backup = DB.get('offset')
  offset = DB.get('offset', expiry=ONE_HOUR_SECOND * 4)

  if not offset or force:
    from urllib.request import urlopen

    when = int(unixtime())
    api_key = 'AIzaSyBkyEMoXrSYTtIi8bevEIrSxh1Iig5V_to'
    url = "https://maps.googleapis.com/maps/api/timezone/json?location=%s,%s&timestamp=%d&key=%s" % (misc.config['lat'], misc.config['long'], when, api_key)

    try:
      stream = urlopen(url)
      data = stream.read().decode('utf8')
      opts = json.loads(data)

    except Exception:
      opts = {'status': None}

    if opts['status'] == 'OK':
      logging.info("Location: %s | offset: %s | dst: %s" % (opts['timeZoneId'], opts['rawOffset'], opts['dstOffset']))
      offset = (int(opts['rawOffset']) + int(opts['dstOffset'])) / 60
      DB.set('offset', offset)

    else:
      # Use the old one
      DB.set('offset', offset_backup)
      offset = offset_backup

  return int(float(offset))

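# For reference, a minimal sketch of the response shape the code above relies
# on (only the fields it actually reads: status, timeZoneId, rawOffset,
# dstOffset; the offsets are in seconds). The values are illustrative:
example_opts = {
  'status': 'OK',
  'timeZoneId': 'America/Los_Angeles',
  'rawOffset': -28800,
  'dstOffset': 3600
}

# The stored value is minutes from UTC:
offset_minutes = (example_opts['rawOffset'] + example_opts['dstOffset']) / 60
assert offset_minutes == -420.0
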
def get(postcode):
  rw_lock.acquire_read()

  try:
    if database['ready']:
      key = db.postcode_normalize(postcode)

      if key in database['keys']:
        return flask.jsonify(db.get(key))
      else:
        return flask.jsonify(POSTCODE_NOT_FOUND), 404
    else:
      return flask.jsonify(DATA_NOT_READY), 202

  finally:
    rw_lock.release_read()

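# A small aside on the locking pattern above: the acquire/release pair can be
# wrapped in a context manager so an early return can't leak the read lock.
# A minimal sketch, assuming rw_lock exposes acquire_read()/release_read()
# exactly as used above:
from contextlib import contextmanager

@contextmanager
def read_locked(lock):
  # Hold the read side of the lock for the duration of the with-block.
  lock.acquire_read()
  try:
    yield
  finally:
    lock.release_read()

# Usage:
#   with read_locked(rw_lock):
#     ... lookups against database['keys'] ...
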
def signature(fname, blockcount=-1, depth=1):
  global _LASTFORMAT
  audio_format = DB.get('format')

  if not audio_format:
    audio_format, start = get_audio_format(fname)

    if audio_format:
      logging.info("Setting this stream's audio format as %s" % audio_format)
      DB.set('format', audio_format)
    else:
      logging.warn("Can't determine type of file for %s." % fname)
      return None, None

  block = None

  if audio_format == _FORMAT_AAC:
    sig, block = aac_signature(fname, blockcount)

  # We permit the idea that a file can be a false positive. But we do not
  # permit the idea that a file can be a false positive and correctly register
  # over some number of sequential blocks (currently set at whatever the
  # constant is below).
  if audio_format == _FORMAT_MP3 or not block or len(block) < 5:
    sig, block = mp3_signature(fname, blockcount)

    if len(block) > 0 and audio_format == _FORMAT_AAC:
      DB.set('format', _FORMAT_MP3)
      DB.clear_cache()

  # Stream formats can change actually.
  if len(block) < 5:
    tryformat = _FORMAT_AAC

    if audio_format == _FORMAT_AAC:
      tryformat = _FORMAT_MP3

    DB.set('format', tryformat)
    DB.clear_cache()

    # Make sure we don't foolishly recurse
    if depth == 1:
      return signature(fname, blockcount, depth + 1)

    else:
      # Otherwise if we fail to find anything upon our change-format desperation
      # move, we should return this as the none type to be handled appropriately.
      return None, None

  _LASTFORMAT = audio_format
  return sig, block

def save(fit, suffix="tr"):
  bin = tuple(fit.cut["pt_ups"])
  dbname = "chib3s" + ("_" + suffix if suffix else "")

  db = shelve.open('data/%s.db' % dbname)
  year = db.get(fit.year, {})
  year[bin] = fit.model.params()
  db[fit.year] = year
  print(db[fit.year])
  db.close()

  figname = fit.year + ("_" + suffix if suffix else "")
  canvas.SaveAs("figs/data/fits3s/f%s_%d_%s.pdf" % (figname, bin[0], str(bin[1])))

def samp_guess(samp):
  if DB.get('samp'):
    return True

  global samp_distribution

  # First to this amount is our winner
  cutoff = 10

  if samp not in samp_distribution:
    samp_distribution[samp] = 0

  samp_distribution[samp] += 1

  if samp_distribution[samp] > cutoff:
    DB.set('samp', samp)
    globals()['_FRAME_LENGTH'] = (1152.0 / samp)

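# The 1152.0 above is the sample count of an MPEG-1 Layer III frame, so
# _FRAME_LENGTH is the duration of one frame in seconds. A quick check at the
# common 44100 Hz sample rate: 1152 / 44100 is about 26.12 ms per frame.
frame_length_sec = 1152.0 / 44100
assert abs(frame_length_sec - 0.026122) < 0.000001
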
def live(start, offset_min=0):
  """
  Sends off a live-stream equivalent. Two formats are supported:

   * duration - In the form of strings such as "1pm" or "2:30pm"
   * offset - starting with a negative "-", this means "from the present".
     For instance, to start the stream from 5 minutes ago, you can do "-5"

  """
  DB.incr('hits-live')

  if start[0] == '-' or start.endswith('min'):
    # Dump things like "min" or "m"
    start = re.sub(r'[a-z]', '', start)
    return redirect('/live/m%f' % (float(TS.minute_now() - abs(float(start)))), code=302)

  # The start is expressed in times like "11:59am ..." We utilize the
  # library we wrote for streaming to get the minute of day this is.
  if start[0] == 'm':
    requested_minute = float(start[1:]) % TS.ONE_DAY_MINUTE
  else:
    candidate = start
    requested_minute = TS.to_utc('mon', candidate) - offset_min

  offset_sec = 0
  range_header = request.headers.get('Range', None)

  if range_header:
    m = re.search(r'(\d+)-', range_header)

    if m and m.group(1):
      byte1 = int(m.group(1))

      # We use the byte to compute the offset
      offset_sec = float(byte1) / (int(DB.get('bitrate') or 128) * (1000 / 8.0))

  current_minute = TS.minute_now() % TS.ONE_DAY_MINUTE
  now_time = TS.now()
  requested_time = now_time - timedelta(minutes=current_minute) + timedelta(minutes=requested_minute)

  # If the requested minute is greater than the current one, then we can presume that
  # the requested minute refers to yesterday ... as in, someone wants 11pm
  # and now it's 1am.
  if requested_minute > current_minute:
    requested_time -= timedelta(days=1)

  # It's important to do this AFTER the operation above otherwise we wrap around to yesterday
  requested_time += timedelta(seconds=offset_sec)

  # Get the info for the file that contains this timestamp
  start_info, requested_time_available = cloud.get_file_for_ts(target_time=requested_time, bias=-1)

  if start_info is None or requested_time_available is None:
    return do_error("Can't find any matching files")

  requested_time = max(requested_time, requested_time_available)
  start_second = (requested_time - start_info['start_date']).total_seconds()

  response = Response(audio.list_slice_stream(start_info, start_second), mimetype=audio.our_mime())
  return response

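# Sanity check on the Range-header math above: at 128 kb/s the stream moves
# 128 * 1000 / 8 = 16,000 bytes per second, so a request resuming at byte
# 960,000 maps to 60 seconds into the stream.
assert 960000.0 / (128 * (1000 / 8.0)) == 60.0
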
def stream_manager():
  global g_download_kill_pid
  import random

  # Manager process which makes sure that the
  # streams are running appropriately.
  callsign = misc.config['callsign']

  #
  # AAC bitrate is some non-trivial thing that even ffprobe doesn't
  # do a great job at. This solution looks at number of bits that
  # transit over the wire given a duration of time, and then uses
  # that to compute the bitrate, since in practice, that's what
  # bitrate effectively means, and why it's such an important metric.
  #
  # This is to compute a format agnostic bitrate
  # (see heartbeat for more information)
  #
  has_bitrate = DB.get('bitrate')

  if has_bitrate and int(has_bitrate) == 0:
    has_bitrate = False

  first_time = 0
  total_bytes = 0
  normalize_delay = 6
  cycle_count = 0

  cascade_time = misc.config['cascade_time']
  cascade_buffer = misc.config['cascade_buffer']
  cascade_margin = cascade_time - cascade_buffer

  last_prune = 0
  last_success = 0
  last_heartbeat = None

  change_state = None
  SHUTDOWN = 1
  RESTART = 2

  shutdown_time = None
  misc.download_ipc = Queue()

  # Number of seconds to be cycling
  cycle_time = misc.config['cycle_time']

  process = None
  process_next = None

  # The manager will be the one that starts this.
  webserver = Thread(target=server.manager, name='Webserver', args=(misc.config,))
  webserver.start()

  file_name = None

  # A wrapper function to start a download process
  def download_start(file_name):
    """ Starts a process that manages the downloading of a stream. """
    global g_download_pid
    g_download_pid += 1

    #
    # There may be a multi-second lapse time from the naming of the file to
    # the actual start of the download so we should err on that side by putting it
    # in the future by some margin
    #
    file_name = '%s/%s-%s.mp3' % (misc.DIR_STREAMS, callsign, TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
    logging.info('Starting download #%d (%s). Next up in %ds' % (g_download_pid, file_name, cascade_margin))

    process = Thread(target=stream_download, name='Download-%d:%s' % (g_download_pid, TS.ts_to_name()), args=(callsign, misc.config['stream'], g_download_pid, file_name))
    process.daemon = True
    process.start()

    return [file_name, process]

  # See https://github.com/kristopolous/DRR/issues/91:
  # Randomize prune to offload disk peaks
  prune_duration = misc.config['prune_every'] * (1.10 - random.random() / 5.0)
  misc.prune_duration = prune_duration

  last_heartbeat_tid = -1

  while True:
    #
    # We cycle this to off for every run. By the time we go through the queue, so long
    # as we aren't supposed to be shutting down, this should be toggled to true.
    #
    if last_prune < (TS.unixtime('prune') - prune_duration):
      prune_duration = misc.config['prune_every'] * (1.10 - random.random() / 5.0)
      misc.prune_duration = prune_duration

      # We just assume it can do its business in under a day
      prune = cloud.prune()
      last_prune = TS.unixtime('prune')
      misc.last_prune = last_prune

    if cycle_count % 30 == 0:
      # We only do these things occasionally, they
      # are either not very important or are not
      # expected to change that often
      TS.get_offset()

    # Increment the amount of time this has been running
    cycle_count += 1

    lr_set = False
    expired_heartbeat = last_heartbeat and time.time() - last_heartbeat > cycle_time * 2

    while not misc.queue.empty():
      what, value = misc.queue.get(False)

      # The curl process discovered a new stream to be
      # used instead.
      if what == 'stream':
        misc.config['stream'] = value
        logging.info("Using %s as the stream now" % value)

        # We expire our heartbeat in order to force a new stream
        # to start
        expired_heartbeat = True

      elif what == 'db-debug':
        DB.debug()

      elif what == 'shutdown':
        change_state = SHUTDOWN

      elif what == 'restart':
        logging.info(DB.get('runcount', use_cache=False))
        cwd = os.getcwd()
        os.chdir(misc.PROCESS_PATH)
        Popen(sys.argv)
        os.chdir(cwd)

        change_state = RESTART

        # Try to record for another restart_overlap seconds - make sure that
        # we don't perpetually put this in the future due to some bug.
        if not shutdown_time:
          shutdown_time = TS.unixtime('dl') + misc.config['restart_overlap']
          logging.info("Restart requested ... shutting down download at %s" % TS.ts_to_name(shutdown_time, with_seconds=True))

          while True:
            time.sleep(5)

            with open(misc.PIDFILE_MANAGER, 'r') as f:
              manager_pid = f.read()

            ps_out = int(os.popen('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:])).read().strip())

            if ps_out > 1:
              logging.info("Found %d potential candidates (need at least 2)" % ps_out)
              # This makes it a restricted soft shutdown
              misc.shutdown_real(do_restart=True)
              misc.download_ipc.put(('shutdown_time', shutdown_time))
              break

            else:
              Popen(sys.argv)
              logging.warn("Couldn't find a replacement process ... not going anywhere.")

      elif what == 'heartbeat':
        if not lr_set:
          lr_set = True
          last_heartbeat = time.time()
          last_heartbeat_tid = value[1]

          if last_heartbeat_tid < g_download_kill_pid:
            logging.warn("hb: Got a heartbeat for #%d but everything below #%d should be gone!" % (last_heartbeat_tid, g_download_kill_pid))

          DB.set('last_recorded', time.time())

        if not has_bitrate:
          margin = 60

          # Keep track of the first time this stream started (this is where our total
          # byte count is derived from)
          if not first_time:
            first_time = value[0]

          #
          # Otherwise we give a large (in computer time) margin of time to confidently
          # guess the bitrate. I didn't do great at stats in college, but in my experiments,
          # the estimation falls within 98% of the destination. I'm pretty sure it's really
          # unlikely this will come out erroneous, but I really can't do the math, it's probably
          # a T value, but I don't know. Anyway, whatevs.
          #
          # The normalize_delay here is for both he-aac+ streams which need to put in some frames
          # before the quantizing pushes itself up and for other stations which sometimes put a canned
          # message at the beginning of the stream, like "Live streaming supported by ..."
          #
          # When we discount the first half-dozen seconds as not being part of the total, we get a
          # stabilizing convergence far quicker.
          #
          elif (value[0] - first_time > normalize_delay):
            # If we haven't determined this stream's bitrate (which we use to estimate
            # the amount of content in a given archived stream), then we compute it
            # here instead of asking the parameters of a given block and then presuming.
            total_bytes += value[2]

            # We still give it a time period after the normalizing delay in order to build enough
            # samples to make a solid guess at what this number should be.
            if (value[0] - first_time > (normalize_delay + margin)):
              # We take the total bytes and calculate them over our elapsed time window.
              est = total_bytes / (value[0] - first_time - normalize_delay)

              # We find the nearest 8Kb increment this matches and then scale out.
              # Then we multiply out by 8 (for _K_ B) and 8 again for K _b_.
              bitrate = int(round(est / 1000) * 8)

              if bitrate > 0:
                DB.set('bitrate', bitrate)
                has_bitrate = DB.get('bitrate')

    # Check for our management process
    if not misc.manager_is_running():
      logging.info("Manager isn't running")
      change_state = SHUTDOWN

    # We get here if we should NOT be recording. So we make sure we aren't.
    if change_state == SHUTDOWN or (change_state == RESTART and TS.unixtime('dl') > shutdown_time):
      misc.shutdown_real()

    else:
      if not process and not change_state:
        logging.info("Failed to find downloader, starting new one")
        file_name, process = download_start(file_name)
        last_success = TS.unixtime('dl')

      # If we've hit the time when we ought to cascade: our last_success stream
      # was more than cascade_time - cascade_buffer ago, so we start process_next
      elif TS.unixtime('dl') - last_success > cascade_margin or expired_heartbeat:
        # And we haven't created the next process yet, then we start it now.
        if not process_next:
          logging.info("Failed to find downloader, starting new one")
          file_name, process_next = download_start(file_name)

    # If there is still no process then we should definitely bail.
    if not process:
      misc.shutdown_real()

    #
    # This needs to be on the outside loop in case we are doing a cascade
    # outside of a full mode. In this case, we will need to shut things down.
    #
    # If we are past the cascade_time and we have a process_next, then
    # we should shutdown our previous process and move the pointers around.
    #
    if not change_state and (expired_heartbeat or (TS.unixtime('dl') - last_success > cascade_time and process)):
      g_download_kill_pid += 1

      # If the process_next is running then we move our last_success forward to the present
      last_success = TS.unixtime('dl')

      # We rename our process_next AS OUR process
      process = process_next

      # And then clear out the old process_next pointer
      process_next = None

    time.sleep(cycle_time)

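# A worked example of the bitrate estimate above: say 2,880,000 bytes arrived
# over the 180 seconds following the normalize_delay warm-up. Then
# est = 16,000 bytes/sec and round(16000 / 1000) * 8 = 128 kb/s.
total_bytes, elapsed = 2880000, 180
est = total_bytes / elapsed
assert int(round(est / 1000) * 8) == 128
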
def stream_info(file_name, skip_size=False):
  # Determines the date the thing starts,
  # the minute time it starts, and the duration
  #
  # If you do skip_size = True then you avoid any i/o
  # and have everything determined solely by the name.
  # This means that some values returned will be set to
  # None
  if type(file_name) is list:
    return list_info(file_name)

  info = _TS_RE.findall(file_name)

  # 0 byte files can throw this thing off
  # if it's set to None
  duration_sec = 0
  start_minute = None
  start_date = None
  end_minute = None
  callsign = None

  if info:
    info = info[0]
    callsign = info[0]

    # We have two formats here ... one is unix time
    # and the other is a much more readable time. We will determine
    # whether it's UNIX time by seeing if it's greater than 2**36, which
    # makes us not Y4147 compliant. Oh dear - better fix this sometime
    # in the next 2100 years!
    unix_time = int(info[1])

    if unix_time > 2**36:
      unix_time = TS.name_to_unix(unix_time)

    start_minute = TS.to_minute(unix_time)
    start_date = datetime.fromtimestamp(unix_time)

  else:
    logging.warn("Failure to find info for '%s'" % file_name)
    return None

  try:
    # Just skip over this if the skip_size is set
    if skip_size:
      raise Exception

    # If we don't have a bitrate yet we assume 128
    bitrate = int(DB.get('bitrate') or 128)

    if bitrate == 0:
      logging.warn("Bitrate is 0. This is a bug.")
      raise Exception

    file_size = os.path.getsize(file_name)

    # If our file size is zero that means that we hit a bug
    # trying to stitch this, so we raise an exception and
    # try to reconstitute the file.
    if file_size == 0:
      logging.warn("File %s exists and is 0 bytes. Ignoring it for computation." % file_name)
      raise Exception

    duration_sec = file_size / (bitrate * (1000.0 / 8.0))

  except Exception as inst:
    file_size = None

    # If we can't find a duration then we try to see if it's in the file name
    ts_re_duration = re.compile(r'_(\d*).{4}')
    ts = ts_re_duration.findall(file_name)

    if ts:
      duration_sec = int(ts[0]) * 60.0

  if isinstance(duration_sec, (int, float)):
    end_minute = (duration_sec / 60.0 + start_minute) % TS.MINUTES_PER_WEEK

  return {
    'callsign': callsign,
    'week_number': start_date.isocalendar()[1],
    'name': file_name,
    'start_minute': start_minute,
    'start_date': start_date,
    'end_minute': end_minute,
    'size': file_size,
    'duration_sec': duration_sec
  }

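# The duration computation in stream_info, checked by hand: a 128 kb/s stream
# is 16,000 bytes per second, so a 9,600,000-byte file works out to 600
# seconds, i.e. a 10-minute recording.
assert 9600000 / (128 * (1000.0 / 8.0)) == 600.0
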
def mp3_signature(file_name, blockcount=-1):
  # Opens an mp3 file, finds all the blocks and their byte offsets, and if they
  # are audio blocks, constructs a signature mapping from some given beginning
  # offset of the audio data ... this is intended for stitching.
  frame_sig = []
  start_byte = []
  first_header_seen = False
  header_attempts = 0

  #
  # Looking at the first 16 bytes of the payload yields a rate that is 99.75% unique
  # as tested over various corpi ranging from 1,000,000 - 7,000,000 blocks.
  #
  # There's an additional precaution of looking for a string of 4 matches which
  # mitigates this even further.
  #
  read_size = 8
  is_stream = False
  start_pos = None
  frame_size = None
  assumed_set = None
  attempt_set = None
  next_read = False

  if isinstance(file_name, str):
    file_handle = open(file_name, 'rb')
  else:
    # This means we can handle file pointers
    file_handle = file_name
    is_stream = True
    start_pos = file_handle.tell()

  while blockcount != 0:
    if first_header_seen:
      blockcount -= 1
    else:
      header_attempts += 1

    if next_read:
      file_handle.seek(next_read, 0)
      next_read = False

    frame_start = last_read = file_handle.tell()
    header = file_handle.read(2)

    if header and len(header) == 2:
      b1 = header[1]

      if header[0] == 0xff and (b1 >> 4) == 0xf:
        try:
          b2 = ord(file_handle.read(1))
          b3 = ord(file_handle.read(1))

        # If we are at the EOF
        except Exception:
          break

        if frame_size and not assumed_set:
          attempt_set = [samp_rate, bit_rate, pad_bit]

        frame_size, samp_rate, bit_rate, pad_bit, mode = mp3_info(b1, b2, b3)

        if not frame_size:
          next_read = last_read + 1
          continue

        samp_guess(samp_rate)

        # We make sure that we get the same set of samp_rate, bit_rate, pad_bit twice
        if not assumed_set and attempt_set == [samp_rate, bit_rate, pad_bit]:
          assumed_set = attempt_set
          attempt_set = False

        # This is another indicator that we could be screwing up ...
        elif assumed_set and samp_rate != assumed_set[0] and bit_rate != assumed_set[1]:
          next_read = last_read + 1
          continue

        if not first_header_seen:
          first_header_seen = True

        # Get the signature
        sig = file_handle.read(read_size)
        frame_sig.append(sig)
        start_byte.append(frame_start)

        # Move forward the frame size, less the signature read and the 4 byte header
        throw_away = file_handle.read(frame_size - (read_size + 4))

      # ID3v2 tag ("ID") for some reason
      elif header == b'\x49\x44':
        # Rest of the header
        throw_away = file_handle.read(4)

        #
        # Quoting http://id3.org/d3v2.3.0
        #
        # The ID3v2 tag size is encoded with four bytes where the most significant bit
        # (bit 7) is set to zero in every byte, making a total of 28 bits. The zeroed
        # bits are ignored, so a 257 bytes long tag is represented as $00 00 02 01.
        #
        candidate = struct.unpack('>I', file_handle.read(4))[0]
        size = ((candidate & 0x7f000000) >> 3) | ((candidate & 0x007f0000) >> 2) | ((candidate & 0x00007f00) >> 1) | (candidate & 0x0000007f)
        file_handle.read(size)

      # ID3v1 TAG ("TA") -- 128 bytes long
      elif header == b'\x54\x41':
        # We've already read 2 so we can go 126 forward
        file_handle.read(126)

    elif len(header) == 1:
      # We are at the end of file, but let's just continue.
      continue

    elif header_attempts > _MAX_HEADER_ATTEMPTS:
      if not is_stream:
        import binascii

        samp = DB.get('samp', default=44100)

        if type(samp) is str:
          samp = int(samp)

        logging.debug('[mp3-sig] %d[%d/%d]%s:%s:%s %s %d' % (len(frame_sig), header_attempts, _MAX_HEADER_ATTEMPTS, binascii.b2a_hex(header), binascii.b2a_hex(file_handle.read(5)), file_name, hex(file_handle.tell()), len(start_byte) * (1152.0 / samp) / 60))

      # This means that perhaps we didn't guess the start correctly so we try this again
      if len(frame_sig) == 1 and header_attempts < _MAX_HEADER_ATTEMPTS:
        logging.debug("[mp3-sig] False start -- trying again")

        # Seek to the first start byte + 1
        file_handle.seek(start_byte[0] + 2)

        # Discard what we thought was the first start byte and
        # frame signature
        start_byte = []
        frame_sig = []
        first_header_seen = False

        # Also our assumed set was probably wrong
        assumed_set = None

      else:
        break

    elif first_header_seen:
      next_read = last_read + 1
      header_attempts += 1

    else:
      break

  if not is_stream:
    file_handle.close()
  else:
    file_handle.seek(start_pos)

  return frame_sig, start_byte

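# The ID3v2 "syncsafe" size decode above, pulled out and verified against the
# id3.org example quoted in the comment: four bytes, 7 significant bits each,
# so $00 00 02 01 decodes to 257.
import struct

def syncsafe_size(four_bytes):
  candidate = struct.unpack('>I', four_bytes)[0]
  return ((candidate & 0x7f000000) >> 3) | \
         ((candidate & 0x007f0000) >> 2) | \
         ((candidate & 0x00007f00) >> 1) | \
          (candidate & 0x0000007f)

assert syncsafe_size(b'\x00\x00\x02\x01') == 257
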
def stream_manager():
  import random

  # Manager process which makes sure that the
  # streams are running appropriately.
  callsign = misc.config['callsign']

  #
  # AAC bitrate is some non-trivial thing that even ffprobe doesn't
  # do a great job at. This solution looks at number of bits that
  # transit over the wire given a duration of time, and then uses
  # that to compute the bitrate, since in practice, that's what
  # bitrate effectively means, and why it's such an important metric.
  #
  # This is to compute a format agnostic bitrate
  # (see heartbeat for more information)
  #
  has_bitrate = DB.get('bitrate')

  first_time = 0
  total_bytes = 0
  normalize_delay = 6

  cascade_time = misc.config['cascadetime']
  cascade_buffer = misc.config['cascadebuffer']
  cascade_margin = cascade_time - cascade_buffer

  last_prune = 0
  last_success = 0

  change_state = None
  SHUTDOWN = 1
  RESTART = 2

  shutdown_time = None
  misc.download_ipc = Queue()

  # Number of seconds to be cycling
  cycle_time = misc.config['cycletime']

  process = None
  process_next = None

  # The manager will be the one that starts this.
  misc.pid_map['webserver'] = Process(target=server.manager, args=(misc.config,))
  misc.pid_map['webserver'].start()

  file_name = None

  # A wrapper function to start a download process
  def download_start(file_name):
    """ Starts a process that manages the downloading of a stream. """
    global g_download_pid
    g_download_pid += 1
    logging.info('Starting cascaded downloader #%d. Next up in %ds' % (g_download_pid, cascade_margin))

    #
    # There may be a multi-second lapse time from the naming of the file to
    # the actual start of the download so we should err on that side by putting it
    # in the future by some margin
    #
    file_name = '%s/%s-%s.mp3' % (misc.DIR_STREAMS, callsign, TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
    process = Process(target=stream_download, args=(callsign, misc.config['stream'], g_download_pid, file_name))
    process.start()
    return [file_name, process]

  # See https://github.com/kristopolous/DRR/issues/91:
  # Randomize prune to offload disk peaks
  prune_duration = misc.config['pruneevery'] + (1 / 8.0 - random.random() / 4.0)

  while True:
    #
    # We cycle this to off for every run. By the time we go through the queue, so long
    # as we aren't supposed to be shutting down, this should be toggled to true.
    #
    flag = False

    if last_prune < (TS.unixtime('prune') - TS.ONE_DAY_SECOND * prune_duration):
      prune_duration = misc.config['pruneevery'] + (1 / 8.0 - random.random() / 4.0)

      # We just assume it can do its business in under a day
      misc.pid_map['prune'] = cloud.prune()
      last_prune = TS.unixtime('prune')

    TS.get_offset()

    lr_set = False

    while not misc.queue.empty():
      flag = True
      what, value = misc.queue.get(False)

      # The curl process discovered a new stream to be
      # used instead.
      if what == 'stream':
        misc.config['stream'] = value
        logging.info("Using %s as the stream now" % value)

        # We now don't toggle the flag in order to shutdown the
        # old process and start a new one

      elif what == 'db-debug':
        DB.debug()

      elif what == 'shutdown':
        change_state = SHUTDOWN

      elif what == 'restart':
        logging.info(DB.get('runcount', use_cache=False))
        cwd = os.getcwd()
        os.chdir(misc.PROCESS_PATH)
        Popen(sys.argv)
        os.chdir(cwd)

        change_state = RESTART

        # Try to record for another restart_overlap seconds - make sure that
        # we don't perpetually put this in the future due to some bug.
        if not shutdown_time:
          shutdown_time = TS.unixtime('dl') + misc.config['restart_overlap']
          logging.info("Restart requested ... shutting down downloader at %s" % TS.ts_to_name(shutdown_time, with_seconds=True))

          while True:
            time.sleep(20)

            ps_out = int(os.popen('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:])).read().strip())

            if ps_out > 1:
              logging.info("Found %d potential candidates (need at least 2)" % ps_out)
              # This makes it a restricted soft shutdown
              misc.shutdown_real(do_restart=True)
              misc.download_ipc.put(('shutdown_time', shutdown_time))
              break

            else:
              Popen(sys.argv)
              logging.warn("Couldn't find a replacement process ... not going anywhere.")

      elif what == 'heartbeat':
        if not lr_set and value[1] > 100:
          lr_set = True
          DB.set('last_recorded', time.time())

        if not has_bitrate:
          # Keep track of the first time this stream started (this is where our total
          # byte count is derived from)
          if not first_time:
            first_time = value[0]

          #
          # Otherwise we give a large (in computer time) margin of time to confidently
          # guess the bitrate. I didn't do great at stats in college, but in my experiments,
          # the estimation falls within 98% of the destination. I'm pretty sure it's really
          # unlikely this will come out erroneous, but I really can't do the math, it's probably
          # a T value, but I don't know. Anyway, whatevs.
          #
          # The normalize_delay here is for both he-aac+ streams which need to put in some frames
          # before the quantizing pushes itself up and for other stations which sometimes put a canned
          # message at the beginning of the stream, like "Live streaming supported by ..."
          #
          # When we discount the first half-dozen seconds as not being part of the total, we get a
          # stabilizing convergence far quicker.
          #
          elif (value[0] - first_time > normalize_delay):
            # If we haven't determined this stream's bitrate (which we use to estimate
            # the amount of content in a given archived stream), then we compute it
            # here instead of asking the parameters of a given block and then presuming.
            total_bytes += value[1]

            # We still give it a time period after the normalizing delay in order to build enough
            # samples to make a solid guess at what this number should be.
            if (value[0] - first_time > (normalize_delay + 60)):
              # We take the total bytes and calculate them over our elapsed time window.
              est = total_bytes / (value[0] - first_time - normalize_delay)

              # We find the nearest 8Kb increment this matches and then scale out.
              # Then we multiply out by 8 (for _K_ B) and 8 again for K _b_.
              bitrate = int(round(est / 1000) * 8)
              DB.set('bitrate', bitrate)

    # Check for our management process
    if not misc.manager_is_running():
      logging.info("Manager isn't running")
      change_state = SHUTDOWN

    # The only way for the bool to be toggled off is if we are not in full-mode ...
    # we get here if we should NOT be recording. So we make sure we aren't.
    if change_state == SHUTDOWN or (change_state == RESTART and TS.unixtime('dl') > shutdown_time):
      process = my_process_shutdown(process)
      process_next = my_process_shutdown(process_next)
      misc.shutdown_real()

    else:
      # Didn't respond in cycle_time seconds so kill it
      if not flag:
        process = my_process_shutdown(process)

      if not process and not change_state:
        file_name, process = download_start(file_name)
        last_success = TS.unixtime('dl')

      # If we've hit the time when we ought to cascade: our last_success stream
      # was more than cascade_time - cascade_buffer ago, so we start process_next
      elif TS.unixtime('dl') - last_success > cascade_margin:
        # And we haven't created the next process yet, then we start it now.
        if not process_next:
          file_name, process_next = download_start(file_name)

    # If there is still no process then we should definitely bail.
    if not process:
      misc.shutdown_real()

    #
    # This needs to be on the outside loop in case we are doing a cascade
    # outside of a full mode. In this case, we will need to shut things down.
    #
    # If we are past the cascade_time and we have a process_next, then
    # we should shutdown our previous process and move the pointers around.
    #
    if not change_state and TS.unixtime('dl') - last_success > cascade_time and process:
      logging.info("Stopping cascaded downloader")
      process.terminate()

      # If the process_next is running then we move our last_success forward to the present
      last_success = TS.unixtime('dl')

      # We rename our process_next AS OUR process
      process = process_next

      # And then clear out the old process_next pointer
      process_next = None

    # Increment the amount of time this has been running
    DB.incr('uptime', cycle_time)

    time.sleep(cycle_time)

def stitch_and_slice_process(file_list, relative_start_minute, duration_minute, destination_path=None):
  # The process wrapper around stitch_and_slice to do it asynchronously.
  if destination_path:
    import lib.misc as misc
    name_out = "%s/%s" % (misc.DIR_SLICES, destination_path)
  else:
    name_out = stream_name(file_list, relative_start_minute=relative_start_minute, duration_minute=duration_minute, absolute_start_minute=None)

  if os.path.isfile(name_out):
    file_size = os.path.getsize(name_out)

    # A "correct" filesize should be measured as more than 75% of what the
    # math would be. So first we can guess that.
    bitrate = int(DB.get('bitrate') or 128)
    estimate = (bitrate / 8) * (duration_minute * 60) * (10 ** 3)

    if 0.75 * estimate < file_size:
      logging.info("[stitch] File %s found" % name_out)
      return None

  # We presume that there is a file list we need to make
  stitched_list = stitch(file_list, force_stitch=True)

  if stitched_list:
    logging.info("stitched")
    logging.info(stitched_list)
    logging.info("%d %d" % (len(file_list), len(stitched_list)))

  # We see if it was correct, on the condition that it had to be made
  if stitched_list and file_list and ((len(stitched_list) == len(file_list) == 1) or (len(stitched_list) > 1 and len(file_list) > 1)):
    info = stream_info(stitched_list)
  else:
    logging.warn("Unable to stitch file list")
    return None

  # After we've stitched together the audio then we start our slice
  # by figuring out the relative_start_minute of the slice, versus ours
  start_slice = relative_start_minute

  # Now we need to take the duration of the stream we want, in minutes, and then
  # make sure that we don't exceed the length of the file.
  duration_slice = min(duration_minute, start_slice + info['duration_sec'] / 60.0)

  sliced_name = list_slice(
    list_in=stitched_list,
    name_out=name_out,
    start_sec=start_slice * 60.0,
    duration_sec=duration_slice * 60.0,
  )

  return None

if args.command == 'tests':
  suite = TestLoader().discover('tests', pattern='*.py')
  result = TextTestRunner(verbosity=2).run(suite)
  result = 0 if result.wasSuccessful() else 1
  exit(result)

cfg = read_config(args.config)
logger = init_logger()

renderer = DistributedRenderer()
qualifier = DistributedQualifier()

base_image_path = cfg['main']['populationPath'] + basename(cfg['main']['baseImage'])
fitnessMachine = MeshFitnessMachine(base_image_path, renderer, qualifier)
population = Population(MeshGenome, fitnessMachine)
population.generation = int(db.get('generation', default=0))

accuracy.register(population)
monitor.register(population)

if args.command == 'reset' or not population.generation:
  population.initialize()
else:
  population.load()

do('cp -v %s %s' % (cfg['main']['baseImage'], base_image_path))

try:
  population.evolve()
except KeyboardInterrupt as ki:
  pass

def mp3_signature(file_name, blockcount=-1):
  # Opens an mp3 file, finds all the blocks and their byte offsets, and if they
  # are audio blocks, constructs a signature mapping from some given beginning
  # offset of the audio data ... this is intended for stitching.
  frame_sig = []
  start_byte = []
  first_header_seen = False
  header_attempts = 0

  #
  # Looking at the first 16 bytes of the payload yields a rate that is 99.75% unique
  # as tested over various corpi ranging from 1,000,000 - 7,000,000 blocks.
  #
  # There's an additional precaution of looking for a string of 4 matches which
  # mitigates this even further.
  #
  read_size = 8
  is_stream = False
  start_pos = None
  frame_size = None
  assumed_set = None
  attempt_set = None
  last_tell = None
  go_back = -1

  if isinstance(file_name, str):
    file_handle = open(file_name, 'rb')
  else:
    # This means we can handle file pointers
    file_handle = file_name
    is_stream = True
    start_pos = file_handle.tell()

  while blockcount != 0:
    if first_header_seen:
      blockcount -= 1

    else:
      header_attempts += 1

      if header_attempts > 2:
        file_handle.seek(go_back, 1)

    frame_start = file_handle.tell()

    if frame_start == last_tell:
      file_handle.seek(last_tell + 1, 1)

    header = file_handle.read(2)

    if header and len(header) == 2:
      b1 = header[1]

      if header[0] == 0xff and (b1 >> 4) == 0xf:
        try:
          b = ord(file_handle.read(1))

        # If we are at the EOF
        except Exception:
          break

        if frame_size and not assumed_set:
          attempt_set = [samp_rate, bit_rate, pad_bit]

        frame_size, samp_rate, bit_rate, pad_bit = mp3_info(b, b1)
        last_tell = file_handle.tell()

        if not frame_size:
          file_handle.seek(go_back, 1)
          go_back = -1
          continue

        samp_guess(samp_rate)

        # We make sure that we get the same set of samp_rate, bit_rate, pad_bit twice
        if not assumed_set and attempt_set == [samp_rate, bit_rate, pad_bit]:
          assumed_set = attempt_set
          attempt_set = False

        # This is another indicator that we could be screwing up ...
        elif assumed_set and samp_rate != assumed_set[0] and bit_rate != assumed_set[1]:
          file_handle.seek(go_back, 1)
          continue

        if not first_header_seen:
          first_header_seen = True

        # Rest of the header
        throw_away = file_handle.read(1)

        # Get the signature
        sig = file_handle.read(read_size)
        frame_sig.append(sig)
        start_byte.append(frame_start)

        # Move forward the frame size, less the signature read and the 4 byte header
        throw_away = file_handle.read(frame_size - (read_size + 4))

        if file_handle.tell() > 3:
          go_back = -3

      # ID3v2 tag ("ID") for some reason
      elif header == b'\x49\x44':
        # Rest of the header
        throw_away = file_handle.read(4)

        #
        # Quoting http://id3.org/d3v2.3.0
        #
        # The ID3v2 tag size is encoded with four bytes where the most significant bit
        # (bit 7) is set to zero in every byte, making a total of 28 bits. The zeroed
        # bits are ignored, so a 257 bytes long tag is represented as $00 00 02 01.
        #
        candidate = struct.unpack('>I', file_handle.read(4))[0]
        size = ((candidate & 0x7f000000) >> 3) | ((candidate & 0x007f0000) >> 2) | ((candidate & 0x00007f00) >> 1) | (candidate & 0x0000007f)
        file_handle.read(size)

      # ID3v1 TAG ("TA") -- 128 bytes long
      elif header == b'\x54\x41':
        # We've already read 2 so we can go 126 forward
        file_handle.read(126)

    elif len(header) == 1:
      # We are at the end of file, but let's just continue.
      continue

    elif header_attempts > _MAX_HEADER_ATTEMPTS:
      if not is_stream:
        import binascii
        logging.debug('[mp3-sig] %d[%d/%d]%s:%s:%s %s %d' % (len(frame_sig), header_attempts, _MAX_HEADER_ATTEMPTS, binascii.b2a_hex(header), binascii.b2a_hex(file_handle.read(5)), file_name, hex(file_handle.tell()), len(start_byte) * (1152.0 / DB.get('samp', default=44100)) / 60))

      # This means that perhaps we didn't guess the start correctly so we try this again
      if len(frame_sig) == 1 and header_attempts < _MAX_HEADER_ATTEMPTS:
        logging.debug("[mp3-sig] False start -- trying again")

        # Seek to the first start byte + 1
        file_handle.seek(start_byte[0] + 2)

        # Discard what we thought was the first start byte and
        # frame signature
        start_byte = []
        frame_sig = []
        first_header_seen = False

        # Also our assumed set was probably wrong
        assumed_set = None

      else:
        break

    elif first_header_seen:
      header_attempts += 1

      if header_attempts > 2:
        file_handle.seek(go_back, 1)
        go_back = -1

    else:
      break

  if not is_stream:
    file_handle.close()
  else:
    file_handle.seek(start_pos)

  return frame_sig, start_byte