def base_stats():
  # Reports base-level statistical information about the health of the server.
  # This is used for the /stats and /heartbeat calls.
  try:
    # For some reason this can lead to a memory error.
    load = [float(unit) for unit in os.popen("/usr/bin/uptime | awk -F : ' { print $NF } '").read().split(', ')]
  except Exception:
    load = 0

  uptime = TS.uptime()
  return {
    'human-uptime': "%dd %02d:%02d:%02d" % (uptime / TS.ONE_DAY_SECOND, (uptime / TS.ONE_HOUR_SECOND) % 24, (uptime / 60) % 60, uptime % 60),
    'human-now': TS.ts_to_name(),
    'computer-uptime': uptime,
    'computer-now': time.time(),
    'last-recorded': float(DB.get('last_recorded', use_cache=False) or 0),
    'hits': DB.run('select sum(value) from kv where key like "%hit%"').fetchone()[0],
    'version': __version__,
    'uuid': config['uuid'],
    'next-prune': int(last_prune - (TS.unixtime('prune') - prune_duration)),
    'load': load,
    'files': [m.path for m in psutil.Process().open_files()],
    'connections': len(psutil.Process().connections()),
    'memory': [
      # Current memory footprint in MB
      psutil.Process(os.getpid()).memory_info().rss / (1024.0 * 1024),
      # Maximum lifetime memory footprint in MB
      resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
    ],
    'threads': [thread.name for thread in threading.enumerate()],
    'disk': cloud.size('.') / (1024.0 ** 3)
  }
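# The dictionary above is what the /heartbeat and /stats endpoints return as JSON.
# Below is a minimal, hedged sketch (not part of the server) of how an external
# monitor might consume that payload. The host name, port, and the tolerances
# are placeholder assumptions for illustration only.
import requests

def check_station(host='indycast.example.org', port=8930):
  stats = requests.get('http://%s:%d/heartbeat' % (host, port), timeout=10).json()

  # 'computer-now' and 'last-recorded' are unix timestamps, so their difference
  # is the number of seconds since audio was last written to disk.
  silent_for = stats['computer-now'] - stats['last-recorded']

  return {
    'version': stats['version'],
    'uptime_sec': stats['computer-uptime'],
    'recording': silent_for < 120,   # assumed tolerance
    'disk_gb': stats['disk']
  }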
def cback(data):
  if not misc.params['shutdown_time']:
    if not misc.download_ipc.empty():
      what, value = misc.download_ipc.get(False)
      if what == 'shutdown_time':
        misc.params['shutdown_time'] = value

  elif TS.unixtime('dl') > misc.params['shutdown_time']:
    sys.exit(0)

  if misc.params['isFirst'] == True:
    misc.params['isFirst'] = False

    if len(data) < 800:
      if re.match('https?://', data):
        # If we are getting a redirect then we don't mind, we
        # just put it in the stream and then we leave
        misc.queue.put(('stream', data.strip()))
        return True

      # A pls style playlist
      elif re.findall('File\d', data, re.M):
        logging.info('Found a pls, using the File1 parameter')
        matches = re.findall('File1=(.*)\n', data, re.M)
        misc.queue.put(('stream', matches[0].strip()))
        return True

  # This provides a reliable way to determine bitrate. We look at how much
  # data we've received between two time periods
  misc.queue.put(('heartbeat', (TS.unixtime('hb'), len(data))))

  if not nl['stream']:
    try:
      nl['stream'] = open(file_name, 'w')

    except Exception as exc:
      logging.critical("Unable to open %s. Can't record. Must exit." % file_name)
      sys.exit(-1)

  nl['stream'].write(data)

  if not misc.manager_is_running():
    misc.shutdown()
def cback(data):
  global g_download_kill_pid

  """
  if len(data):
    catchall('download', json.dumps([g_download_kill_pid, nl['pid'], len(data)]))
  else:
    catchall('download', json.dumps([g_download_kill_pid, 'no data']))
  """
  # print nl['pid'], g_download_kill_pid
  if nl['pid'] <= g_download_kill_pid or not data:
    logging.info("Stopping download #%d" % nl['pid'])
    return False

  # misc.params can fail based on a shutdown sequence.
  if misc is None or misc.params is None or not misc.manager_is_running():
    # if misc is not None:
    #   misc.shutdown()
    return False

  elif not misc.params['shutdown_time']:
    if not misc.download_ipc.empty():
      what, value = misc.download_ipc.get(False)
      if what == 'shutdown_time':
        misc.params['shutdown_time'] = value

  elif TS.unixtime('dl') > misc.params['shutdown_time']:
    raise TypeError("Download Stop")

  if misc.params['isFirst'] == True:
    misc.params['isFirst'] = False

    if len(data) < 800:
      try:
        data_string = data.decode('utf-8')

        if re.match('https?://', data_string):
          # If we are getting a redirect then we don't mind, we
          # just put it in the stream and then we leave
          misc.queue.put(('stream', data_string.strip()))
          return False

        # A pls style playlist
        elif re.findall('File\d', data_string, re.M):
          logging.info('%d: Found a pls, using the File1 parameter' % (nl['pid'], ))
          matches = re.findall('File1=(.*)\n', data_string, re.M)
          misc.queue.put(('stream', matches[0].strip()))
          return False

      # If it gets here it's binary ... I guess that's fine.
      except:
        pass

  # This provides a reliable way to determine bitrate. We look at how much
  # data we've received between two time periods
  misc.queue.put(('heartbeat', (TS.unixtime('hb'), nl['pid'], len(data))))

  if not nl['stream']:
    try:
      nl['stream'] = open(file_name, 'wb')

    except Exception as exc:
      logging.critical("%d: Unable to open %s. Can't record. Must exit." % (nl['pid'], file_name))
      return False

  nl['stream'].write(data)
def stream_manager():
  global g_download_kill_pid
  import random

  # Manager process which makes sure that the
  # streams are running appropriately.
  callsign = misc.config['callsign']

  #
  # AAC bitrate is some non-trivial thing that even ffprobe doesn't
  # do a great job at. This solution looks at the number of bits that
  # transit over the wire given a duration of time, and then uses
  # that to compute the bitrate, since in practice, that's what
  # bitrate effectively means, and why it's such an important metric.
  #
  # This is to compute a format-agnostic bitrate
  # (see heartbeat for more information).
  #
  has_bitrate = DB.get('bitrate')
  if has_bitrate and int(has_bitrate) == 0:
    has_bitrate = False

  first_time = 0
  total_bytes = 0
  normalize_delay = 6
  cycle_count = 0

  cascade_time = misc.config['cascade_time']
  cascade_buffer = misc.config['cascade_buffer']
  cascade_margin = cascade_time - cascade_buffer

  last_prune = 0
  last_success = 0
  last_heartbeat = None

  change_state = None
  SHUTDOWN = 1
  RESTART = 2
  shutdown_time = None
  misc.download_ipc = Queue()

  # Number of seconds to be cycling
  cycle_time = misc.config['cycle_time']

  process = None
  process_next = None

  # The manager will be the one that starts this.
  #server.manager(misc.config)
  webserver = Thread(target=server.manager, name='Webserver', args=(misc.config,))
  webserver.start()

  file_name = None

  # A wrapper function to start a download process.
  def download_start(file_name):
    """ Starts a process that manages the downloading of a stream. """
    global g_download_pid
    g_download_pid += 1

    #
    # There may be a multi-second lapse time from the naming of the file to
    # the actual start of the download, so we should err on that side by putting it
    # in the future by some margin.
    #
    file_name = '%s/%s-%s.mp3' % (misc.DIR_STREAMS, callsign, TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
    logging.info('Starting download #%d (%s). Next up in %ds' % (g_download_pid, file_name, cascade_margin))

    process = Thread(target=stream_download, name='Download-%d:%s' % (g_download_pid, TS.ts_to_name()), args=(callsign, misc.config['stream'], g_download_pid, file_name))
    process.daemon = True
    process.start()
    return [file_name, process]

  # See https://github.com/kristopolous/DRR/issues/91:
  # Randomize prune to offload disk peaks.
  prune_duration = misc.config['prune_every'] * (1.10 - random.random() / 5.0)
  misc.prune_duration = prune_duration

  last_heartbeat_tid = -1

  while True:
    #
    # We cycle this to off for every run. By the time we go through the queue, so long
    # as we aren't supposed to be shutting down, this should be toggled to true.
    #
    if last_prune < (TS.unixtime('prune') - prune_duration):
      prune_duration = misc.config['prune_every'] * (1.10 - random.random() / 5.0)
      misc.prune_duration = prune_duration
      # We just assume it can do its business in under a day.
      prune = cloud.prune()
      last_prune = TS.unixtime('prune')
      misc.last_prune = last_prune

    # Increment the amount of time this has been running.
    if cycle_count % 30 == 0:
      # We only do these things occasionally; they
      # are either not very important or are not
      # expected to change that often.
      TS.get_offset()

    cycle_count += 1

    lr_set = False
    expired_heartbeat = last_heartbeat and time.time() - last_heartbeat > cycle_time * 2

    while not misc.queue.empty():
      what, value = misc.queue.get(False)

      # The curl process discovered a new stream to be
      # used instead.
      if what == 'stream':
        misc.config['stream'] = value
        logging.info("Using %s as the stream now" % value)
        # We expire our heartbeat in order to force a new stream
        # to start.
        expired_heartbeat = True

      elif what == 'db-debug':
        DB.debug()

      elif what == 'shutdown':
        change_state = SHUTDOWN

      elif what == 'restart':
        logging.info(DB.get('runcount', use_cache=False))
        cwd = os.getcwd()
        os.chdir(misc.PROCESS_PATH)
        Popen(sys.argv)
        os.chdir(cwd)

        change_state = RESTART

        # Try to record for another restart_overlap seconds - make sure that
        # we don't perpetually put this in the future due to some bug.
        if not shutdown_time:
          shutdown_time = TS.unixtime('dl') + misc.config['restart_overlap']
          logging.info("Restart requested ... shutting down download at %s" % TS.ts_to_name(shutdown_time, with_seconds=True))

          #misc.shutdown_real(do_restart=False)
          #misc.download_ipc.put(('shutdown_time', shutdown_time))

          while True:
            time.sleep(5)
            with open(misc.PIDFILE_MANAGER, 'r') as f:
              manager_pid = f.read()

            #print manager_pid, os.getpid(), manager_pid == os.getpid()
            #logging.info(DB.get('runcount', use_cache=False))
            ps_out = int(os.popen('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:])).read().strip())

            if ps_out > 1:
              logging.info("Found %d potential candidates (need at least 2)" % ps_out)
              # This makes it a restricted soft shutdown.
              misc.shutdown_real(do_restart=True)
              misc.download_ipc.put(('shutdown_time', shutdown_time))
              break

            else:
              Popen(sys.argv)
              logging.warn("Couldn't find a replacement process ... not going anywhere.")

      elif what == 'heartbeat':
        if not lr_set:
          lr_set = True
          last_heartbeat = time.time()
          last_heartbeat_tid = value[1]

          if last_heartbeat_tid < g_download_kill_pid:
            logging.warn("hb: Got a heartbeat for #%d but everything below #%d should be gone!" % (last_heartbeat_tid, g_download_kill_pid))

          DB.set('last_recorded', time.time())

        if not has_bitrate:
          margin = 60

          # Keep track of the first time this stream started (this is where our total
          # byte count is derived from).
          if not first_time:
            first_time = value[0]

          #
          # Otherwise we give a large (in computer time) margin of time to confidently
          # guess the bitrate. In practice the estimate lands very close to the true
          # value, so an erroneous guess is unlikely.
          #
          # The normalize_delay here is both for HE-AAC+ streams, which need to put in some frames
          # before the quantizing pushes itself up, and for other stations which sometimes put a canned
          # message at the beginning of the stream, like "Live streaming supported by ..."
          #
          # When we discount the first half-dozen seconds as not being part of the total, we get a
          # stabilizing convergence far quicker.
          #
          elif (value[0] - first_time > normalize_delay):
            # If we haven't determined this stream's bitrate (which we use to estimate
            # the amount of content in a given archived stream), then we compute it
            # here instead of asking the parameters of a given block and then presuming.
            total_bytes += value[2]

            # We still give it a time period after the normalizing delay in order to build enough
            # samples to make a solid guess at what this number should be.
            if (value[0] - first_time > (normalize_delay + margin)):
              # We take the total bytes and divide by the elapsed (post-delay) time.
              est = total_bytes / (value[0] - first_time - normalize_delay)

              # We find the nearest 8Kb increment this matches and then scale out.
              # Then we multiply out by 8 (for _K_B) and 8 again for K_b_.
              bitrate = int(round(est / 1000) * 8)
              #print("Estimated bitrate:%d total:%d est:%d denom:%d" % (bitrate, total_bytes, est, value[0] - first_time - normalize_delay))
              if bitrate > 0:
                DB.set('bitrate', bitrate)
                has_bitrate = DB.get('bitrate')

    #if last_heartbeat:
    #  logging.info("%d heartbeat %d" % (last_heartbeat, last_heartbeat_tid))

    # Check for our management process.
    if not misc.manager_is_running():
      logging.info("Manager isn't running")
      change_state = SHUTDOWN

    # We get here if we should NOT be recording. So we make sure we aren't.
    if change_state == SHUTDOWN or (change_state == RESTART and TS.unixtime('dl') > shutdown_time):
      misc.shutdown_real()

    else:
      if not process and not change_state:
        logging.info("Failed to find downloader, starting new one")
        file_name, process = download_start(file_name)
        last_success = TS.unixtime('dl')

      # If we've hit the time when we ought to cascade:
      # if our last_success stream was more than cascade_time - cascade_buffer ago,
      # then we start our process_next.
      elif TS.unixtime('dl') - last_success > cascade_margin or expired_heartbeat:
        #logging.info("heartbeat expired %s %s %d %d %d" % (type(process_next), type(process), last_success, cascade_time, TS.unixtime('dl')))

        # And we haven't created the next process yet, then we start it now.
        if not process_next:
          logging.info("Failed to find downloader, starting new one")
          file_name, process_next = download_start(file_name)

      # If there is still no process then we should definitely bail.
      if not process:
        misc.shutdown_real()

      #
      # This needs to be on the outside loop in case we are doing a cascade
      # outside of a full mode. In this case, we will need to shut things down.
      #
      # If we are past the cascade_time and we have a process_next, then
      # we should shut down our previous process and move the pointers around.
      #
      if not change_state and (expired_heartbeat or (TS.unixtime('dl') - last_success > cascade_time and process)):
        g_download_kill_pid += 1
        #process.terminate()

        # If the process_next is running then we move our last_success forward to the present.
        last_success = TS.unixtime('dl')

        # We rename our process_next AS OUR process
        process = process_next

        # and then clear out the old process_next pointer.
        process_next = None

    time.sleep(cycle_time)
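# The comments above describe the format-agnostic bitrate estimate in words.
# Here is the same arithmetic pulled out into a tiny, self-contained sketch; the
# function name and the sample numbers are illustrative, not part of the codebase.
# A steady 128 kbps stream delivers roughly 16,000 bytes per second, so:
#
#   est = 16000 bytes/sec  ->  est / 1000 ~= 16  ->  16 * 8 = 128 kbps
#
def estimate_bitrate(total_bytes, elapsed_sec, normalize_delay=6):
  # Ignore the warm-up window, mirroring the normalize_delay logic above.
  effective = elapsed_sec - normalize_delay
  if effective <= 0:
    return None

  est = total_bytes / float(effective)   # bytes per second
  return int(round(est / 1000) * 8)      # kilobits per second, snapped to multiples of 8

# Example: 60 seconds of post-warm-up data from a 128 kbps stream, observed over
# a 66 second window -> estimate_bitrate(16000 * 60, 66) == 128.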
# servers and instances.
DB.upgrade()
del(DB.upgrade)
DB.incr('runcount')

# This is how we discover if we are the official server or not.
# Look at the /uuid endpoint to see how this magic works.
misc.config['uuid'] = os.popen('uuidgen').read().strip()

signal.signal(signal.SIGINT, misc.shutdown_handler)
signal.signal(signal.SIGUSR1, misc.shutdown_handler)
signal.signal(signal.SIGHUP, misc.do_nothing)

misc.IS_TEST = False
misc.start_time = TS.unixtime()

parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", default="./indy_config.txt", help="Configuration file (default ./indy_config.txt)")
parser.add_argument('--debug', action='store_true', help="Load PDB for debugging")
parser.add_argument('--version', action='version', version='indycast %s :: Aug 2015' % misc.__version__)
parser.add_argument("--daemon", action='store_true', help="Run as daemon")
args = parser.parse_args()

if args.daemon:
  Popen([x for x in sys.argv if x != '--daemon'])
  sys.exit(0)

# Hook sys.exit so the debugger drops in before the process actually exits.
old_sys_exit = sys.exit

def new_sys_exit(value):
  pdb.set_trace()
  old_sys_exit(value)

sys.exit = new_sys_exit
def manager(config):
  # Main flask process that manages the end points.
  app = Flask(__name__)

  def webserver_shutdown(signal=15, frame=None):
    logging.info('Shutting down webserver')
    request.environ.get('werkzeug.server.shutdown')()

  def success(message):
    return jsonify({'res': True, 'message': message}), 200

  def fail(message):
    return jsonify({'res': False, 'message': message}), 500

  # From http://blog.asgaard.co.uk/2012/08/03/http-206-partial-content-for-flask-python
  @app.after_request
  def after_request(response):
    # Supports 206 partial content requests for podcast streams.
    response.headers.add('Accept-Ranges', 'bytes')
    logging.info('ua - %s' % request.headers.get('User-Agent'))
    return response

  def send_file_partial(path, requested_path='', file_name=None):
    # Wrapper around send_file which handles HTTP 206 Partial Content
    # (byte ranges).

    # If we requested something that isn't around, then we bail.
    if not os.path.exists(path):
      return 'File %s not found. Perhaps the stream is old?' % requested_path, 404

    range_header = request.headers.get('Range', None)
    if not range_header:
      with open(path, 'rb') as f:
        data = f.read()

      rv = Response(data, 200, mimetype=audio.our_mime(), direct_passthrough=True)

      if not file_name:
        file_name = os.path.basename(path)

      rv.headers.add('Content-Disposition', 'attachment; filename="%s"' % file_name)
      return rv

    size = os.path.getsize(path)
    byte1, byte2 = 0, None

    m = re.search('(\d+)-(\d*)', range_header)
    g = m.groups()

    if g[0]:
      byte1 = int(g[0])

    if g[1]:
      byte2 = int(g[1])

    length = size - byte1
    if byte2 is not None:
      length = byte2 - byte1

    data = None
    with open(path, 'rb') as f:
      f.seek(byte1)
      data = f.read(length + 1)

    rv = Response(data, 206, mimetype=audio.our_mime(), direct_passthrough=True)

    disposition = 'attachment;'
    if file_name:
      disposition += ' filename="%s"' % file_name

    rv.headers.add('Content-Disposition', disposition)
    rv.headers.add('Content-Range', 'bytes {0}-{1}/{2}'.format(byte1, byte1 + length, size))
    return rv

  # From http://stackoverflow.com/questions/13317536/get-a-list-of-all-routes-defined-in-the-app
  @app.route("/help")
  def site_map():
    """ Shows all the end points supported by the current server,
        the options and the documentation. """
    output = ['-=#| Welcome to indycast %s API help |#=-' % misc.__version__, '']

    for rule in app.url_map.iter_rules():
      if rule.endpoint == 'static':
        continue

      options = {}
      for arg in rule.arguments:
        options[arg] = "{0}".format(arg)

      url = url_for(rule.endpoint, **options)
      line = "{} {}".format(url, app.view_functions[rule.endpoint].__doc__)
      output.append(line)
      output.append("")

    return Response('\n'.join(output), mimetype='text/plain')

  @app.route('/robots.txt')
  def robots():
    """ Sends off robots.txt for crawlers. """
    return send_file('%s/robots.txt' % (misc.source_dir, ))

  @app.route('/uuid')
  def my_uuid():
    """ Returns this server's uuid, which is generated each time it is run.
        This is used to determine whether this is the official server or not. """
    return misc.config['uuid']

  @app.route('/db')
  def database():
    """ Backs up the current sqlite3 db and sends a gzipped version of it as the response. """
    filename = '%s/%s/%s-%s.gz' % (misc.config['storage'], misc.DIR_BACKUPS, misc.config['callsign'], time.strftime('%Y%m%d-%H%M', time.localtime()))
    os.popen('/usr/bin/sqlite3 config.db .dump | /bin/gzip -9 > %s' % filename)
    time.sleep(1)
    return send_file(filename)

  @app.route('/rename')
  def rename():
    return cloud.rename()

  @app.route('/reindex')
  def reindex():
    """ Starts the prune process, which cleans up and offloads audio files,
        and also re-indexes the database.

        This is useful in cases where bugs have led to improper registration of the
        streams and a busted building of the database. It's fairly expensive in
        I/O costs, so this shouldn't be done as the default. """
    cloud.prune(reindex=True)
    return success('Reindexing...')

  @app.route('/prune')
  def prune():
    """ Starts the prune sub-process, which cleans up and offloads audio files
        following the rules outlined in the configuration file (viewable with the stats call). """
    cloud.prune(force=True)
    return success('Pruning...')

  @app.route('/slices/<time>/<name>')
  def send_named_stream(time, name):
    """ Similar to the /slices/path endpoint, this end point sends a stream
        that is at time <time> with name <name>. """
    return send_stream(time, download_name=name)

  @app.route('/slices/<path:path>')
  def send_stream(path, download_name=None):
    """ Downloads a stream from the server. The path is callsign-date_duration.mp3

          * callsign: The callsign returned by /stats
          * date: in the format YYYYMMDDHHMM such as 201508011005 for 2015-08-01 10:05
          * duration: A value, in minutes, to return.

        The mp3 extension should be used regardless of the actual format of the stream -
        although the audio returned will be in the stream's native format.

        The streams are created and sent on-demand, so there may be a slight delay before
        it starts. """
    DB.incr('hits-dl')
    base_dir = "%s%s/" % (config['storage'], misc.DIR_SLICES)

    if not path.startswith(config['callsign']):
      path = "%s-%s" % (config['callsign'], path)

    if not path.endswith('.mp3'):
      path = "%s.mp3" % path

    file_name = base_dir + path

    # If the file doesn't exist, then we need to slice it and create it based on our query.
    # Also, if it's a zero byte file, then we try to create it again.
    if not os.path.isfile(file_name) or os.path.getsize(file_name) == 0:
      cloud.register_stream_list()

      # This tells us that if it were to exist, it would be something like this.
      request_info = audio.stream_info(file_name)
      logging.info(("expected value", request_info))

      # We can do something rather specific here ...
      #
      # First we get our generic stream list using our start_minute from the info.
      stream_list, episode_list = cloud.find_streams(start_list=[request_info['start_minute']], duration_min=request_info['duration_sec'] / 60.0)

      for ep in episode_list:
        episode = ep[0]
        first_slice = episode[0]

        if first_slice['week_number'] == request_info['week_number']:
          # This means that we've found the episode that we want.
          # We will block on this.
          relative_start_minute = request_info['start_minute'] - first_slice['start_minute']

          logging.info(episode)
          audio.stitch_and_slice_process(file_list=episode, relative_start_minute=relative_start_minute, duration_minute=request_info['duration_sec'] / 60.0, destination_path=path)

          # And break out of our loop ... now everything should exist.
          break

    return send_file_partial("%s/%s" % (base_dir, path), requested_path=path, file_name=download_name)

  @app.route('/halt')
  def halt():
    """ Stops the webserver. This request must be issued from the localhost
        in order to succeed. """
    if request.remote_addr == '127.0.0.1':
      webserver_shutdown()
      misc.shutdown(do_restart=False)
      return success('halt...')

    else:
      return fail('halt aborted. Must be requested from the localhost')

  @app.route('/restart')
  def restart():
    """ Restarts an instance. This does so in a gapless, non-overlapping way. """
    webserver_shutdown()
    misc.shutdown(do_restart=True)
    return success('restarting...')

  @app.route('/dolist')
  def dolist():
    return success(misc.queue_dbg())

  @app.route('/upgrade')
  def upgrade():
    """ Goes to the source directory, pulls down the latest from git,
        and if the versions are different, the application restarts. """
    cwd = os.getcwd()
    os.chdir(misc.source_dir)
    os.system('/usr/bin/git pull')

    # See what the version is after the pull.
    newversion = os.popen("/usr/bin/git describe").read().strip()

    if newversion != misc.__version__:
      os.system('/usr/local/bin/pip install --user -r requirements.txt')

      # From http://blog.petrzemek.net/2014/03/23/restarting-a-python-script-within-itself/
      misc.shutdown(do_restart=True)
      return success("Upgrading from %s to %s" % (misc.__version__, newversion))

    os.chdir(cwd)
    return success('Version %s is current' % misc.__version__)

  @app.route('/heartbeat')
  def heartbeat():
    """ A low-resource version of the /stats call ... this is invoked
        by the server health check. Only the vitals are reported.

        It helps us see if disk space is going nuts or if we aren't recording right now.

        This allows us to check if a restart happened between invocations. """
    return jsonify(misc.base_stats()), 200

  @app.route('/stats')
  def stats():
    """ Reports various statistical metrics on a particular server.
        Use this with the graph.py tool to see station coverage. """
    misc.am_i_official()
    stats = misc.base_stats()

    lockMap = {}
    for k, v in misc.lockMap.items():
      lock = v.acquire(False)
      lockMap[k] = True

      if lock:
        lockMap[k] = False
        v.release()

    stats.update({
      'kv': DB.all('kv'),
      'locks': lockMap,
      'pwd': os.getcwd(),
      'free': os.popen("/bin/df -h / | /usr/bin/tail -1").read().strip(),
      # Reporting the list as fractional GB is more useful.
      'streams': DB.all('streams', sort_by='start_unix'),
      'config': misc.public_config()
    })

    return jsonify(stats), 200

  # Using http://flask.pocoo.org/docs/0.10/patterns/streaming/ as a reference.
  @app.route('/live/<start>')
  def live(start, offset_min=0):
    """ Sends off a live-stream equivalent. Two formats are supported:

          * duration - In the form of strings such as "1pm" or "2:30pm"
          * offset - starting with a negative "-", this means "from the present".
            For instance, to start the stream from 5 minutes ago, you can do "-5"
    """
    DB.incr('hits-live')

    if start[0] == '-' or start.endswith('min'):
      # Dump things like min or m.
      start = re.sub('[a-z]', '', start)
      return redirect('/live/m%f' % (float(TS.minute_now() - abs(float(start)))), code=302)

    # The start is expressed in times like "11:59am ..." We utilize the
    # library we wrote for streaming to get the minute of day this is.
    if start[0] == 'm':
      requested_minute = float(start[1:]) % TS.ONE_DAY_MINUTE

    else:
      candidate = start
      requested_minute = TS.to_utc('mon', candidate) - offset_min

    offset_sec = 0
    range_header = request.headers.get('Range', None)
    if range_header:
      m = re.search('(\d+)-', range_header)
      g = m.groups()

      if g[0]:
        byte1 = int(g[0])
        # We use the byte offset and the bitrate to compute the time offset.
        offset_sec = float(byte1) / ((int(DB.get('bitrate')) or 128) * (1000 / 8.0))

    #print "--- REQUEST @ ", start, range_header, offset_sec
    current_minute = TS.minute_now() % TS.ONE_DAY_MINUTE
    now_time = TS.now()
    requested_time = now_time - timedelta(minutes=current_minute) + timedelta(minutes=requested_minute)

    #print requested_time, now_time, requested_minute, current_minute

    # If the requested minute is greater than the current one, then we can presume that
    # the requested minute refers to yesterday ... as in, someone wants 11pm
    # and now it's 1am.
    if requested_minute > current_minute:
      requested_time -= timedelta(days=1)

    # It's important to do this AFTER the operation above, otherwise we wrap around to yesterday.
    requested_time += timedelta(seconds=offset_sec)

    # Get the info for the file that contains this timestamp.
    start_info, requested_time_available = cloud.get_file_for_ts(target_time=requested_time, bias=-1)

    if start_info is None or requested_time_available is None:
      return do_error("Can't find any matching files")

    requested_time = max(requested_time, requested_time_available)
    start_second = (requested_time - start_info['start_date']).total_seconds()

    response = Response(audio.list_slice_stream(start_info, start_second), mimetype=audio.our_mime())
    return response

  @app.route('/at/<start>/<duration_string>')
  def at(start, duration_string='1hr'):
    """ Sends a stream using a human-readable (and human-writable) definition
        of the start time.

        This uses the dateutils.parser library, so strings such as
        "Monday 2pm" are accepted.

        Because the space, 0x20, is such a pain in HTTP, you can use "_", "-" or "+"
        to signify it. For instance,

          /at/monday_2pm/1hr

        will work fine. """
    # If it's, say, 1am and I request 11pm without a day specification, it will go
    # to 11pm LAST week and not the 11pm from 2 hours ago.
    dt = TS.str_to_time(start)
    duration_min = TS.duration_parse(duration_string)
    endpoint = '%s-%s_%d.mp3' % (misc.config['callsign'], TS.ts_to_name(dt), duration_min)
    return send_stream(endpoint, download_name=endpoint)

  @app.route('/<weekday>/<start>/<duration_string>')
  def at_method2(weekday, start, duration_string):
    """ This is identical to the stream syntax, but instead it is similar to
        /at ... it uses the same notation but returns an audio file directly.

        You must specify a single weekday ... I know, total bummer. """
    weekday_map = {
      'mon': 'monday',
      'tue': 'tuesday',
      'wed': 'wednesday',
      'thu': 'thursday',
      'fri': 'friday',
      'sat': 'saturday',
      'sun': 'sunday'
    }

    # The alternative form for this is something like
    # /tuesday_8pm/1hr/showname.xml
    if duration_string.count('.') > 0:
      dt = TS.str_to_time(weekday)
      start_time = TS.extract_time(weekday)

      # The order is a little incompatible.
      return stream(weekday=TS.to_minute(dt), start=start_time, duration_string=start, showname=duration_string)

    if weekday not in weekday_map:
      return 'The first parameter, %s, is not a recognized weekday.' % weekday

    return at("%s_%s" % (weekday_map[weekday], start), duration_string)

  @app.route('/<weekday>/<start>/<duration_string>/<showname>')
  def stream(weekday, start, duration_string, showname):
    """ Returns a podcast, m3u, pls, html or mp3 file based on the weekday, start and duration.
        This is designed to be read by podcasting software such as podkicker,
        itunes, and feedburner. The default format if nothing is specified is XML.

        Weekdays are defined as mon, tue, wed, thu, fri, sat, sun.

        If a show occurs multiple times per week, this can be specified with
        a comma. For instance, /mon,tue,fri/4pm/1hr

        The showname should be followed by an xml, pls, m3u, or mp3 extension.

        In the case of using the .mp3 extension, it only returns the most recent episode.

        It should also be viewable in a modern web browser.

        If you can find a podcaster that's not supported, please send an email
        to [email protected]. """
    if isinstance(weekday, (float)):
      start_time_list = [weekday]
      weekday_list = [TS.WEEKDAY_LIST[int(weekday / (60 * 24))]]

    else:
      # Supports multiple weekdays.
      weekday_list = weekday.split(',')
      start_time_list = [TS.to_utc(day, start) for day in weekday_list]

    duration_min = TS.duration_parse(duration_string)

    # This means we failed to parse.
    if not duration_min:
      return do_error("duration '%s' is not set correctly" % duration_string)

    if not isinstance(start_time_list[0], (int, float)):
      return do_error('weekday and start times are not set correctly')

    buffer_show = 2
    # In #22 we're going to add 2 minutes to the duration to make sure that we get
    # the entire episode.
    duration_min += (buffer_show * 2)

    # And according to #149 we also go a minute back for the start time ...
    # we need to do a little math to make sure we don't get a -1 edge case.
    start_time_list = [(TS.MINUTES_PER_WEEK + offset - buffer_show) % TS.MINUTES_PER_WEEK for offset in start_time_list]

    # If we are here then it looks like our input is probably good.

    # Strip the .xml from the showname ... this will be used in our xml.
    parts = showname.split('.')
    file_type = parts.pop()
    showname = '.'.join(parts)

    # We come in with spaces as underscores, so here we translate that back.
    showname = re.sub('_', ' ', showname)

    # Make sure that we have all of our streams registered before trying
    # to infer what we can send to the user.
    cloud.register_stream_list()

    # Look for streams that we have which match this query and duration.
    # This will also create slices if necessary in a sub-process.
    # The list of files that returns will include this not-yet-created
    # file name as essentially a "promise" to when it will be made.
    feed_list = cloud.find_and_make_slices(start_time_list, duration_min)
    # print feed_list

    # Then, taking those two things, make a feed list from them.
    return generate_feed(
      file_type=file_type,
      showname=showname,
      feed_list=feed_list,
      duration_min=duration_min,
      weekday_list=weekday_list,
      start=start,
      duration_string=duration_string
    )

  if __name__ == 'lib.server':
    # When we do an upgrade or a restart, there's a race condition of getting to start this server
    # before the previous one has cleaned up all the socket work. So if the time is under our
    # patience threshold then we sleep a second and just try again, hoping that it will work.
    patience = misc.PROCESS_DELAY * 2
    attempt = 1

    start = TS.unixtime('delay')
    while TS.unixtime('delay') - start < (patience + 3):
      logging.info('Listening on %s' % config['port'])

      try:
        app.run(threaded=True, use_reloader=False, port=config['port'], host='0.0.0.0')
        break

      except Exception as exc:
        if TS.unixtime('delay') - start < patience:
          logging.info('[attempt: %d] Error, can not start server ... perhaps %s is already in use?' % (attempt, config['port']))
          attempt += 1
          time.sleep(misc.PROCESS_DELAY / 4)

        elif TS.unixtime('delay') - start < (patience + 4):
          pid = os.popen("netstat -anlp | grep :%s | awk ' { print $NF }' | sed 's/\/.*//'" % config['port']).read().strip().split('\n')[0]

          try:
            logging.info("F**k it, I'm killing pid %s." % pid)
            os.kill(int(pid), 15)

          except:
            pass

          time.sleep(misc.PROCESS_DELAY / 4)
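# A hedged, client-side illustration of two behaviours implemented above: the
# /slices path convention (callsign-YYYYMMDDHHMM_duration.mp3, with the callsign
# prepended automatically if omitted) and the HTTP 206 byte-range support added
# by send_file_partial. The host, port, and byte range are placeholder values.
import requests

base = 'http://kpcc.example.org:8930'

# Full download: 60 minutes starting 2015-08-01 10:05, per the /slices docstring.
full = requests.get('%s/slices/201508011005_60.mp3' % base)
assert full.status_code == 200

# Ranged download: ask for the first 64 KiB only; the server answers 206 with a
# Content-Range header, which is what podcast clients rely on to resume.
partial = requests.get('%s/slices/201508011005_60.mp3' % base, headers={'Range': 'bytes=0-65535'})
assert partial.status_code == 206
print(partial.headers.get('Content-Range'))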
def prune_process(reindex=False, force=False):
  import lib.misc as misc
  # This is internal; call prune() directly. This is a normally blocking
  # process that is prepared by prune(), making it easily callable asynchronously.

  # If another prune is running then we just bail.
  if not misc.lockMap['prune'].acquire(False) and not force:
    logging.warn("Tried to run another prune whilst one is running. Aborting")
    return True

  # If we are the first process then we need to make sure that the webserver is up before
  # we do this to check to see if we are official.
  time.sleep(2)

  #pid = misc.change_proc_name("%s-cleanup" % misc.config['callsign'])
  # We want to run am_i_official here since it could block on a DNS lookup.
  misc.am_i_official()

  try:
    register_stream_list(reindex)

  except Exception as e:
    logging.info("Wasn't able to register streams: %s" % e)
    misc.lockMap['prune'].release()
    return None

  archive_duration = misc.config['cloud_archive']
  cutoff = TS.unixtime('prune') - archive_duration

  # Remove all slices older than 4 hours.
  slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND

  cloud_cutoff = None
  if misc.config['cloud']:
    cloud_cutoff = TS.unixtime('prune') - misc.config['disk_archive']

  # Put thingies into the cloud.
  count_cloud = 0
  count_delete = 0

  for file_name in glob('*/*.mp3'):
    #
    # Depending on many factors this could be running for hours
    # or even days. We want to make sure this isn't a blarrrghhh
    # zombie process or, worse yet, still running and competing with
    # other instances of itself.
    #
    if not misc.manager_is_running():
      misc.lockMap['prune'].release()
      return None

    if not os.path.exists(file_name):
      continue

    ctime = os.path.getctime(file_name)

    # print "Looking at ", file_name, ctime, cutoff, archive_duration, misc.config['archive'], misc.am_i_official()
    # We observe the rules set up in the config.
    if cloud_cutoff:
      logging.debug("%s cloud:%d ctime:%d slice:%d cutoff:%d ctime-cloud:%d ctime-slice:%d" % (file_name, cloud_cutoff, ctime, slice_cutoff, cutoff, ctime - cloud_cutoff, ctime - slice_cutoff))

    if (file_name.startswith('slices') and ctime < slice_cutoff) or ctime < cutoff:
      logging.debug("Prune[remove]: %s (ctime)" % file_name)
      os.unlink(file_name)
      count_delete += 1

    # We want to make sure we aren't archiving the slices.
    elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith('slice'):
      logging.debug("Prune[cloud]: %s" % file_name)

      # <s>Only unlink the file if I can successfully put it into the cloud.</s>
      #
      # Actually, policy change:
      # We should dump the file regardless because otherwise we would smash the disk
      # AS HAS HAPPENED MULTIPLE TIMES.
      #
      # Then you have an irrelevant past build up a forced discarding of the desired
      # future ... just like with life itself.
      #
      res = put(file_name)

      if misc.am_i_official():
        try:
          os.unlink(file_name)

          # This is only a self-reporting system ... we can use our success code for
          # our honesty here.
          if res:
            count_cloud += 1

        except Exception as e:
          logging.debug("Prune[cloud]: Couldn't remove {}: {}".format(file_name, e))

  for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS):
    ctime = os.path.getctime(file_name)

    # We observe the rules set up in the config.
    if ctime < cutoff:
      logging.debug("Prune: %s" % file_name)
      os.unlink(file_name)
      count_delete += 1

  # Don't do this f*****g shit at all because f**k this so hard.
  #logging.info('select name, id from streams where end_unix < date("now", "-%d seconds") or (end_minute - start_minute < 0.05 and start_unix < date("now", "%d seconds"))' % (archive_duration, TS.get_offset() * 60 - 1200))

  unlink_list = DB.run('select name, id from streams where end_unix < date("now", "-%d seconds")' % (archive_duration)).fetchall()

  for file_name_tuple in unlink_list:
    file_name = str(file_name_tuple[0])
    id = file_name_tuple[1]

    #logging.debug("Prune[remove]: %s (unlink list)" % file_name)

    # If there's a cloud account at all then we need to unlink the
    # equivalent mp3 file.
    if cloud_cutoff and misc.am_i_official():
      unlink(file_name)

    # After we remove these streams we delete them from the db.
    DB.run('delete from streams where id = %d' % id)

    # Now, only after we've deleted from the cloud, can we delete the local file.
    if os.path.exists(file_name):
      os.unlink(file_name)
      count_delete += 1

  logging.info("Deleted %d files and put %d on the cloud." % (count_delete, count_cloud))
  misc.lockMap['prune'].release()
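# A pure-function sketch of the retention decision applied to each mp3 in the loop
# above. The function and argument names are illustrative; the real loop works
# directly on ctime and the three cutoffs (cutoff, slice_cutoff, cloud_cutoff)
# computed from cloud_archive and disk_archive in the config.
def prune_action(file_name, ctime, cutoff, slice_cutoff, cloud_cutoff):
  is_slice = file_name.startswith('slices')

  # Slices are short-lived scratch output; everything dies at the archive cutoff.
  if (is_slice and ctime < slice_cutoff) or ctime < cutoff:
    return 'delete'

  # Originals older than the disk-archive window get offloaded to cloud storage instead.
  if cloud_cutoff and ctime < cloud_cutoff and not is_slice:
    return 'offload'

  return 'keep'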
def stream_manager(): import random # Manager process which makes sure that the # streams are running appropriately. callsign = misc.config['callsign'] # # AAC bitrate is some non-trivial thing that even ffprobe doesn't # do a great job at. This solution looks at number of bits that # transit over the wire given a duration of time, and then uses # that to compute the bitrate, since in practice, that's what # bitrate effectively means, and why it's such an important metric. # # This is to compute a format agnostic bitrate # (see heartbeat for more information) # has_bitrate = DB.get('bitrate') first_time = 0 total_bytes = 0 normalize_delay = 6 cascade_time = misc.config['cascadetime'] cascade_buffer = misc.config['cascadebuffer'] cascade_margin = cascade_time - cascade_buffer last_prune = 0 last_success = 0 change_state = None SHUTDOWN = 1 RESTART = 2 shutdown_time = None misc.download_ipc = Queue() # Number of seconds to be cycling cycle_time = misc.config['cycletime'] process = None process_next = None # The manager will be the one that starts this. misc.pid_map['webserver'] = Process(target=server.manager, args=(misc.config, )) misc.pid_map['webserver'].start() file_name = None # A wrapper function to start a donwnload process def download_start(file_name): """ Starts a process that manages the downloading of a stream. """ global g_download_pid g_download_pid += 1 logging.info('Starting cascaded downloader #%d. Next up in %ds' % (g_download_pid, cascade_margin)) # # There may be a multi-second lapse time from the naming of the file to # the actual start of the download so we should err on that side by putting it # in the future by some margin # file_name = '%s/%s-%s.mp3' % ( misc.DIR_STREAMS, callsign, TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2))) process = Process(target=stream_download, args=(callsign, misc.config['stream'], g_download_pid, file_name)) process.start() return [file_name, process] # see https://github.com/kristopolous/DRR/issues/91: # Randomize prune to offload disk peaks prune_duration = misc.config['pruneevery'] + (1 / 8.0 - random.random() / 4.0) while True: # # We cycle this to off for every run. By the time we go throug the queue so long # as we aren't supposed to be shutting down, this should be toggled to true. # flag = False if last_prune < (TS.unixtime('prune') - TS.ONE_DAY_SECOND * prune_duration): prune_duration = misc.config['pruneevery'] + ( 1 / 8.0 - random.random() / 4.0) # We just assume it can do its business in under a day misc.pid_map['prune'] = cloud.prune() last_prune = TS.unixtime('prune') TS.get_offset() lr_set = False while not misc.queue.empty(): flag = True what, value = misc.queue.get(False) # The curl proces discovered a new stream to be # used instead. if what == 'stream': misc.config['stream'] = value logging.info("Using %s as the stream now" % value) # We now don't toggle to flag in order to shutdown the # old process and start a new one elif what == 'db-debug': DB.debug() elif what == 'shutdown': change_state = SHUTDOWN elif what == 'restart': logging.info(DB.get('runcount', use_cache=False)) cwd = os.getcwd() os.chdir(misc.PROCESS_PATH) Popen(sys.argv) os.chdir(cwd) change_state = RESTART # Try to record for another restart_overlap seconds - make sure that # we don't perpetually put this in the future due to some bug. if not shutdown_time: shutdown_time = TS.unixtime( 'dl') + misc.config['restart_overlap'] logging.info( "Restart requested ... 
shutting down downloader at %s" % TS.ts_to_name(shutdown_time, with_seconds=True)) while True: time.sleep(20) #logging.info(DB.get('runcount', use_cache=False)) logging.info( os.popen( 'ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:])).read().strip()) ps_out = int( os.popen( 'ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:])).read().strip()) if ps_out > 1: logging.info( "Found %d potential candidates (need at least 2)" % ps_out) # This makes it a restricted soft shutdown misc.shutdown_real(do_restart=True) misc.download_ipc.put( ('shutdown_time', shutdown_time)) break else: Popen(sys.argv) logging.warn( "Couldn't find a replacement process ... not going anywhere." ) elif what == 'heartbeat': if not lr_set and value[1] > 100: lr_set = True DB.set('last_recorded', time.time()) if not has_bitrate: # Keep track of the first time this stream started (this is where our total # byte count is derived from) if not first_time: first_time = value[0] # # Otherwise we give a large (in computer time) margin of time to confidently # guess the bitrate. I didn't do great at stats in college, but in my experiments, # the estimation falls within 98% of the destination. I'm pretty sure it's really # unlikely this will come out erroneous, but I really can't do the math, it's probably # a T value, but I don't know. Anyway, whatevs. # # The normalize_delay here is for both he-aac+ streams which need to put in some frames # before the quantizing pushes itself up and for other stations which sometimes put a canned # message at the beginning of the stream, like "Live streaming supported by ..." # # When we discount the first half-dozen seconds as not being part of the total, we get a # stabilizing convergence far quicker. # elif (value[0] - first_time > normalize_delay): # If we haven't determined this stream's bitrate (which we use to estimate # the amount of content in a given archived stream), then we compute it # here instead of asking the parameters of a given block and then presuming. total_bytes += value[1] # We still give it a time period after the normalizing delay in order to build enough # samples to make a solid guess at what this number should be. if (value[0] - first_time > (normalize_delay + 60)): # We take the total bytes and average them over the elapsed time, in this case at least 60 seconds. est = total_bytes / (value[0] - first_time - normalize_delay) # We scale bytes/sec down to KB/sec, round, and then multiply by 8 so the # result lands on the nearest 8 Kbps increment. bitrate = int(round(est / 1000) * 8) DB.set('bitrate', bitrate) # Check for our management process if not misc.manager_is_running(): logging.info("Manager isn't running") change_state = SHUTDOWN # The only way for the bool to be toggled off is if we are not in full-mode ... # we get here if we should NOT be recording. So we make sure we aren't. if change_state == SHUTDOWN or (change_state == RESTART and TS.unixtime('dl') > shutdown_time): process = my_process_shutdown(process) process_next = my_process_shutdown(process_next) misc.shutdown_real() else: # Didn't respond in cycle_time seconds so kill it if not flag: process = my_process_shutdown(process) if not process and not change_state: file_name, process = download_start(file_name) last_success = TS.unixtime('dl') # If we've hit the time when we ought to cascade elif TS.unixtime('dl') - last_success > cascade_margin: # And we haven't created the next process yet, then we start it now.
if not process_next: file_name, process_next = download_start(file_name) # If our last_success stream was more than cascade_time - cascade_buffer # then we start our process_next # If there is still no process then we should definitely bail. if not process: misc.shutdown_real() # # This needs to be on the outside loop in case we are doing a cascade # outside of a full mode. In this case, we will need to shut things down # # If we are past the cascade_time and we have a process_next, then # we should shutdown our previous process and move the pointers around. # if not change_state and TS.unixtime( 'dl') - last_success > cascade_time and process: logging.info("Stopping cascaded downloader") process.terminate() # If the process_next is running then we move our last_success forward to the present last_success = TS.unixtime('dl') # we rename our process_next AS OUR process process = process_next # and then clear out the old process_next pointer process_next = None # Increment the amount of time this has been running DB.incr('uptime', cycle_time) time.sleep(cycle_time)
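For reference, the cascade bookkeeping above boils down to a simple timeline: a replacement downloader is launched cascade_margin (= cascade_time - cascade_buffer) seconds after the last successful start, and the old one is terminated at cascade_time, so the two overlap for cascade_buffer seconds and no audio is lost at the seam. A small standalone illustration with made-up example values (the real numbers come from cascadetime and cascadebuffer in the config):

def cascade_timeline(cascade_time=900, cascade_buffer=60):
    # Print when each downloader starts and stops, relative to last_success = 0.
    cascade_margin = cascade_time - cascade_buffer
    events = [
        (0, 'downloader #1 starts (last_success = 0)'),
        (cascade_margin, 'downloader #2 starts (process_next)'),
        (cascade_time, 'downloader #1 terminated; #2 becomes the primary process'),
    ]
    for t, what in events:
        print('t=%4ds  %s' % (t, what))
    print('overlap: %ds of doubly-recorded stream' % cascade_buffer)

if __name__ == '__main__':
    cascade_timeline()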
signal.signal(signal.SIGINT, misc.shutdown_handler) signal.signal(signal.SIGUSR1, misc.shutdown_handler) signal.signal(signal.SIGHUP, misc.do_nothing) if __name__ == "__main__": # From http://stackoverflow.com/questions/25504149/why-does-running-the-flask-dev-server-run-itself-twice if os.environ.get('WERKZEUG_RUN_MAIN') == 'true': server_manager(misc.config) else: # Ignore all test scaffolding misc.IS_TEST = False misc.start_time = TS.unixtime() parser = argparse.ArgumentParser() parser.add_argument( "-c", "--config", default="./indy_config.txt", help="Configuration file (default ./indy_config.txt)") parser.add_argument('--version', action='version', version='indycast %s :: Aug 2015' % misc.__version__) parser.add_argument("--daemon", action='store_true', help="run as daemon") args = parser.parse_args()
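The file passed via --config supplies the keys this section reads. As a quick reference, here they are as a Python dict with hypothetical example values; the units are inferred from how the code above uses each key, and note that different revisions in this file disagree on some names (cloud_archive/disk_archive in seconds versus cloudarchive/archivedays in days):

example_config = {
    'callsign':        'kxyz',                            # station identifier used in file names
    'stream':          'http://example.com/stream.mp3',   # source stream URL
    'cycletime':       7,                                 # seconds per manager loop iteration
    'cascadetime':     900,                               # seconds each downloader runs
    'cascadebuffer':   60,                                # seconds of overlap between downloaders
    'pruneevery':      1,                                 # days between prune passes (jittered)
    'restart_overlap': 300,                               # seconds to keep recording across a restart
    'cloud':           None,                              # cloud credentials block, or None to disable
    'archivedays':     14,                                # days to keep local mp3s
    'cloudarchive':    30,                                # days to keep cloud copies
}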
def prune_process(reindex=False, force=False): import lib.misc as misc # This is internal, call prune() directly. This is a normally blocking # process that is prepared by prune(), making it easily callable asynchronously # If another prune is running then we just bail if not misc.lockMap['prune'].acquire(False) and not force: logging.warn( "Tried to run another prune whilst one is running. Aborting") return True # If we are the first process then we need to make sure that the webserver is up before # we do this to check to see if we are official time.sleep(2) #pid = misc.change_proc_name("%s-cleanup" % misc.config['callsign']) # We want to run the am_i_official here since it could block on a DNS lookup misc.am_i_official() try: register_stream_list(reindex) except Exception as e: logging.info("Wasn't able to register streams: %s" % e) misc.lockMap['prune'].release() return None archive_duration = misc.config['cloud_archive'] cutoff = TS.unixtime('prune') - archive_duration # Remove all slices older than 4 hours. slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND cloud_cutoff = None if misc.config['cloud']: cloud_cutoff = TS.unixtime('prune') - misc.config['disk_archive'] # Put thingies into the cloud. count_cloud = 0 count_delete = 0 for file_name in glob('*/*.mp3'): # # Depending on many factors this could be running for hours # or even days. We want to make sure this isn't a blarrrghhh # zombie process or worse yet, still running and competing with # other instances of itself. # if not misc.manager_is_running(): misc.lockMap['prune'].release() return None if not os.path.exists(file_name): continue ctime = os.path.getctime(file_name) # print "Looking at ", file_name, ctime, cutoff, archive_duration, misc.config['archive'], misc.am_i_official() # We observe the rules set up in the config. logging.debug( "%s cloud:%d ctime:%d slice:%d cutoff:%d ctime-cloud:%d ctime-slice:%d" % (file_name, cloud_cutoff, ctime, slice_cutoff, cutoff, ctime - cloud_cutoff, ctime - slice_cutoff)) if file_name.startswith( 'slices') and ctime < slice_cutoff or ctime < cutoff: logging.debug("Prune[remove]: %s (ctime)" % file_name) os.unlink(file_name) count_delete += 1 # We want to make sure we aren't archiving the slices elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith( 'slice'): logging.debug("Prune[cloud]: %s" % file_name) # <s>Only unlink the file if I can successfully put it into the cloud.</s> # # Actually, policy change: # We should dump the file regardless because otherwise we would smash the disk # AS HAS HAPPENED MULTIPLE TIMES # # Then you have an irrelevant past build up a forced discarding of the desired # future ... just like with life itself. # res = put(file_name) if misc.am_i_official(): try: os.unlink(file_name) # This is only a self-reporting system... we can use our success code for # our honesty here. if res: count_cloud += 1 except Exception as e: logging.debug( "Prune[cloud]: Couldn't remove {}: {}".format( file_name, e)) for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS): ctime = os.path.getctime(file_name) # We observe the rules set up in the config. if ctime < cutoff: logging.debug("Prune: %s" % file_name) os.unlink(file_name) count_delete += 1 # Don't do this f*****g shit at all because f**k this so hard. 
#logging.info('select name, id from streams where end_unix < date("now", "-%d seconds") or (end_minute - start_minute < 0.05 and start_unix < date("now", "%d seconds"))' % (archive_duration, TS.get_offset() * 60 - 1200)) unlink_list = DB.run( 'select name, id from streams where end_unix < date("now", "-%d seconds")' % (archive_duration)).fetchall() for file_name_tuple in unlink_list: file_name = str(file_name_tuple[0]) id = file_name_tuple[1] #logging.debug("Prune[remove]: %s (unlink list)" % file_name) # If there's a cloud account at all then we need to unlink the # equivalent mp3 file if cloud_cutoff and misc.am_i_official(): cloud.unlink(file_name) # After we remove these streams then we delete them from the db. DB.run('delete from streams where id = %d' % id) # now only after we've deleted from the cloud can we delete the local file if os.path.exists(file_name): os.unlink(file_name) count_delete += 1 logging.info("Deleted %d files and put %d on the cloud." % (count_delete, count_cloud)) misc.lockMap['prune'].release()
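To make the pruning windows in the function above easier to follow: three cutoff timestamps are derived up front and every file's ctime is compared against them in order. A compact restatement (assuming, as in this revision, that cloud_archive and disk_archive are already expressed in seconds):

import time

ONE_DAY_SECOND = 86400

def prune_cutoffs(config, now=None):
    # Files older than 'delete_before' are removed outright, slices are kept
    # for roughly four hours, and files older than 'cloud_before' are pushed
    # to the cloud when a cloud account is configured.
    now = now or time.time()
    return {
        'delete_before': now - config['cloud_archive'],
        'slice_before': now - 0.1667 * ONE_DAY_SECOND,
        'cloud_before': (now - config['disk_archive']) if config.get('cloud') else None,
    }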
def prune_process(lockMap, reindex=False, force=False): # This is internal, call prune() directly. This is a normally blocking # process that is prepared by prune(), making it easily callable asynchronously # If another prune is running then we just bail if not lockMap['prune'].acquire(False) and not force: logging.warn("Tried to run another prune whilst one is running. Aborting") return True # If we are the first process then we need to make sure that the webserver is up before # we do this to check to see if we are official time.sleep(2) pid = misc.change_proc_name("%s-cleanup" % misc.config['callsign']) # We want to run the am_i_official here since it could block on a DNS lookup misc.am_i_official() try: register_stream_list(reindex) except: lockMap['prune'].release() return None db = DB.connect() archive_duration = misc.config['archivedays'] * TS.ONE_DAY_SECOND cutoff = TS.unixtime('prune') - archive_duration # Remove all slices older than 4 hours. slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND cloud_cutoff = None if misc.config['cloud']: cloud_cutoff = TS.unixtime('prune') - misc.config['cloudarchive'] * TS.ONE_DAY_SECOND # Put thingies into the cloud. count = 0 for file_name in glob('*/*.mp3'): # # Depending on many factors this could be running for hours # or even days. We want to make sure this isn't a blarrrghhh # zombie process or worse yet, still running and competing with # other instances of itself. # if not misc.manager_is_running(): lockMap['prune'].release() return None ctime = os.path.getctime(file_name) # print "Looking at ", file_name, ctime, cutoff, archive_duration, misc.config['archivedays'], misc.am_i_official() # We observe the rules set up in the config. if file_name.startswith('slices') and ctime < slice_cutoff or ctime < cutoff: logging.debug("Prune[remove]: %s (ctime)" % file_name) os.unlink(file_name) count += 1 # We want to make sure we aren't archiving the slices elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith('slice') and misc.am_i_official(): logging.debug("Prune[cloud]: %s" % file_name) # Only unlink the file if I can successfully put it into the cloud. if put(file_name): try: os.unlink(file_name) except: logging.debug("Prune[cloud]: Couldn't remove %s" % file_name) for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS): ctime = os.path.getctime(file_name) # We observe the rules set up in the config. if ctime < cutoff: logging.debug("Prune: %s" % file_name) os.unlink(file_name) count += 1 # The map names are different since there may or may not be a corresponding # cloud thingie associated with it. db = DB.connect() # Don't do this f*****g shit at all because f**k this so hard. #logging.info('select name, id from streams where end_unix < date("now", "-%d seconds") or (end_minute - start_minute < 0.05 and start_unix < date("now", "%d seconds"))' % (archive_duration, TS.get_offset() * 60 - 1200)) unlink_list = db['c'].execute('select name, id from streams where end_unix < date("now", "-%d seconds")' % (archive_duration)).fetchall() for file_name_tuple in unlink_list: file_name = str(file_name_tuple[0]) id = file_name_tuple[1] logging.debug("Prune[remove]: %s (unlink list)" % file_name) # If there's a cloud account at all then we need to unlink the # equivalent mp3 file if cloud_cutoff and misc.am_i_official(): cloud.unlink(file_name) # After we remove these streams then we delete them from the db.
db['c'].execute('delete from streams where id = %d' % id) db['conn'].commit() # now only after we've deleted from the cloud can we delete the local file if os.path.exists(file_name): os.unlink(file_name) count += 1 logging.info("Found %d files older than %s days." % (count, misc.config['archivedays'])) lockMap['prune'].release()
def prune_process(lockMap, reindex=False, force=False): # This is internal, call prune() directly. This is a normally blocking # process that is prepared by prune(), making it easily callable asynchronously # If another prune is running then we just bail if not lockMap['prune'].acquire(False) and not force: logging.warn( "Tried to run another prune whilst one is running. Aborting") return True # If we are the first process then we need to make sure that the webserver is up before # we do this to check to see if we are official time.sleep(2) pid = misc.change_proc_name("%s-cleanup" % misc.config['callsign']) # We want to run the am_i_official here since it could block on a DNS lookup misc.am_i_official() try: register_stream_list(reindex) except: lockMap['prune'].release() return None db = DB.connect() archive_duration = misc.config['archivedays'] * TS.ONE_DAY_SECOND cutoff = TS.unixtime('prune') - archive_duration # Remove all slices older than 4 hours. slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND cloud_cutoff = None if misc.config['cloud']: cloud_cutoff = TS.unixtime( 'prune') - misc.config['cloudarchive'] * TS.ONE_DAY_SECOND # Put thingies into the cloud. count = 0 for file_name in glob('*/*.mp3'): # # Depending on many factors this could be running for hours # or even days. We want to make sure this isn't a blarrrghhh # zombie process or worse yet, still running and competing with # other instances of itself. # if not misc.manager_is_running(): lockMap['prune'].release() return None ctime = os.path.getctime(file_name) # print "Looking at ", file_name, ctime, cutoff, archive_duration, misc.config['archivedays'], misc.am_i_official() # We observe the rules set up in the config. if file_name.startswith( 'slices') and ctime < slice_cutoff or ctime < cutoff: logging.debug("Prune[remove]: %s (ctime)" % file_name) os.unlink(file_name) count += 1 # We want to make sure we aren't archiving the slices elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith( 'slice') and misc.am_i_official(): logging.debug("Prune[cloud]: %s" % file_name) # Only unlink the file if I can successfully put it into the cloud. if put(file_name): try: os.unlink(file_name) except: logging.debug("Prune[cloud]: Couldn't remove %s" % file_name) for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS): ctime = os.path.getctime(file_name) # We observe the rules set up in the config. if ctime < cutoff: logging.debug("Prune: %s" % file_name) os.unlink(file_name) count += 1 # The map names are different since there may or may not be a corresponding # cloud thingie associated with it. db = DB.connect() # Don't do this f*****g shit at all because f**k this so hard. #logging.info('select name, id from streams where end_unix < date("now", "-%d seconds") or (end_minute - start_minute < 0.05 and start_unix < date("now", "%d seconds"))' % (archive_duration, TS.get_offset() * 60 - 1200)) unlink_list = db['c'].execute( 'select name, id from streams where end_unix < date("now", "-%d seconds")' % (archive_duration)).fetchall() for file_name_tuple in unlink_list: file_name = str(file_name_tuple[0]) id = file_name_tuple[1] logging.debug("Prune[remove]: %s (unlink list)" % file_name) # If there's a cloud account at all then we need to unlink the # equivalent mp3 file if cloud_cutoff and misc.am_i_official(): cloud.unlink(file_name) # After we remove these streams then we delete them from the db.
db['c'].execute('delete from streams where id = %d' % id) db['conn'].commit() # now only after we've deleted from the cloud can we delete the local file if os.path.exists(file_name): os.unlink(file_name) count += 1 logging.info("Found %d files older than %s days." % (count, misc.config['archivedays'])) lockMap['prune'].release()
def cback(data): global g_download_kill_pid """ if len(data): catchall('download', json.dumps([g_download_kill_pid, nl['pid'], len(data)])) else: catchall('download', json.dumps([g_download_kill_pid, 'no data'])) """ # print nl['pid'], g_download_kill_pid if nl['pid'] <= g_download_kill_pid or not data: logging.info("Stopping download #%d" % nl['pid']) return False # misc.params can fail based on a shutdown sequence. if misc is None or misc.params is None or not misc.manager_is_running( ): # if misc is not None: # misc.shutdown() return False elif not misc.params['shutdown_time']: if not misc.download_ipc.empty(): what, value = misc.download_ipc.get(False) if what == 'shutdown_time': misc.params['shutdown_time'] = value elif TS.unixtime('dl') > misc.params['shutdown_time']: raise TypeError("Download Stop") if misc.params['isFirst'] == True: misc.params['isFirst'] = False if len(data) < 800: try: data_string = data.decode('utf-8') if re.match('https?://', data_string): # If we are getting a redirect then we don't mind, we # just put it in the stream and then we leave misc.queue.put(('stream', data_string.strip())) return False # A pls style playlist elif re.findall('File\d', data_string, re.M): logging.info( '%d: Found a pls, using the File1 parameter' % (nl['pid'], )) matches = re.findall('File1=(.*)\n', data_string, re.M) misc.queue.put(('stream', matches[0].strip())) return False # If it gets here it's binary ... I guess that's fine. except: pass # This provides a reliable way to determine bitrate. We look at how much # data we've received between two time periods misc.queue.put( ('heartbeat', (TS.unixtime('hb'), nl['pid'], len(data)))) if not nl['stream']: try: nl['stream'] = open(file_name, 'wb') except Exception as exc: logging.critical( "%d: Unable to open %s. Can't record. Must exit." % (nl['pid'], file_name)) return False nl['stream'].write(data)
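The ('heartbeat', (timestamp, pid, nbytes)) tuples queued above are what stream_manager uses to infer a format-agnostic bitrate: ignore a short settling window, total the bytes that arrive afterwards, divide by the elapsed time, and snap to the nearest 8 Kbps. A standalone sketch of that calculation (function name, arguments, and defaults are illustrative, not part of the daemon):

def estimate_bitrate(samples, normalize_delay=6, settle=60):
    # samples: iterable of (unix_timestamp, nbytes) pairs in arrival order.
    first_time = None
    total_bytes = 0
    for ts, nbytes in samples:
        if first_time is None:
            first_time = ts
            continue
        elapsed = ts - first_time
        if elapsed <= normalize_delay:
            # Skip the ramp-up: HE-AAC+ quantizer settling and canned
            # station intros would otherwise skew the average.
            continue
        total_bytes += nbytes
        if elapsed > normalize_delay + settle:
            est = total_bytes / (elapsed - normalize_delay)  # bytes per second
            return int(round(est / 1000) * 8)                # nearest 8 Kbps step
    return None  # not enough samples yet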