Example #1
0
def base_stats():
  # Reports base-level statistical information about the health of the server.
  # This is used for the /stats and /heartbeat call.
  try:
    # for some reason this can lead to a memory error
    load = [float(unit) for unit in os.popen("/usr/bin/uptime | awk -F : ' { print $NF } '").read().split(', ')]

  except:
    load = 0

  uptime = TS.uptime()
  return {
    'human-uptime': "%dd %02d:%02d:%02d" % ( uptime // TS.ONE_DAY_SECOND, (uptime // TS.ONE_HOUR_SECOND) % 24, (uptime // 60) % 60, uptime % 60 ),
    'human-now': TS.ts_to_name(),
    'computer-uptime': uptime,
    'computer-now': time.time(),
    'last-recorded': float(DB.get('last_recorded', use_cache=False) or 0),
    'hits': DB.run('select sum(value) from kv where key like "%hit%"').fetchone()[0],
    'version': __version__,
    'uuid': config['uuid'],
    'next-prune': int(last_prune - (TS.unixtime('prune') - prune_duration)), 
    'load': load,
    'files': [m.path for m in psutil.Process().open_files()],
    'connections': len(psutil.Process().connections()),
    'memory': [
      # Current memory footprint in MB
      psutil.Process(os.getpid()).memory_info().rss / (1024.0 * 1024), 
      
      # Maximum lifetime memory footprint in MB
      resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
    ],
    'threads': [ thread.name for thread in threading.enumerate() ],
    'disk': cloud.size('.') / (1024.0 ** 3)
  }
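
For reference, the 'human-uptime' arithmetic above can be written out with divmod so the day/hour/minute/second breakdown is explicit. A minimal standalone sketch, assuming TS.ONE_DAY_SECOND is 86400 and TS.ONE_HOUR_SECOND is 3600:

def human_uptime(uptime_sec):
  # Same formatting as the 'human-uptime' field above: days, then HH:MM:SS
  days, rest = divmod(int(uptime_sec), 86400)
  hours, rest = divmod(rest, 3600)
  minutes, seconds = divmod(rest, 60)
  return "%dd %02d:%02d:%02d" % (days, hours, minutes, seconds)

# human_uptime(93784) -> '1d 02:03:04'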
Example #2
0
def base_stats():
  # Reports base-level statistical information about the health of the server.
  # This is used for the /stats and /heartbeat call.
  try:
    # for some reason this can lead to a memory error
    load = [float(unit) for unit in os.popen("/usr/bin/uptime | awk -F : ' { print $NF } '").read().split(', ')]

  except:
    load = 0

  uptime = TS.uptime()
  return {
    'human-uptime': "%dd %02d:%02d:%02d" % ( uptime // TS.ONE_DAY_SECOND, (uptime // TS.ONE_HOUR_SECOND) % 24, (uptime // 60) % 60, uptime % 60 ),
    'human-now': TS.ts_to_name(),
    'computer-uptime': uptime,
    'computer-now': time.time(),
    'last-recorded': float(DB.get('last_recorded', use_cache=False) or 0),
    'hits': DB.run('select sum(value) from kv where key like "%hit%"').fetchone()[0],
    'version': __version__,
    'uuid': config['uuid'],
    'next-prune': int(last_prune - (TS.unixtime('prune') - prune_duration)), 
    'load': load,
    'files': [m.path for m in psutil.Process().open_files()],
    'connections': len(psutil.Process().connections()),
    'memory': [
      # Current memory footprint in MB
      psutil.Process(os.getpid()).memory_info().rss / (1024.0 * 1024), 
      
      # Maximum lifetime memory footprint in MB
      resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
    ],
    'threads': [ thread.name for thread in threading.enumerate() ],
    'disk': cloud.size('.') / (1024.0 ** 3)
  }
Example #3
0
    def cback(data):

        if not misc.params['shutdown_time']:
            if not misc.download_ipc.empty():
                what, value = misc.download_ipc.get(False)
                if what == 'shutdown_time':
                    misc.params['shutdown_time'] = value

        elif TS.unixtime('dl') > misc.params['shutdown_time']:
            sys.exit(0)

        if misc.params['isFirst']:
            misc.params['isFirst'] = False

            if len(data) < 800:
                if re.match('https?://', data):
                    # If we are getting a redirect then we don't mind, we
                    # just put it in the stream and then we leave
                    misc.queue.put(('stream', data.strip()))
                    return True

                # A pls style playlist
                elif re.findall(r'File\d', data, re.M):
                    logging.info('Found a pls, using the File1 parameter')
                    matches = re.findall(r'File1=(.*)\n', data, re.M)
                    misc.queue.put(('stream', matches[0].strip()))
                    return True

        # This provides a reliable way to determine bitrate.  We look at how much
        # data we've received between two time periods
        misc.queue.put(('heartbeat', (TS.unixtime('hb'), len(data))))

        if not nl['stream']:
            try:
                nl['stream'] = open(file_name, 'w')

            except Exception as exc:
                logging.critical(
                    "Unable to open %s. Can't record. Must exit." % file_name)
                sys.exit(-1)

        nl['stream'].write(data)

        if not misc.manager_is_running():
            misc.shutdown()
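
To make the playlist sniffing above concrete, here is a small illustrative sketch of what the File1 regex pulls out of a pls-style body (the sample text is made up):

import re

sample_pls = "[playlist]\nFile1=http://example.com/stream.mp3\nTitle1=Demo\nNumberOfEntries=1\n"

if re.findall(r'File\d', sample_pls, re.M):
  stream_url = re.findall(r'File1=(.*)\n', sample_pls, re.M)[0].strip()
  # stream_url == 'http://example.com/stream.mp3'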
Example #4
0
  def cback(data): 

    if not misc.params['shutdown_time']:
      if not misc.download_ipc.empty():
        what, value = misc.download_ipc.get(False)
        if what == 'shutdown_time':
          misc.params['shutdown_time'] = value

    elif TS.unixtime('dl') > misc.params['shutdown_time']:
      sys.exit(0)

    if misc.params['isFirst']:
      misc.params['isFirst'] = False

      if len(data) < 800:
        if re.match('https?://', data):
          # If we are getting a redirect then we don't mind, we
          # just put it in the stream and then we leave
          misc.queue.put(('stream', data.strip()))
          return True

        # A pls style playlist
        elif re.findall(r'File\d', data, re.M):
          logging.info('Found a pls, using the File1 parameter')
          matches = re.findall(r'File1=(.*)\n', data, re.M)
          misc.queue.put(('stream', matches[0].strip()))
          return True

    # This provides a reliable way to determine bitrate.  We look at how much 
    # data we've received between two time periods
    misc.queue.put(('heartbeat', (TS.unixtime('hb'), len(data))))

    if not nl['stream']:
      try:
        nl['stream'] = open(file_name, 'w')

      except Exception as exc:
        logging.critical("Unable to open %s. Can't record. Must exit." % file_name)
        sys.exit(-1)

    nl['stream'].write(data)

    if not misc.manager_is_running():
      misc.shutdown()
Example #5
0
def manager(config):
    # Main flask process that manages the end points.
    app = Flask(__name__)

    def webserver_shutdown(signal=15, frame=None):
        logging.info('Shutting down webserver')
        request.environ.get('werkzeug.server.shutdown')()

    def success(message):
        return jsonify({'res': True, 'message': message}), 200

    def fail(message):
        return jsonify({'res': False, 'message': message}), 500

    # from http://blog.asgaard.co.uk/2012/08/03/http-206-partial-content-for-flask-python
    @app.after_request
    def after_request(response):
        # Supports 206 partial content requests for podcast streams.
        response.headers.add('Accept-Ranges', 'bytes')
        logging.info('ua - %s' % request.headers.get('User-Agent'))
        return response

    def send_file_partial(path, requested_path='', file_name=None):
        # Wrapper around send_file which handles HTTP 206 Partial Content
        # (byte ranges)

        # If we requested something that isn't around, then we bail.
        if not os.path.exists(path):
            return 'File %s not found. Perhaps the stream is old?' % requested_path, 404

        range_header = request.headers.get('Range', None)
        if not range_header:
            with open(path, 'rb') as f:
                data = f.read()

            rv = Response(data,
                          200,
                          mimetype=audio.our_mime(),
                          direct_passthrough=True)
            if not file_name:
                file_name = os.path.basename(path)

            rv.headers.add('Content-Disposition',
                           'attachment; filename="%s"' % file_name)
            return rv

        size = os.path.getsize(path)
        byte1, byte2 = 0, None

        m = re.search(r'(\d+)-(\d*)', range_header)
        g = m.groups()

        if g[0]:
            byte1 = int(g[0])

        if g[1]:
            byte2 = int(g[1])

        length = size - byte1
        if byte2 is not None:
            length = byte2 - byte1

        data = None
        with open(path, 'rb') as f:
            f.seek(byte1)
            data = f.read(length + 1)

        rv = Response(data,
                      206,
                      mimetype=audio.our_mime(),
                      direct_passthrough=True)
        disposition = 'attachment;'

        if file_name:
            disposition += ' filename="%s"' % file_name

        rv.headers.add('Content-Disposition', disposition)
        rv.headers.add('Content-Range',
                       'bytes {0}-{1}/{2}'.format(byte1, byte1 + length, size))

        return rv

    # From http://stackoverflow.com/questions/13317536/get-a-list-of-all-routes-defined-in-the-app
    @app.route("/help")
    def site_map():
        """ 
    Shows all the end points supported by the current server, the options 
    and the documentation.
    """
        output = [
            '-=#| Welcome to indycast %s API help |#=-' % misc.__version__, ''
        ]

        for rule in app.url_map.iter_rules():

            if rule.endpoint == 'static': continue

            options = {}
            for arg in rule.arguments:
                options[arg] = "{0}".format(arg)

            url = url_for(rule.endpoint, **options)
            line = "{} {}".format(url,
                                  app.view_functions[rule.endpoint].__doc__)
            output.append(line)
            output.append("")

        return Response('\n'.join(output), mimetype='text/plain')

    @app.route('/robots.txt')
    def robots():
        """
    Sends off robots.txt for crawlers
    """
        return send_file('%s/robots.txt' % (misc.source_dir, ))

    @app.route('/uuid')
    def my_uuid():
        """ 
    Returns this server's uuid which is generated each time it is run.
    This is used to determine whether this is the official server or not.
    """
        return misc.config['uuid']

    @app.route('/db')
    def database():
        """ 
    Backs up the current sqlite3 db and sends a gzipped version of it as the response.
    """
        filename = '%s/%s/%s-%s.gz' % (
            misc.config['storage'], misc.DIR_BACKUPS, misc.config['callsign'],
            time.strftime('%Y%m%d-%H%M', time.localtime()))
        os.popen('/usr/bin/sqlite3 config.db .dump | /bin/gzip -9 > %s' %
                 filename)
        time.sleep(1)
        return send_file(filename)

    @app.route('/rename')
    def rename():
        return cloud.rename()

    @app.route('/reindex')
    def reindex():
        """ 
    Starts the prune process which cleans up and offloads audio files but also re-indexes
    the database.

    This is useful in the cases where bugs have led to improper registration of the 
    streams and a busted building of the database.  It's fairly expensive in I/O costs 
    so this shouldn't be done as the default.
    """
        cloud.prune(reindex=True)
        return success('Reindexing...')

    @app.route('/prune')
    def prune():
        """ 
    Starts the prune sub-process which cleans up and offloads audio files 
    following the rules outlined in the configuration file (viewable with the stats call)
    """
        cloud.prune(force=True)
        return success('Pruning...')

    @app.route('/slices/<time>/<name>')
    def send_named_stream(time, name):
        """
    Similar to the /slices/path endpoint, this end point sends a stream that is at time <time> with
    name <name>.
    """
        return send_stream(time, download_name=name)

    @app.route('/slices/<path:path>')
    def send_stream(path, download_name=None):
        """
    Downloads a stream from the server. The path is callsign-date_duration.mp3

      * callsign: The callsign returned by /stats
      * date: in the format YYYYMMDDHHMM such as 201508011005 for 
        2015-08-01 10:05
      * duration: A value, in minutes, to return.

    The mp3 extension should be used regardless of the actual format of the stream -
    although the audio returned will be in the streams' native format.
    
    The streams are created and sent on-demand, so there may be a slight delay before
    it starts.
    """
        DB.incr('hits-dl')
        base_dir = "%s%s/" % (config['storage'], misc.DIR_SLICES)

        if not path.startswith(config['callsign']):
            path = "%s-%s" % (config['callsign'], path)

        if not path.endswith('.mp3'):
            path = "%s.mp3" % path

        file_name = base_dir + path

        # If the file doesn't exist, then we need to slice it and create it based on our query.
        # Also, if it's a zero byte file, then we try to create it again.
        if not os.path.isfile(file_name) or os.path.getsize(file_name) == 0:
            cloud.register_stream_list()

            # This tells us that if it were to exist, it would be something
            # like this.
            request_info = audio.stream_info(file_name)
            logging.info(("expected value", request_info))

            # we can do something rather specific here ...
            #
            # first we get our generic stream list using our start_minute from the info.
            stream_list, episode_list = cloud.find_streams(
                start_list=[request_info['start_minute']],
                duration_min=request_info['duration_sec'] / 60.0)

            for ep in episode_list:
                episode = ep[0]
                first_slice = episode[0]

                if first_slice['week_number'] == request_info['week_number']:
                    # This means that we've found the episode that we want
                    # We will block on this.
                    relative_start_minute = request_info[
                        'start_minute'] - first_slice['start_minute']

                    logging.info(episode)
                    audio.stitch_and_slice_process(
                        file_list=episode,
                        relative_start_minute=relative_start_minute,
                        duration_minute=request_info['duration_sec'] / 60.0,
                        destination_path=path)

                    # And break out of our loop ... now everything should exist.
                    break

        return send_file_partial("%s/%s" % (base_dir, path),
                                 requested_path=path,
                                 file_name=download_name)

    @app.route('/halt')
    def halt():
        """
    Stops the webserver. This request must be issued from the localhost
    in order to succeed.
    """
        if request.remote_addr == '127.0.0.1':
            webserver_shutdown()
            misc.shutdown(do_restart=False)
            return success('halt...')

        else:
            return fail('halt aborted. Must be requested from the localhost')

    @app.route('/restart')
    def restart():
        """ 
    Restarts an instance. This does so in a gapless non-overlapping way.
    """
        webserver_shutdown()
        misc.shutdown(do_restart=True)
        return success('restarting...')

    @app.route('/dolist')
    def dolist():
        return success(misc.queue_dbg())

    @app.route('/upgrade')
    def upgrade():
        """
    Goes to the source directory, pulls down the latest from git
    and if the versions are different, the application restarts.
    """
        cwd = os.getcwd()
        os.chdir(misc.source_dir)

        os.system('/usr/bin/git pull')

        # See what the version is after the pull
        newversion = os.popen("/usr/bin/git describe").read().strip()

        if newversion != misc.__version__:
            os.system('/usr/local/bin/pip install --user -r requirements.txt')

            # from http://blog.petrzemek.net/2014/03/23/restarting-a-python-script-within-itself/
            misc.shutdown(do_restart=True)
            return success("Upgrading from %s to %s" %
                           (misc.__version__, newversion))

        os.chdir(cwd)
        return success('Version %s is current' % misc.__version__)

    @app.route('/heartbeat')
    def heartbeat():
        """
    A low resource version of the /stats call ... this is invoked
    by the server health check.  Only the vitals are reported.
    
    It helps us see if disk space is going nuts or if we aren't recording
    right now.
    
    This allows us to check if a restart happened between invocations.
    """
        return jsonify(misc.base_stats()), 200

    @app.route('/stats')
    def stats():
        """ 
    Reports various statistical metrics on a particular server.  
    Use this with the graph.py tool to see station coverage.
    """
        misc.am_i_official()

        stats = misc.base_stats()

        lockMap = {}
        for k, v in misc.lockMap.items():
            lock = v.acquire(False)
            lockMap[k] = True
            if lock:
                lockMap[k] = False
                v.release()

        stats.update({
            'kv':
            DB.all('kv'),
            'locks':
            lockMap,
            'pwd':
            os.getcwd(),
            'free':
            os.popen("/bin/df -h / | /usr/bin/tail -1").read().strip(),
            # Reporting the list as fractional GB is more useful.
            'streams':
            DB.all('streams', sort_by='start_unix'),
            'config':
            misc.public_config()
        })

        return jsonify(stats), 200

    # Using http://flask.pocoo.org/docs/0.10/patterns/streaming/ as a reference.
    @app.route('/live/<start>')
    def live(start, offset_min=0):
        """ 
    Sends off a live-stream equivalent.  Two formats are supported:

     * start time - In the form of strings such as "1pm" or "2:30pm"
     * offset - starting with a negative "-", this means "from the present".
        For instance, to start the stream from 5 minutes ago, you can do "-5"

    """
        DB.incr('hits-live')
        if start[0] == '-' or start.endswith('min'):
            # dump things like min or m
            start = re.sub('[a-z]', '', start)
            return redirect('/live/m%f' %
                            (float(TS.minute_now() - abs(float(start)))),
                            code=302)

        # The start is expressed in times like "11:59am ..." We utilize the
        # library we wrote for streaming to get the minute of day this is.
        if start[0] == 'm':
            requested_minute = float(start[1:]) % TS.ONE_DAY_MINUTE

        else:
            candidate = start
            requested_minute = TS.to_utc('mon', candidate) - offset_min

        offset_sec = 0
        range_header = request.headers.get('Range', None)
        if range_header:
            m = re.search(r'(\d+)-', range_header)
            g = m.groups()
            if g[0]:
                byte1 = int(g[0])

                # We use the byte to compute the offset
                # Guard against a missing or zero bitrate (fall back to 128 kbps)
                offset_sec = float(byte1) / ((int(DB.get('bitrate') or 0) or 128) *
                                             (1000 / 8.0))

        #print "--- REQUEST @ ", start, range_header, offset_sec
        current_minute = TS.minute_now() % TS.ONE_DAY_MINUTE

        now_time = TS.now()
        requested_time = now_time - timedelta(
            minutes=current_minute) + timedelta(minutes=requested_minute)

        # print requested_time, now_time, requested_minute, current_minute
        # If the requested minute is greater than the current one, then we can presume that
        # the requested minute refers to yesterday ... as in, someone wants 11pm
        # and now it's 1am.
        if requested_minute > current_minute:
            requested_time -= timedelta(days=1)

        # It's important to do this AFTER the operation above otherwise we wrap around to yesterday
        requested_time += timedelta(seconds=offset_sec)

        # Get the info for the file that contains this timestamp
        start_info, requested_time_available = cloud.get_file_for_ts(
            target_time=requested_time, bias=-1)

        if start_info is None or requested_time_available is None:
            return do_error("Can't find any matching files")

        requested_time = max(requested_time, requested_time_available)
        start_second = (requested_time -
                        start_info['start_date']).total_seconds()

        response = Response(audio.list_slice_stream(start_info, start_second),
                            mimetype=audio.our_mime())

        return response

    @app.route('/at/<start>/<duration_string>')
    def at(start, duration_string='1hr'):
        """
    Sends a stream using a human-readable (and human-writable) definition 
    at start time.  This uses the dateutils.parser library and so strings 
    such as "Monday 2pm" are accepted.

    Because the space, 0x20 is such a pain in HTTP, you can use "_", 
    "-" or "+" to signify it.  For instance,

        /at/monday_2pm/1hr

    Will work fine
    """
        # If it's say 1am, and I request 11pm without a day specification, it will go
        # to 11pm LAST week and not the 11pm from 2 hours ago.
        dt = TS.str_to_time(start)
        duration_min = TS.duration_parse(duration_string)
        endpoint = '%s-%s_%d.mp3' % (misc.config['callsign'],
                                     TS.ts_to_name(dt), duration_min)
        return send_stream(endpoint, download_name=endpoint)

    @app.route('/<weekday>/<start>/<duration_string>')
    def at_method2(weekday, start, duration_string):
        """
    This is identical to the stream syntax, but instead it is similar to
    /at ... it uses the same notation but instead returns an audio file
    directly.

    You must specify a single weekday ... I know, total bummer.
    """
        weekday_map = {
            'mon': 'monday',
            'tue': 'tuesday',
            'wed': 'wednesday',
            'thu': 'thursday',
            'fri': 'friday',
            'sat': 'saturday',
            'sun': 'sunday'
        }

        # The alternative form for this is something like
        # /tuesday_8pm/1hr/showname.xml
        if duration_string.count('.') > 0:
            dt = TS.str_to_time(weekday)
            start_time = TS.extract_time(weekday)

            # order is a little incompatible.
            return stream(weekday=TS.to_minute(dt),
                          start=start_time,
                          duration_string=start,
                          showname=duration_string)

        if weekday not in weekday_map:
            return 'The first parameter, %s, is not a recognized weekday.' % weekday

        return at("%s_%s" % (weekday_map[weekday], start), duration_string)

    @app.route('/<weekday>/<start>/<duration_string>/<showname>')
    def stream(weekday, start, duration_string, showname):
        """
    Returns a podcast, m3u, pls or mp3 file based on the weekday, start and duration.
    This is designed to be read by podcasting software such as podkicker, 
    itunes, and feedburner.

    weekdays are defined as mon, tue, wed, thu, fri, sat, sun.

    If a show occurs multiple times per week, this can be specified with
    a comma.  for instance,

    /mon,tue,fri/4pm/1hr
    
    The showname should be followed by an xml, pls, m3u, or mp3 extension.
    In the case of using the .mp3 extension, it only returns the most recent 
    episode.

    It should also be viewable in a modern web browser.

    If you can find a podcaster that's not supported, please send an email 
    to [email protected].
    """

        if isinstance(weekday, (float)):
            start_time_list = [weekday]
            weekday_list = [TS.WEEKDAY_LIST[int(weekday / (60 * 24))]]

        else:
            # Supports multiple weekdays
            weekday_list = weekday.split(',')
            start_time_list = [TS.to_utc(day, start) for day in weekday_list]

        duration_min = TS.duration_parse(duration_string)

        # This means we failed to parse
        if not duration_min:
            return do_error("duration '%s' is not set correctly" %
                            duration_string)

        if not isinstance(start_time_list[0], (int, float)):
            return do_error('weekday and start times are not set correctly')

        buffer_show = 2
        # In #22 We're going to add 2 minutes to the duration to make sure that we get
        # the entire episode.
        duration_min += (buffer_show * 2)

        # And according to #149 we also go a minute back for the start time ...
        # we need to do a little math to make sure we don't get a -1 edge case
        start_time_list = [
            (TS.MINUTES_PER_WEEK + offset - buffer_show) % TS.MINUTES_PER_WEEK
            for offset in start_time_list
        ]

        # If we are here then it looks like our input is probably good.

        # Strip the .xml from the showname ... this will be used in our xml.
        file_type = showname[-3:]
        showname = showname[:-4]

        # We come in with spaces as underscores so here we translate that back
        showname = re.sub('_', ' ', showname)

        # Make sure that we have all of our streams registered before trying
        # to infer what we can send to the user.
        cloud.register_stream_list()

        # Look for streams that we have which match this query and duration.
        # This will also create slices if necessary in a sub process.
        # The list of files that returns will include this not-yet-created
        # file-name as essentially a "promise" to when it will be made.
        feed_list = cloud.find_and_make_slices(start_time_list, duration_min)
        # print feed_list

        # Then, taking those two things, make a feed list from them.
        return generate_feed(file_type=file_type,
                             showname=showname,
                             feed_list=feed_list,
                             duration_min=duration_min,
                             weekday_list=weekday_list,
                             start=start,
                             duration_string=duration_string)

    if __name__ == 'lib.server':
        # When we do an upgrade or a restart, there's a race condition of getting to start this server
        # before the previous one has cleaned up all the socket work.  So if the time is under our
        # patience threshold then we sleep a second and just try again, hoping that it will work.
        patience = misc.PROCESS_DELAY * 2
        attempt = 1

        start = TS.unixtime('delay')

        while TS.unixtime('delay') - start < (patience + 3):
            logging.info('Listening on %s' % config['port'])

            try:
                app.run(threaded=True,
                        use_reloader=False,
                        port=config['port'],
                        host='0.0.0.0')
                break

            except Exception as exc:
                if TS.unixtime('delay') - start < patience:
                    logging.info(
                        '[attempt: %d] Error, cannot start server ... perhaps %s is already in use?'
                        % (attempt, config['port']))
                    attempt += 1
                    time.sleep(misc.PROCESS_DELAY / 4)

                elif TS.unixtime('delay') - start < (patience + 4):
                    pid = os.popen(
                        "netstat -anlp | grep :%s | awk ' { print $NF }' | sed 's/\/.*//'"
                        % config['port']).read().strip().split('\n')[0]

                    try:
                        logging.info("F**k it, I'm killing pid %s." % pid)
                        os.kill(int(pid), 15)

                    except:
                        pass

                    time.sleep(misc.PROCESS_DELAY / 4)
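
As a worked illustration of the byte-range handling in send_file_partial above, this is how a Range header maps onto the offset, length, and Content-Range used for the 206 response (all values are made up):

import re

range_header = 'bytes=1000-2047'    # illustrative request header
size = 500000                       # illustrative file size

m = re.search(r'(\d+)-(\d*)', range_header)
byte1 = int(m.group(1))                          # 1000
byte2 = int(m.group(2)) if m.group(2) else None  # 2047

length = (byte2 - byte1) if byte2 is not None else (size - byte1)   # 1047

# The handler reads length + 1 bytes (1048) starting at offset byte1 and
# replies with 'Content-Range: bytes 1000-2047/500000'.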
Example #6
0
  def cback(data): 
    global g_download_kill_pid

    """
      if len(data):
        catchall('download', json.dumps([g_download_kill_pid, nl['pid'], len(data)]))
      else:
        catchall('download', json.dumps([g_download_kill_pid, 'no data']))
    """
    # print nl['pid'], g_download_kill_pid
    if nl['pid'] <= g_download_kill_pid or not data:
      logging.info("Stopping download #%d" % nl['pid'])
      return False

    # misc.params can fail based on a shutdown sequence.
    if misc is None or misc.params is None or not misc.manager_is_running():
      # if misc is not None:
      #  misc.shutdown()
      return False

    elif not misc.params['shutdown_time']:
      if not misc.download_ipc.empty():
        what, value = misc.download_ipc.get(False)
        if what == 'shutdown_time':
          misc.params['shutdown_time'] = value

    elif TS.unixtime('dl') > misc.params['shutdown_time']:
      raise TypeError("Download Stop")

    if misc.params['isFirst']:
      misc.params['isFirst'] = False

      if len(data) < 800:
        try:
          data_string = data.decode('utf-8')
        
          if re.match('https?://', data_string):
            # If we are getting a redirect then we don't mind, we
            # just put it in the stream and then we leave
            misc.queue.put(('stream', data_string.strip()))
            return False

          # A pls style playlist
          elif re.findall(r'File\d', data_string, re.M):
            logging.info('%d: Found a pls, using the File1 parameter' % (nl['pid'], ))
            matches = re.findall(r'File1=(.*)\n', data_string, re.M)
            misc.queue.put(('stream', matches[0].strip()))
            return False

        # If it gets here it's binary ... I guess that's fine.
        except:
          pass

    # This provides a reliable way to determine bitrate.  We look at how much 
    # data we've received between two time periods
    misc.queue.put(('heartbeat', (TS.unixtime('hb'), nl['pid'], len(data))))

    if not nl['stream']:
      try:
        nl['stream'] = open(file_name, 'wb')

      except Exception as exc:
        logging.critical("%d: Unable to open %s. Can't record. Must exit." % (nl['pid'], file_name))
        return False

    nl['stream'].write(data)
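
The first-chunk sniffing in this variant can also be read as a small helper on its own. A hedged sketch (the helper name and the 800-byte cutoff mirror the code above; the sample logic is otherwise illustrative):

import re

def sniff_redirect_or_pls(first_chunk):
  # Returns an alternate stream URL if the first bytes are a tiny text payload
  # (an http redirect or a pls playlist) rather than audio, otherwise None.
  if len(first_chunk) >= 800:
    return None

  try:
    text = first_chunk.decode('utf-8')
  except UnicodeDecodeError:
    return None  # binary audio; nothing to sniff

  if re.match('https?://', text):
    return text.strip()

  if re.findall(r'File\d', text, re.M):
    return re.findall(r'File1=(.*)\n', text, re.M)[0].strip()

  return None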
Example #7
0
def stream_manager():
  global g_download_kill_pid
  import random

  # Manager process which makes sure that the
  # streams are running appropriately.
  callsign = misc.config['callsign']

  #
  # AAC bitrate is some non-trivial thing that even ffprobe doesn't
  # do a great job at. This solution looks at number of bits that
  # transit over the wire given a duration of time, and then uses
  # that to compute the bitrate, since in practice, that's what
  # bitrate effectively means, and why it's such an important metric.
  #
  # This is to compute a format agnostic bitrate
  # (see heartbeat for more information)
  #
  has_bitrate = DB.get('bitrate')
  if has_bitrate and int(has_bitrate) == 0:
    has_bitrate = False

  first_time = 0
  total_bytes = 0
  normalize_delay = 6
  cycle_count = 0

  cascade_time = misc.config['cascade_time']
  cascade_buffer = misc.config['cascade_buffer']
  cascade_margin = cascade_time - cascade_buffer

  last_prune = 0
  last_success = 0
  last_heartbeat = None
  
  change_state = None
  SHUTDOWN = 1
  RESTART = 2
  shutdown_time = None
  misc.download_ipc = Queue()

  # Number of seconds to be cycling
  cycle_time = misc.config['cycle_time']

  process = None
  process_next = None

  # The manager will be the one that starts this.
  #server.manager(misc.config)
  webserver = Thread(target=server.manager, name='Webserver', args=(misc.config,))
  webserver.start()

  file_name = None

  # A wrapper function to start a download process
  def download_start(file_name):
    """ Starts a process that manages the downloading of a stream. """
    global g_download_pid

    g_download_pid += 1

    #
    # There may be a multi-second lapse time from the naming of the file to
    # the actual start of the download so we should err on that side by putting it
    # in the future by some margin
    #
    file_name = '%s/%s-%s.mp3' % (misc.DIR_STREAMS, callsign, TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
    logging.info('Starting download #%d (%s). Next up in %ds' % (g_download_pid, file_name, cascade_margin))

    process = Thread(target=stream_download, name='Download-%d:%s' % (g_download_pid, TS.ts_to_name()), args=(callsign, misc.config['stream'], g_download_pid, file_name))
    process.daemon = True
    process.start()
    return [file_name, process]


  # see https://github.com/kristopolous/DRR/issues/91:
  # Randomize prune to offload disk peaks
  prune_duration = misc.config['prune_every'] * (1.10 - random.random() / 5.0)
  misc.prune_duration = prune_duration

  last_heartbeat_tid = -1
  while True:
    #
    # We cycle this to off for every run. By the time we get through the queue, so long
    # as we aren't supposed to be shutting down, this should be toggled back to true.
    #
    if last_prune < (TS.unixtime('prune') - prune_duration):
      prune_duration = misc.config['prune_every'] * (1.10 - random.random() / 5.0)
      misc.prune_duration = prune_duration
      # We just assume it can do its business in under a day
      prune = cloud.prune()
      last_prune = TS.unixtime('prune')
      misc.last_prune = last_prune

    # Increment the amount of time this has been running
    if cycle_count % 30 == 0:
      # we only do these things occasionally, they 
      # are either not very important or are not
      # expected to change that often
      TS.get_offset()

    cycle_count += 1

    lr_set = False
    expired_heartbeat = last_heartbeat and time.time() - last_heartbeat > cycle_time * 2

    while not misc.queue.empty():
      what, value = misc.queue.get(False)

      # The curl proces discovered a new stream to be
      # used instead.
      if what == 'stream':
        misc.config['stream'] = value
        logging.info("Using %s as the stream now" % value)
        # We expire our heartbeat in order to force a new stream
        # to start
        expired_heartbeat = True

      elif what == 'db-debug':
        DB.debug()

      elif what == 'shutdown':
        change_state = SHUTDOWN

      elif what == 'restart':
        logging.info(DB.get('runcount', use_cache=False))
        cwd = os.getcwd()
        os.chdir(misc.PROCESS_PATH)
        Popen(sys.argv)
        os.chdir(cwd)

        change_state = RESTART

        # Try to record for another restart_overlap seconds - make sure that
        # we don't perpetually put this in the future due to some bug.
        if not shutdown_time:
          shutdown_time = TS.unixtime('dl') + misc.config['restart_overlap']
          logging.info("Restart requested ... shutting down download at %s" % TS.ts_to_name(shutdown_time, with_seconds=True))

          #misc.shutdown_real(do_restart=False)
          #misc.download_ipc.put(('shutdown_time', shutdown_time))

          while True:
            time.sleep(5)
            with open(misc.PIDFILE_MANAGER, 'r') as f:
              manager_pid = f.read()

            #print manager_pid, os.getpid(), manager_pid == os.getpid()
            #logging.info(DB.get('runcount', use_cache=False))
            #logging.info(('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:]) ).read().strip())
            ps_out = int(os.popen('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:]) ).read().strip())

            if ps_out > 1: 
              logging.info("Found %d potential candidates (need at least 2)" % ps_out)
              # This makes it a restricted soft shutdown
              misc.shutdown_real(do_restart=True)
              misc.download_ipc.put(('shutdown_time', shutdown_time))
              break

            else:
              Popen(sys.argv)
              logging.warn("Couldn't find a replacement process ... not going anywhere.");

      elif what == 'heartbeat':
        if not lr_set:
          lr_set = True
          last_heartbeat = time.time()
          last_heartbeat_tid = value[1]

          if last_heartbeat_tid < g_download_kill_pid:
            logging.warn("hb: Got a heartbeat for #%d but everything below #%d should be gone!" % (last_heartbeat_tid, g_download_kill_pid))

          DB.set('last_recorded', time.time())

        if not has_bitrate: 
          margin = 60

          # Keep track of the first time this stream started (this is where our total
          # byte count is derived from)
          if not first_time: 
            first_time = value[0]

          #
          # Otherwise we give a large (in computer time) margin of time to confidently
          # guess the bitrate.  I didn't do great at stats in college, but in my experiments,
          # the estimation falls within 98% of the destination.  I'm pretty sure it's really
          # unlikely this will come out erroneous, but I really can't do the math, it's probably
          # a T value, but I don't know. Anyway, whatevs.
          #
          # The normalize_delay here is for both he-aac+ streams which need to put in some frames
          # before the quantizing pushes itself up and for other stations which sometimes put a canned
          # message at the beginning of the stream, like "Live streaming supported by ..."
          #
          # When we discount the first half-dozen seconds as not being part of the total, we get a
          # stabilizing convergence far quicker.
          #
          elif (value[0] - first_time > normalize_delay):
            # If we haven't determined this stream's bitrate (which we use to estimate 
            # the amount of content is in a given archived stream), then we compute it 
            # here instead of asking the parameters of a given block and then presuming.
            total_bytes += value[2]

            # We still give it a time period after the normalizing delay in order to build enough
            # samples to make a solid guess at what this number should be.
            if (value[0] - first_time > (normalize_delay + margin)):
              # We take the total bytes and average them over the elapsed time past the normalize_delay.
              est = total_bytes / (value[0] - first_time - normalize_delay)

              # est is in bytes/sec: divide by 1000 for KB/sec, round to the nearest
              # whole KB/sec, then multiply by 8 to express it in kilobits/sec.
              bitrate = int(round(est / 1000) * 8)
              #print("Estimated bitrate:%d total:%d est:%d denom:%d" % (bitrate, total_bytes, est, value[0] - first_time - normalize_delay) )
              if bitrate > 0:
                DB.set('bitrate', bitrate)
                has_bitrate = DB.get('bitrate')

    #if last_heartbeat:
    #  logging.info("%d heartbeat %d" % (last_heartbeat, last_heartbeat_tid))

    # Check for our management process
    if not misc.manager_is_running():
      logging.info("Manager isn't running");
      change_state = SHUTDOWN

    # we get here if we should NOT be recording.  So we make sure we aren't.
    if change_state == SHUTDOWN or (change_state == RESTART and TS.unixtime('dl') > shutdown_time):
      misc.shutdown_real()

    else:
      if not process and not change_state:
        logging.info("Failed to find downloader, starting new one")
        file_name, process = download_start(file_name)
        last_success = TS.unixtime('dl')

      # If we've hit the time when we ought to cascade
      # If our last_success stream was more than cascade_time - cascade_buffer
      # then we start our process_next
      elif TS.unixtime('dl') - last_success > cascade_margin or expired_heartbeat:
        #logging.info("heartbeat expired %s %s %d %d %d" % (type(process_next), type(process), last_success, cascade_time, TS.unixtime('dl')))

        # And we haven't created the next process yet, then we start it now.
        if not process_next:
          logging.info("Failed to find downloader, starting new one")
          file_name, process_next = download_start(file_name)

      
      # If there is still no process then we should definitely bail.
      if not process:
        misc.shutdown_real()

    #
    # This needs to be on the outside loop in case we are doing a cascade
    # outside of a full mode. In this case, we will need to shut things down
    #
    # If we are past the cascade_time and we have a process_next, then
    # we should shutdown our previous process and move the pointers around.
    #
    if not change_state and (expired_heartbeat or (TS.unixtime('dl') - last_success > cascade_time and process)):
      g_download_kill_pid += 1
      #process.terminate()

      # If the process_next is running then we move our last_success forward to the present
      last_success = TS.unixtime('dl')

      # we rename our process_next AS OUR process
      process = process_next

      # and then clear out the old process_next pointer
      process_next = None

    time.sleep(cycle_time)
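
To make the heartbeat-based bitrate estimate above concrete, here is the same arithmetic run on illustrative numbers for a nominal 128 kbps stream (roughly 16,000 bytes per second):

normalize_delay = 6        # seconds discarded at the start of the stream
elapsed = 70               # seconds since the first heartbeat
total_bytes = 1024000      # bytes counted after the normalize_delay window

est = total_bytes / (elapsed - normalize_delay)   # 16000.0 bytes/sec
bitrate = int(round(est / 1000) * 8)              # 16 KB/sec * 8 -> 128 kbps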
Example #8
0
  # servers and instances.
  DB.upgrade()
  del(DB.upgrade)
  DB.incr('runcount')

  # This is how we discover if we are the official server or not.
  # Look at the /uuid endpoint to see how this magic works.
  misc.config['uuid'] = os.popen('uuidgen').read().strip()

  signal.signal(signal.SIGINT, misc.shutdown_handler)
  signal.signal(signal.SIGUSR1, misc.shutdown_handler)
  signal.signal(signal.SIGHUP, misc.do_nothing)


misc.IS_TEST = False
misc.start_time = TS.unixtime()

parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", default="./indy_config.txt", help="Configuration file (default ./indy_config.txt)")
parser.add_argument('--debug', action='store_true', help="Load PDB for debugging")
parser.add_argument('--version', action='version', version='indycast %s :: Aug 2015' % misc.__version__)
parser.add_argument("--daemon", action='store_true',  help="Run as daemon")
args = parser.parse_args()
if args.daemon:
  Popen( [x for x in sys.argv if x != '--daemon'] )
  sys.exit(0)

# Replace sys.exit with a wrapper that drops into the debugger before exiting.
# (pdb is assumed to be imported, and this hook installed, only when --debug is passed.)
old_sys_exit = sys.exit
def new_sys_exit(value):
  pdb.set_trace()
  old_sys_exit(value)
sys.exit = new_sys_exit
Example #9
0
def manager(config):
  # Main flask process that manages the end points. 
  app = Flask(__name__)

  def webserver_shutdown(signal=15, frame=None):
    logging.info('Shutting down webserver')
    request.environ.get('werkzeug.server.shutdown')()

  def success(message):
    return jsonify({'res': True, 'message': message}), 200

  def fail(message):
    return jsonify({'res': False, 'message': message}), 500

  # from http://blog.asgaard.co.uk/2012/08/03/http-206-partial-content-for-flask-python
  @app.after_request
  def after_request(response):
    # Supports 206 partial content requests for podcast streams. 
    response.headers.add('Accept-Ranges', 'bytes')
    logging.info('ua - %s' % request.headers.get('User-Agent'))
    return response


  def send_file_partial(path, requested_path='', file_name=None):
    # Wrapper around send_file which handles HTTP 206 Partial Content
    # (byte ranges)

    # If we requested something that isn't around, then we bail.
    if not os.path.exists(path):
      return 'File %s not found. Perhaps the stream is old?' % requested_path, 404

    range_header = request.headers.get('Range', None)
    if not range_header: 
      with open(path, 'rb') as f:
        data = f.read()

      rv = Response( data, 200, mimetype=audio.our_mime(), direct_passthrough=True )
      if not file_name:
        file_name = os.path.basename(path)
        
      rv.headers.add('Content-Disposition', 'attachment; filename="%s"' % file_name)
      return rv
    
    size = os.path.getsize(path)    
    byte1, byte2 = 0, None
    
    m = re.search(r'(\d+)-(\d*)', range_header)
    g = m.groups()
    
    if g[0]: 
      byte1 = int(g[0])

    if g[1]: 
      byte2 = int(g[1])

    length = size - byte1
    if byte2 is not None:
      length = byte2 - byte1
    
    data = None
    with open(path, 'rb') as f:
      f.seek(byte1)
      data = f.read(length + 1)

    rv = Response( data, 206, mimetype=audio.our_mime(), direct_passthrough=True )
    disposition = 'attachment;'

    if file_name:
      disposition += ' filename="%s"' % file_name

    rv.headers.add('Content-Disposition', disposition)
    rv.headers.add('Content-Range', 'bytes {0}-{1}/{2}'.format(byte1, byte1 + length, size))

    return rv

  # From http://stackoverflow.com/questions/13317536/get-a-list-of-all-routes-defined-in-the-app
  @app.route("/help")
  def site_map():
    """ 
    Shows all the end points supported by the current server, the options 
    and the documentation.
    """
    output = ['-=#| Welcome to indycast %s API help |#=-' % misc.__version__, '']

    for rule in app.url_map.iter_rules():

      if rule.endpoint == 'static': continue
     
      options = {}
      for arg in rule.arguments:
        options[arg] = "{0}".format(arg)

      url = url_for(rule.endpoint, **options)
      line = "{} {}".format(url, app.view_functions[rule.endpoint].__doc__)
      output.append(line)
      output.append("")

    return Response('\n'.join(output), mimetype='text/plain')


  @app.route('/robots.txt')
  def robots():
    """
    Sends off robots.txt for crawlers
    """
    return send_file('%s/robots.txt' % (misc.source_dir, ))

  @app.route('/uuid')
  def my_uuid():
    """ 
    Returns this server's uuid which is generated each time it is run.
    This is used to determine whether this is the official server or not.
    """
    return misc.config['uuid']

  @app.route('/db')
  def database():
    """ 
    Backs up the current sqlite3 db and sends a gzipped version of it as the response.
    """
    filename = '%s/%s/%s-%s.gz' % (misc.config['storage'], misc.DIR_BACKUPS, misc.config['callsign'], time.strftime('%Y%m%d-%H%M', time.localtime()))
    os.popen('/usr/bin/sqlite3 config.db .dump | /bin/gzip -9 > %s' % filename)
    time.sleep(1)
    return send_file(filename)

  @app.route('/rename')
  def rename():
    return cloud.rename()

  @app.route('/reindex')
  def reindex():
    """ 
    Starts the prune process which cleans up and offloads audio files but also re-indexes
    the database.

    This is useful in the cases where bugs have led to improper registration of the 
    streams and a busted building of the database.  It's fairly expensive in I/O costs 
    so this shouldn't be done as the default.
    """
    cloud.prune(reindex=True)
    return success('Reindexing...')

  @app.route('/prune')
  def prune():
    """ 
    Starts the prune sub-process which cleans up and offloads audio files 
    following the rules outlined in the configuration file (viewable with the stats call)
    """
    cloud.prune(force=True)
    return success('Pruning...')

  @app.route('/slices/<time>/<name>')
  def send_named_stream(time, name):
    """
    Similar to the /slices/path endpoint, this end point sends a stream that is at time <time> with
    name <name>.
    """
    return send_stream(time, download_name=name)

  @app.route('/slices/<path:path>')
  def send_stream(path, download_name=None):
    """
    Downloads a stream from the server. The path is callsign-date_duration.mp3

      * callsign: The callsign returned by /stats
      * date: in the format YYYYMMDDHHMM such as 201508011005 for 
        2015-08-01 10:05
      * duration: A value, in minutes, to return.

    The mp3 extension should be used regardless of the actual format of the stream -
    although the audio returned will be in the streams' native format.
    
    The streams are created and sent on-demand, so there may be a slight delay before
    it starts.
    """
    DB.incr('hits-dl')
    base_dir = "%s%s/" % (config['storage'], misc.DIR_SLICES)

    if not path.startswith(config['callsign']):
      path = "%s-%s" % (config['callsign'], path)

    if not path.endswith('.mp3'):
      path = "%s.mp3" % path

    file_name = base_dir + path

    # If the file doesn't exist, then we need to slice it and create it based on our query.
    # Also, if it's a zero byte file, then we try to create it again.
    if not os.path.isfile(file_name) or os.path.getsize(file_name) == 0:
      cloud.register_stream_list()

      # This tells us that if it were to exist, it would be something
      # like this.
      request_info = audio.stream_info(file_name)
      logging.info(("expected value", request_info))

      # we can do something rather specific here ... 
      #
      # first we get our generic stream list using our start_minute from the info.
      stream_list, episode_list = cloud.find_streams(start_list=[request_info['start_minute']], duration_min=request_info['duration_sec'] / 60.0)
      
      for ep in episode_list:
        episode = ep[0]
        first_slice = episode[0]

        if first_slice['week_number'] == request_info['week_number']:
          # This means that we've found the episode that we want
          # We will block on this.
          relative_start_minute = request_info['start_minute'] - first_slice['start_minute']

          logging.info(episode)
          audio.stitch_and_slice_process(file_list=episode, relative_start_minute=relative_start_minute, duration_minute=request_info['duration_sec'] / 60.0, destination_path=path)

          # And break out of our loop ... now everything should exist.
          break

    return send_file_partial("%s/%s" % (base_dir, path), requested_path=path, file_name=download_name)

  @app.route('/halt')
  def halt():
    """
    Stops the webserver. This request must be issued from the localhost
    in order to succeed.
    """
    if request.remote_addr == '127.0.0.1':
      webserver_shutdown()
      misc.shutdown(do_restart=False)
      return success('halt...')

    else:
      return fail('halt aborted. Must be requested from the localhost')

  @app.route('/restart')
  def restart():
    """ 
    Restarts an instance. This does so in a gapless non-overlapping way.
    """
    webserver_shutdown()
    misc.shutdown(do_restart=True)
    return success('restarting...')


  @app.route('/dolist')
  def dolist():
    return success(misc.queue_dbg())

  @app.route('/upgrade')
  def upgrade():
    """
    Goes to the source directory, pulls down the latest from git
    and if the versions are different, the application restarts.
    """
    cwd = os.getcwd()
    os.chdir(misc.source_dir)

    os.system('/usr/bin/git pull') 

    # See what the version is after the pull
    newversion = os.popen("/usr/bin/git describe").read().strip()

    if newversion != misc.__version__:
      os.system('/usr/local/bin/pip install --user -r requirements.txt') 

      # from http://blog.petrzemek.net/2014/03/23/restarting-a-python-script-within-itself/
      misc.shutdown(do_restart=True)
      return success("Upgrading from %s to %s" % (misc.__version__, newversion))

    os.chdir(cwd)
    return success('Version %s is current' % misc.__version__)


  @app.route('/heartbeat')
  def heartbeat():
    """
    A low resource version of the /stats call ... this is invoked
    by the server health check.  Only the vitals are reported.
    
    It helps us see if disk space is going nuts or if we aren't recording
    right now.
    
    This allows us to check if a restart happened between invocations.
    """
    return jsonify(misc.base_stats()), 200


  @app.route('/stats')
  def stats():
    """ 
    Reports various statistical metrics on a particular server.  
    Use this with the graph.py tool to see station coverage.
    """
    misc.am_i_official()

    stats = misc.base_stats()

    lockMap = {}
    for k, v in misc.lockMap.items():
      lock = v.acquire(False)
      lockMap[k] = True
      if lock:
        lockMap[k] = False
        v.release()

    stats.update({
      'kv': DB.all('kv'),
      'locks': lockMap,
      'pwd': os.getcwd(),
      'free': os.popen("/bin/df -h / | /usr/bin/tail -1").read().strip(),
      # Reporting the list as fractional GB is more useful.
      'streams': DB.all('streams', sort_by='start_unix'),
      'config': misc.public_config()
    })

    return jsonify(stats), 200
  

  # Using http://flask.pocoo.org/docs/0.10/patterns/streaming/ as a reference.
  @app.route('/live/<start>')
  def live(start, offset_min=0):
    """ 
    Sends off a live-stream equivalent.  Two formats are supported:

     * start time - In the form of strings such as "1pm" or "2:30pm"
     * offset - starting with a negative "-", this means "from the present".
        For instance, to start the stream from 5 minutes ago, you can do "-5"

    """
    DB.incr('hits-live')
    if start[0] == '-' or start.endswith('min'):
      # dump things like min or m
      start = re.sub('[a-z]', '', start)
      return redirect('/live/m%f' % (float(TS.minute_now() - abs(float(start)))), code=302)

    # The start is expressed in times like "11:59am ..." We utilize the
    # library we wrote for streaming to get the minute of day this is.
    if start[0] == 'm':
      requested_minute = float(start[1:]) % TS.ONE_DAY_MINUTE 

    else:
      candidate = start
      requested_minute = TS.to_utc('mon', candidate) - offset_min

    offset_sec = 0
    range_header = request.headers.get('Range', None)
    if range_header:
      m = re.search(r'(\d+)-', range_header)
      g = m.groups()
      if g[0]: 
        byte1 = int(g[0])

        # We use the byte to compute the offset
      # Guard against a missing or zero bitrate (fall back to 128 kbps)
      offset_sec = float(byte1) / ((int(DB.get('bitrate') or 0) or 128) * (1000 / 8.0))
    

    #print "--- REQUEST @ ", start, range_header, offset_sec
    current_minute = TS.minute_now() % TS.ONE_DAY_MINUTE

    now_time = TS.now()
    requested_time = now_time - timedelta(minutes=current_minute) + timedelta(minutes=requested_minute)

    # print requested_time, now_time, requested_minute, current_minute
    # If the requested minute is greater than the current one, then we can presume that
    # the requested minute refers to yesterday ... as in, someone wants 11pm
    # and now it's 1am.
    if requested_minute > current_minute:
      requested_time -= timedelta(days=1)

    # It's important to do this AFTER the operation above otherwise we wrap around to yesterday
    requested_time += timedelta(seconds=offset_sec)

    # Get the info for the file that contains this timestamp
    start_info, requested_time_available = cloud.get_file_for_ts(target_time=requested_time, bias=-1)

    if start_info is None or requested_time_available is None:
      return do_error("Can't find any matching files")

    requested_time = max(requested_time, requested_time_available)
    start_second = (requested_time - start_info['start_date']).total_seconds()

    response = Response(audio.list_slice_stream(start_info, start_second), mimetype=audio.our_mime())

    return response


  @app.route('/at/<start>/<duration_string>')
  def at(start, duration_string='1hr'):
    """
    Sends a stream using a human-readable (and human-writable) definition 
    at start time.  This uses the dateutils.parser library and so strings 
    such as "Monday 2pm" are accepted.

    Because the space, 0x20 is such a pain in HTTP, you can use "_", 
    "-" or "+" to signify it.  For instance,

        /at/monday_2pm/1hr

    Will work fine
    """
    # If it's say 1am, and I request 11pm without a day specification, it will go 
    # to 11pm LAST week and not the 11pm from 2 hours ago.
    dt = TS.str_to_time(start)
    duration_min = TS.duration_parse(duration_string)
    endpoint = '%s-%s_%d.mp3' % (misc.config['callsign'], TS.ts_to_name(dt), duration_min)
    return send_stream(endpoint, download_name=endpoint)

  @app.route('/<weekday>/<start>/<duration_string>')
  def at_method2(weekday, start, duration_string):
    """
    This is identical to the stream syntax, but instead it is similar to
    /at ... it uses the same notation but instead returns an audio file
    directly.

    You must specify a single weekday ... I know, total bummer.
    """
    weekday_map = {
      'mon': 'monday', 
      'tue': 'tuesday',
      'wed': 'wednesday',
      'thu': 'thursday', 
      'fri': 'friday', 
      'sat': 'saturday', 
      'sun': 'sunday'
    }

    # The alternative form for this is something like
    # /tuesday_8pm/1hr/showname.xml
    if duration_string.count('.') > 0:
      dt = TS.str_to_time(weekday)
      start_time = TS.extract_time(weekday)

      # order is a little incompatible.
      return stream(weekday=TS.to_minute(dt), start=start_time, duration_string=start, showname=duration_string)


    if weekday not in weekday_map:
      return 'The first parameter, %s, is not a recognized weekday.' % weekday

    return at("%s_%s" % (weekday_map[weekday], start), duration_string)
    

  @app.route('/<weekday>/<start>/<duration_string>/<showname>')
  def stream(weekday, start, duration_string, showname):
    """
    Returns a podcast, m3u, pls, html or mp3 file based on the weekday, 
    start and duration.  This is designed to be read by podcasting 
    software such as podkicker, itunes, and feedburner.

    The default format if nothing is specified is XML.

    weekdays are defined as mon, tue, wed, thu, fri, sat, sun.

    If a show occurs multiple times per week, this can be specified with
    a comma.  For instance,

    /mon,tue,fri/4pm/1hr
    
    The showname should be followed by an xml, pls, m3u, or mp3 extension.
    In the case of using the .mp3 extension, it only returns the most recent 
    episode.

    It should also be viewable in a modern web browser.

    If you find a podcast client that's not supported, please send an email 
    to [email protected].
    """
    
    if isinstance(weekday, float):
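      # stream() can also be invoked internally (see at_method2 above) with weekday
      # already given as a minute-of-week offset instead of a weekday name.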
      start_time_list = [weekday]
      weekday_list = [ TS.WEEKDAY_LIST[ int(weekday / (60 * 24)) ] ]

    else:
      # Supports multiple weekdays
      weekday_list = weekday.split(',')
      start_time_list = [TS.to_utc(day, start) for day in weekday_list]
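      # (assumption: TS.to_utc returns the start as a minute-of-week offset,
      # which the buffer arithmetic below relies on)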

    duration_min = TS.duration_parse(duration_string)

    # This means we failed to parse
    if not duration_min:
      return do_error("duration '%s' is not set correctly" % duration_string)

    if not isinstance(start_time_list[0], (int, float)):
      return do_error('weekday and start times are not set correctly')

    buffer_show = 2
    # Per #22, we pad the duration by buffer_show minutes on each side to make
    # sure that we capture the entire episode.
    duration_min += (buffer_show * 2)

    # And according to #149 we also move the start time back by buffer_show minutes ... 
    # we need a little modular math to make sure we don't wrap to a negative offset.
    start_time_list = [(TS.MINUTES_PER_WEEK + offset - buffer_show) % TS.MINUTES_PER_WEEK for offset in start_time_list]
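    # e.g. assuming TS.MINUTES_PER_WEEK is 10080 (7 * 24 * 60), a Monday 00:01 show
    # (offset 1) becomes (10080 + 1 - 2) % 10080 = 10079, i.e. Sunday 23:59, so
    # subtracting the buffer never produces a negative offset.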

    # If we are here then it looks like our input is probably good.
    
    # Strip the .xml from the showname ... this will be used in our xml.
    parts = showname.split('.')
    file_type = parts.pop()
    showname = '.'.join(parts)

    # We come in with spaces as underscores so here we translate that back
    showname = re.sub('_', ' ', showname)

    # Make sure that we have all of our streams registered before trying
    # to infer what we can send to the user.
    cloud.register_stream_list()

    # Look for streams that we have which match this query and duration.
    # This will also create slices if necessary in a sub process.
    # The list of files it returns will include any not-yet-created
    # file names as essentially a "promise" of when they will be made.
    feed_list = cloud.find_and_make_slices(start_time_list, duration_min)
    # print feed_list

    # Then, taking those two things, make a feed list from them.
    return generate_feed(
      file_type=file_type,
      showname=showname, 
      feed_list=feed_list, 
      duration_min=duration_min, 
      weekday_list=weekday_list, 
      start=start, 
      duration_string=duration_string
    )


  if __name__ == 'lib.server':
    # When we do an upgrade or a restart, there's a race condition: this server can try to start
    # before the previous one has cleaned up all the socket work.  So while the elapsed time is under our
    # patience threshold we sleep briefly and just try again, hoping that it will work.
    patience = misc.PROCESS_DELAY * 2
    attempt = 1

    start = TS.unixtime('delay')

    while TS.unixtime('delay') - start < (patience + 3):
      logging.info('Listening on %s' % config['port'])

      try:
        app.run(threaded=True, use_reloader=False, port=config['port'], host='0.0.0.0')
        break

      except Exception as exc:
        if TS.unixtime('delay') - start < patience:
          logging.info('[attempt: %d] Error, can not start server ... perhaps %s is already in use?' % (attempt, config['port']))
          attempt += 1
          time.sleep(misc.PROCESS_DELAY / 4)

        elif TS.unixtime('delay') - start < (patience + 4):
          pid = os.popen("netstat -anlp | grep :%s | awk ' { print $NF }' | sed 's/\/.*//'" % config['port']).read().strip().split('\n')[0]

          try:
            logging.info("F**k it, I'm killing pid %s." % pid)
            os.kill(int(pid), 15)

          except:
            pass

          time.sleep(misc.PROCESS_DELAY / 4)
Example #10
0
def prune_process(reindex=False, force=False):
  import lib.misc as misc 
  # This is internal, call prune() directly. This is a normally blocking
  # process that is prepared by prune(), making it easily callable asynchronously 
  # If another prune is running then we just bail
  if not misc.lockMap['prune'].acquire(False) and not force:
    logging.warn("Tried to run another prune whilst one is running. Aborting")
    return True

  # If we are the first process then we need to make sure that the webserver is up
  # before we check whether we are official
  time.sleep(2)

  #pid = misc.change_proc_name("%s-cleanup" % misc.config['callsign'])
  # We want to run the am_i_official here since it could block on a DNS lookup
  misc.am_i_official()

  try:
    register_stream_list(reindex)

  except Exception as e:
    logging.info("Wasn't able to register streams: %s" % e)
    misc.lockMap['prune'].release()
    return None

  archive_duration = misc.config['cloud_archive']
  cutoff = TS.unixtime('prune') - archive_duration

  # Remove all slices older than 4 hours.
  slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND

  cloud_cutoff = None
  if misc.config['cloud']:
    cloud_cutoff = TS.unixtime('prune') - misc.config['disk_archive']

  # Put thingies into the cloud.
  count_cloud = 0
  count_delete = 0

  for file_name in glob('*/*.mp3'):
    #
    # Depending on many factors this could be running for hours
    # or even days.  We want to make sure this isn't a blarrrghhh
    # zombie process or worse yet, still running and competing with
    # other instances of itself.
    #
    if not misc.manager_is_running():
      misc.lockMap['prune'].release()
      return None

    if not os.path.exists(file_name):
      continue 

    ctime = os.path.getctime(file_name)

    # print "Looking at ", file_name, ctime, cutoff, archive_duration,  misc.config['archive'], misc.am_i_official()
    # We observe the rules set up in the config.
    logging.debug("%s cloud:%d ctime:%d slice:%d cutoff:%d ctime-cloud:%d ctime-slice:%d" % (file_name, cloud_cutoff or 0, ctime, slice_cutoff, cutoff, ctime - (cloud_cutoff or 0), ctime - slice_cutoff))
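    # Note: 'and' binds tighter than 'or', so this removes slices older than
    # slice_cutoff as well as anything at all older than the general cutoff.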
    if file_name.startswith('slices') and ctime < slice_cutoff or ctime < cutoff:
      logging.debug("Prune[remove]: %s (ctime)" % file_name)
      os.unlink(file_name)
      count_delete += 1 

    # We want to make sure we aren't archiving the slices
    elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith('slice'):
      logging.debug("Prune[cloud]: %s" % file_name)

      # <s>Only unlink the file if I can successfully put it into the cloud.</s>
      #
      # Actually, policy change: 
      #   We should dump the file regardless because otherwise we would smash the disk
      #   AS HAS HAPPENED MULTIPLE TIMES
      #
      # Then you have an irrelevant past building up and forcing a discarding of the desired 
      # future ... just like with life itself.
      #
      res = put(file_name)

      if misc.am_i_official():
        try:
          os.unlink(file_name)

          # This is only a self-reporting system... we can use our success code for
          # our honesty here.
          if res:
            count_cloud += 1 

        except Exception as e:
          logging.debug("Prune[cloud]: Couldn't remove {}: {}".format(file_name, e))

  for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS):
    ctime = os.path.getctime(file_name)

    # We observe the rules set up in the config.
    if ctime < cutoff:
      logging.debug("Prune: %s" % file_name)
      os.unlink(file_name)
      count_delete += 1 

  # Don't do this f*****g shit at all because f**k this so hard.
  #logging.info('select name, id from streams where end_unix < date("now", "-%d seconds") or (end_minute - start_minute < 0.05 and start_unix < date("now", "%d seconds"))' % (archive_duration, TS.get_offset() * 60 - 1200))

  unlink_list = DB.run('select name, id from streams where end_unix < date("now", "-%d seconds")' % (archive_duration)).fetchall()

  for file_name_tuple in unlink_list:
    file_name = str(file_name_tuple[0])
    id = file_name_tuple[1]

    #logging.debug("Prune[remove]: %s (unlink list)" % file_name)
    # If there's a cloud account at all then we need to unlink the 
    # equivalent mp3 file
    if cloud_cutoff and misc.am_i_official():
      "cloud.";unlink(file_name)

      # After we remove these streams then we delete them from the db.
      DB.run('delete from streams where id = %d' % id)

    # now only after we've deleted from the cloud can we delete the local file
    if os.path.exists(file_name):
      os.unlink(file_name)
      count_delete += 1


  logging.info("Deleted %d files and put %d on the cloud." % (count_delete, count_cloud))
  misc.lockMap['prune'].release()
Example #11
0
def stream_manager():
    import random

    # Manager process which makes sure that the
    # streams are running appropriately.
    callsign = misc.config['callsign']

    #
    # AAC bitrate is some non-trivial thing that even ffprobe doesn't
    # do a great job at. This solution looks at number of bits that
    # transit over the wire given a duration of time, and then uses
    # that to compute the bitrate, since in practice, that's what
    # bitrate effectively means, and why it's such an important metric.
    #
    # This is to compute a format agnostic bitrate
    # (see heartbeat for more information)
    #
    has_bitrate = DB.get('bitrate')
    first_time = 0
    total_bytes = 0
    normalize_delay = 6

    cascade_time = misc.config['cascadetime']
    cascade_buffer = misc.config['cascadebuffer']
    cascade_margin = cascade_time - cascade_buffer
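    # The next downloader starts cascade_buffer seconds before the current one hits
    # cascade_time, so successive recordings overlap slightly.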

    last_prune = 0
    last_success = 0

    change_state = None
    SHUTDOWN = 1
    RESTART = 2
    shutdown_time = None
    misc.download_ipc = Queue()

    # Number of seconds to be cycling
    cycle_time = misc.config['cycletime']

    process = None
    process_next = None

    # The manager will be the one that starts this.
    misc.pid_map['webserver'] = Process(target=server.manager,
                                        args=(misc.config, ))
    misc.pid_map['webserver'].start()

    file_name = None

    # A wrapper function to start a download process
    def download_start(file_name):
        """ Starts a process that manages the downloading of a stream. """
        global g_download_pid

        g_download_pid += 1
        logging.info('Starting cascaded downloader #%d. Next up in %ds' %
                     (g_download_pid, cascade_margin))

        #
        # There may be a multi-second lapse time from the naming of the file to
        # the actual start of the download so we should err on that side by putting it
        # in the future by some margin
        #
        file_name = '%s/%s-%s.mp3' % (
            misc.DIR_STREAMS, callsign,
            TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
        process = Process(target=stream_download,
                          args=(callsign, misc.config['stream'],
                                g_download_pid, file_name))
        process.start()
        return [file_name, process]

    # see https://github.com/kristopolous/DRR/issues/91:
    # Randomize prune to offload disk peaks
    prune_duration = misc.config['pruneevery'] + (1 / 8.0 -
                                                  random.random() / 4.0)
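    # i.e. the configured interval (in days) plus up to 1/8 of a day of jitter
    # either way (roughly three hours).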

    while True:
        #
        # We cycle this to off for every run. By the time we go through the queue,
        # so long as we aren't supposed to be shutting down, this should be toggled to true.
        #
        flag = False

        if last_prune < (TS.unixtime('prune') -
                         TS.ONE_DAY_SECOND * prune_duration):
            prune_duration = misc.config['pruneevery'] + (
                1 / 8.0 - random.random() / 4.0)
            # We just assume it can do its business in under a day
            misc.pid_map['prune'] = cloud.prune()
            last_prune = TS.unixtime('prune')

        TS.get_offset()

        lr_set = False
        while not misc.queue.empty():
            flag = True
            what, value = misc.queue.get(False)

            # The curl process discovered a new stream to be
            # used instead.
            if what == 'stream':
                misc.config['stream'] = value
                logging.info("Using %s as the stream now" % value)
                # We intentionally don't toggle the flag here, so the
                # old process gets shut down and a new one started

            elif what == 'db-debug':
                DB.debug()

            elif what == 'shutdown':
                change_state = SHUTDOWN

            elif what == 'restart':
                logging.info(DB.get('runcount', use_cache=False))
                cwd = os.getcwd()
                os.chdir(misc.PROCESS_PATH)
                Popen(sys.argv)
                os.chdir(cwd)

                change_state = RESTART

                # Try to record for another restart_overlap seconds - make sure that
                # we don't perpetually put this in the future due to some bug.
                if not shutdown_time:
                    shutdown_time = TS.unixtime(
                        'dl') + misc.config['restart_overlap']
                    logging.info(
                        "Restart requested ... shutting down downloader at %s"
                        % TS.ts_to_name(shutdown_time, with_seconds=True))

                    while True:
                        time.sleep(20)
                        #logging.info(DB.get('runcount', use_cache=False))
                        logging.info(
                            os.popen(
                                'ps axf | grep [%c]%s | grep python | wc -l' %
                                (misc.config['callsign'][0],
                                 misc.config['callsign'][1:])).read().strip())
                        ps_out = int(
                            os.popen(
                                'ps axf | grep [%c]%s | grep python | wc -l' %
                                (misc.config['callsign'][0],
                                 misc.config['callsign'][1:])).read().strip())

                        if ps_out > 1:
                            logging.info(
                                "Found %d potential candidates (need at least 2)"
                                % ps_out)
                            # This makes it a restricted soft shutdown
                            misc.shutdown_real(do_restart=True)
                            misc.download_ipc.put(
                                ('shutdown_time', shutdown_time))
                            break

                        else:
                            Popen(sys.argv)
                            logging.warn(
                                "Couldn't find a replacement process ... not going anywhere."
                            )

            elif what == 'heartbeat':
                if not lr_set and value[1] > 100:
                    lr_set = True
                    DB.set('last_recorded', time.time())

                if not has_bitrate:

                    # Keep track of the first time this stream started (this is where our total
                    # byte count is derived from)
                    if not first_time:
                        first_time = value[0]

                    #
                    # Otherwise we give a large (in computer time) margin of time to confidently
                    # guess the bitrate.  I didn't do great at stats in college, but in my experiments,
                    # the estimation falls within 98% of the destination.  I'm pretty sure it's really
                    # unlikely this will come out erroneous, but I really can't do the math, it's probably
                    # a T value, but I don't know. Anyway, whatevs.
                    #
                    # The normalize_delay here is for both he-aac+ streams which need to put in some frames
                    # before the quantizing pushes itself up and for other stations which sometimes put a canned
                    # message at the beginning of the stream, like "Live streaming supported by ..."
                    #
                    # When we discount the first half-dozen seconds as not being part of the total, we get a
                    # stabilizing convergence far quicker.
                    #
                    elif (value[0] - first_time > normalize_delay):
                        # If we haven't determined this stream's bitrate (which we use to estimate
                        # the amount of content is in a given archived stream), then we compute it
                        # here instead of asking the parameters of a given block and then presuming.
                        total_bytes += value[1]

                        # We still give it a time period after the normalizing delay in order to build enough
                        # samples to make a solid guess at what this number should be.
                        if (value[0] - first_time > (normalize_delay + 60)):
                            # We take the total bytes and divide by the elapsed sample time (minus the normalize delay).
                            est = total_bytes / (value[0] - first_time -
                                                 normalize_delay)

                            # We round to the nearest kilobyte per second and then
                            # multiply by 8 to express it as kilobits per second.
                            bitrate = int(round(est / 1000) * 8)
                            DB.set('bitrate', bitrate)
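                            # e.g. an estimate of ~16000 bytes/sec rounds to 16 KB/s
                            # and is stored as a bitrate of 128 (kbps).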

        # Check for our management process
        if not misc.manager_is_running():
            logging.info("Manager isn't running")
            change_state = SHUTDOWN

        # The only way for the bool to be toggled off is if we are not in full-mode ...
        # we get here if we should NOT be recording.  So we make sure we aren't.
        if change_state == SHUTDOWN or (change_state == RESTART
                                        and TS.unixtime('dl') > shutdown_time):
            process = my_process_shutdown(process)
            process_next = my_process_shutdown(process_next)
            misc.shutdown_real()

        else:
            # Didn't respond in cycle_time seconds so kill it
            if not flag:
                process = my_process_shutdown(process)

            if not process and not change_state:
                file_name, process = download_start(file_name)
                last_success = TS.unixtime('dl')

            # If we've hit the time when we ought to cascade
            elif TS.unixtime('dl') - last_success > cascade_margin:

                # And we haven't created the next process yet, then we start it now.
                if not process_next:
                    file_name, process_next = download_start(file_name)

            # If our last_success stream was more than cascade_time - cascade_buffer
            # then we start our process_next

            # If there is still no process then we should definitely bail.
            if not process:
                misc.shutdown_real()

        #
        # This needs to be on the outside loop in case we are doing a cascade
        # outside of a full mode. In this case, we will need to shut things down
        #
        # If we are past the cascade_time and we have a process_next, then
        # we should shutdown our previous process and move the pointers around.
        #
        if not change_state and TS.unixtime(
                'dl') - last_success > cascade_time and process:
            logging.info("Stopping cascaded downloader")
            process.terminate()

            # If the process_next is running then we move our last_success forward to the present
            last_success = TS.unixtime('dl')

            # we rename our process_next AS OUR process
            process = process_next

            # and then clear out the old process_next pointer
            process_next = None

        # Increment the amount of time this has been running
        DB.incr('uptime', cycle_time)

        time.sleep(cycle_time)
Example #12
0
    signal.signal(signal.SIGINT, misc.shutdown_handler)
    signal.signal(signal.SIGUSR1, misc.shutdown_handler)
    signal.signal(signal.SIGHUP, misc.do_nothing)


if __name__ == "__main__":
    # From http://stackoverflow.com/questions/25504149/why-does-running-the-flask-dev-server-run-itself-twice

    if os.environ.get('WERKZEUG_RUN_MAIN') == 'true':
        server_manager(misc.config)

    else:
        # Ignore all test scaffolding
        misc.IS_TEST = False
        misc.start_time = TS.unixtime()

        parser = argparse.ArgumentParser()
        parser.add_argument(
            "-c",
            "--config",
            default="./indy_config.txt",
            help="Configuration file (default ./indy_config.txt)")
        parser.add_argument('--version',
                            action='version',
                            version='indycast %s :: Aug 2015' %
                            misc.__version__)
        parser.add_argument("--daemon",
                            action='store_true',
                            help="run as daemon")
        args = parser.parse_args()
Example #13
0
def prune_process(reindex=False, force=False):
    import lib.misc as misc
    # This is internal, call prune() directly. This is a normally blocking
    # process that is prepared by prune(), making it easily callable asynchronously
    # If another prune is running then we just bail
    if not misc.lockMap['prune'].acquire(False) and not force:
        logging.warn(
            "Tried to run another prune whilst one is running. Aborting")
        return True

    # If we are the first process then we need to make sure that the webserver is up
    # before we check whether we are official
    time.sleep(2)

    #pid = misc.change_proc_name("%s-cleanup" % misc.config['callsign'])
    # We want to run the am_i_official here since it could block on a DNS lookup
    misc.am_i_official()

    try:
        register_stream_list(reindex)

    except Exception as e:
        logging.info("Wasn't able to register streams: %s" % e)
        misc.lockMap['prune'].release()
        return None

    archive_duration = misc.config['cloud_archive']
    cutoff = TS.unixtime('prune') - archive_duration

    # Remove all slices older than 4 hours.
    slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND

    cloud_cutoff = None
    if misc.config['cloud']:
        cloud_cutoff = TS.unixtime('prune') - misc.config['disk_archive']

    # Put thingies into the cloud.
    count_cloud = 0
    count_delete = 0

    for file_name in glob('*/*.mp3'):
        #
        # Depending on many factors this could be running for hours
        # or even days.  We want to make sure this isn't a blarrrghhh
        # zombie process or worse yet, still running and competing with
        # other instances of itself.
        #
        if not misc.manager_is_running():
            misc.lockMap['prune'].release()
            return None

        if not os.path.exists(file_name):
            continue

        ctime = os.path.getctime(file_name)

        # print "Looking at ", file_name, ctime, cutoff, archive_duration,  misc.config['archive'], misc.am_i_official()
        # We observe the rules set up in the config.
        logging.debug(
            "%s cloud:%d ctime:%d slice:%d cutoff:%d ctime-cloud:%d ctime-slice:%d"
            % (file_name, cloud_cutoff or 0, ctime, slice_cutoff, cutoff,
               ctime - (cloud_cutoff or 0), ctime - slice_cutoff))
        if file_name.startswith(
                'slices') and ctime < slice_cutoff or ctime < cutoff:
            logging.debug("Prune[remove]: %s (ctime)" % file_name)
            os.unlink(file_name)
            count_delete += 1

        # We want to make sure we aren't archiving the slices
        elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith(
                'slice'):
            logging.debug("Prune[cloud]: %s" % file_name)

            # <s>Only unlink the file if I can successfully put it into the cloud.</s>
            #
            # Actually, policy change:
            #   We should dump the file regardless because otherwise we would smash the disk
            #   AS HAS HAPPENED MULTIPLE TIMES
            #
            # Then you have an irrelevant past building up and forcing a discarding of the desired
            # future ... just like with life itself.
            #
            res = put(file_name)

            if misc.am_i_official():
                try:
                    os.unlink(file_name)

                    # This is only a self-reporting system... we can use our success code for
                    # our honesty here.
                    if res:
                        count_cloud += 1

                except Exception as e:
                    logging.debug(
                        "Prune[cloud]: Couldn't remove {}: {}".format(
                            file_name, e))

    for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS):
        ctime = os.path.getctime(file_name)

        # We observe the rules set up in the config.
        if ctime < cutoff:
            logging.debug("Prune: %s" % file_name)
            os.unlink(file_name)
            count_delete += 1

    # Don't do this f*****g shit at all because f**k this so hard.
    #logging.info('select name, id from streams where end_unix < date("now", "-%d seconds") or (end_minute - start_minute < 0.05 and start_unix < date("now", "%d seconds"))' % (archive_duration, TS.get_offset() * 60 - 1200))

    unlink_list = DB.run(
        'select name, id from streams where end_unix < date("now", "-%d seconds")'
        % (archive_duration)).fetchall()

    for file_name_tuple in unlink_list:
        file_name = str(file_name_tuple[0])
        id = file_name_tuple[1]

        #logging.debug("Prune[remove]: %s (unlink list)" % file_name)
        # If there's a cloud account at all then we need to unlink the
        # equivalent mp3 file
        if cloud_cutoff and misc.am_i_official():
            "cloud."
            unlink(file_name)

            # After we remove these streams then we delete them from the db.
            DB.run('delete from streams where id = %d' % id)

        # now only after we've deleted from the cloud can we delete the local file
        if os.path.exists(file_name):
            os.unlink(file_name)
            count_delete += 1

    logging.info("Deleted %d files and put %d on the cloud." %
                 (count_delete, count_cloud))
    misc.lockMap['prune'].release()
Example #14
0
File: cloud.py Project: EQ4/DRR
def prune_process(lockMap, reindex=False, force=False):
  # This is internal, call prune() directly. This is a normally blocking
  # process that is prepared by prune(), making it easily callable asynchronously 
  # If another prune is running then we just bail
  if not lockMap['prune'].acquire(False) and not force:
    logging.warn("Tried to run another prune whilst one is running. Aborting")
    return True

  # If we are the first process then we need to make sure that the webserver is up
  # before we check whether we are official
  time.sleep(2)

  pid = misc.change_proc_name("%s-cleanup" % misc.config['callsign'])

  # We want to run the am_i_official here since it could block on a DNS lookup
  misc.am_i_official()

  try:
    register_stream_list(reindex)

  except:
    lockMap['prune'].release()
    return None

  db = DB.connect()

  archive_duration = misc.config['archivedays'] * TS.ONE_DAY_SECOND
  cutoff = TS.unixtime('prune') - archive_duration

  # Remove all slices older than 4 hours.
  slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND

  cloud_cutoff = None
  if misc.config['cloud']:
    cloud_cutoff = TS.unixtime('prune') - misc.config['cloudarchive'] * TS.ONE_DAY_SECOND

  # Put thingies into the cloud.
  count = 0
  for file_name in glob('*/*.mp3'):
    #
    # Depending on many factors this could be running for hours
    # or even days.  We want to make sure this isn't a blarrrghhh
    # zombie process or worse yet, still running and competing with
    # other instances of itself.
    #
    if not misc.manager_is_running():
      lockMap['prune'].release()
      return None

    ctime = os.path.getctime(file_name)

    # print "Looking at ", file_name, ctime, cutoff, archive_duration,  misc.config['archivedays'], misc.am_i_official()
    # We observe the rules set up in the config.
    if file_name.startswith('slices') and ctime < slice_cutoff or ctime < cutoff:
      logging.debug("Prune[remove]: %s (ctime)" % file_name)
      os.unlink(file_name)
      count += 1 

    # We want to make sure we aren't archiving the slices
    elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith('slice') and misc.am_i_official():
      logging.debug("Prune[cloud]: %s" % file_name)

      # Only unlink the file if I can successfully put it into the cloud.
      if put(file_name):
        try:
          os.unlink(file_name)

        except:
          logging.debug("Prune[cloud]: Couldn't remove %s" % file_name)

  for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS):
    ctime = os.path.getctime(file_name)

    # We observe the rules set up in the config.
    if ctime < cutoff:
      logging.debug("Prune: %s" % file_name)
      os.unlink(file_name)
      count += 1 

  # The map names are different since there may or may not be a corresponding
  # cloud thingie associated with it.
  db = DB.connect()

  # Don't do this f*****g shit at all because f**k this so hard.
  #logging.info('select name, id from streams where end_unix < date("now", "-%d seconds") or (end_minute - start_minute < 0.05 and start_unix < date("now", "%d seconds"))' % (archive_duration, TS.get_offset() * 60 - 1200))

  unlink_list = db['c'].execute('select name, id from streams where end_unix < date("now", "-%d seconds")' % (archive_duration)).fetchall()

  for file_name_tuple in unlink_list:
    file_name = str(file_name_tuple[0])
    id = file_name_tuple[1]

    logging.debug("Prune[remove]: %s (unlink list)" % file_name)
    # If there's a cloud account at all then we need to unlink the 
    # equivalent mp3 file
    if cloud_cutoff and misc.am_i_official():
      "cloud.";unlink(file_name)

      # After we remove these streams then we delete them from the db.
      db['c'].execute('delete from streams where id = %d' % id)
      db['conn'].commit()

    # now only after we've deleted from the cloud can we delete the local file
    if os.path.exists(file_name):
      os.unlink(file_name)
      count += 1


  logging.info("Found %d files older than %s days." % (count, misc.config['archivedays']))
  lockMap['prune'].release()
Example #15
0
File: cloud.py Project: EQ4/DRR
def prune_process(lockMap, reindex=False, force=False):
    # This is internal, call prune() directly. This is a normally blocking
    # process that is prepared by prune(), making it easily callable asynchronously
    # If another prune is running then we just bail
    if not lockMap['prune'].acquire(False) and not force:
        logging.warn(
            "Tried to run another prune whilst one is running. Aborting")
        return True

    # If we are the first process then we need to make sure that the webserver is up
    # before we check whether we are official
    time.sleep(2)

    pid = misc.change_proc_name("%s-cleanup" % misc.config['callsign'])

    # We want to run the am_i_official here since it could block on a DNS lookup
    misc.am_i_official()

    try:
        register_stream_list(reindex)

    except:
        lockMap['prune'].release()
        return None

    db = DB.connect()

    archive_duration = misc.config['archivedays'] * TS.ONE_DAY_SECOND
    cutoff = TS.unixtime('prune') - archive_duration

    # Remove all slices older than 4 hours.
    slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND

    cloud_cutoff = None
    if misc.config['cloud']:
        cloud_cutoff = TS.unixtime(
            'prune') - misc.config['cloudarchive'] * TS.ONE_DAY_SECOND

    # Put thingies into the cloud.
    count = 0
    for file_name in glob('*/*.mp3'):
        #
        # Depending on many factors this could be running for hours
        # or even days.  We want to make sure this isn't a blarrrghhh
        # zombie process or worse yet, still running and competing with
        # other instances of itself.
        #
        if not misc.manager_is_running():
            lockMap['prune'].release()
            return None

        ctime = os.path.getctime(file_name)

        # print "Looking at ", file_name, ctime, cutoff, archive_duration,  misc.config['archivedays'], misc.am_i_official()
        # We observe the rules set up in the config.
        if file_name.startswith(
                'slices') and ctime < slice_cutoff or ctime < cutoff:
            logging.debug("Prune[remove]: %s (ctime)" % file_name)
            os.unlink(file_name)
            count += 1

        # We want to make sure we aren't archiving the slices
        elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith(
                'slice') and misc.am_i_official():
            logging.debug("Prune[cloud]: %s" % file_name)

            # Only unlink the file if I can successfully put it into the cloud.
            if put(file_name):
                try:
                    os.unlink(file_name)

                except:
                    logging.debug("Prune[cloud]: Couldn't remove %s" %
                                  file_name)

    for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS):
        ctime = os.path.getctime(file_name)

        # We observe the rules set up in the config.
        if ctime < cutoff:
            logging.debug("Prune: %s" % file_name)
            os.unlink(file_name)
            count += 1

    # The map names are different since there may or may not be a corresponding
    # cloud thingie associated with it.
    db = DB.connect()

    # Don't do this f*****g shit at all because f**k this so hard.
    #logging.info('select name, id from streams where end_unix < date("now", "-%d seconds") or (end_minute - start_minute < 0.05 and start_unix < date("now", "%d seconds"))' % (archive_duration, TS.get_offset() * 60 - 1200))

    unlink_list = db['c'].execute(
        'select name, id from streams where end_unix < date("now", "-%d seconds")'
        % (archive_duration)).fetchall()

    for file_name_tuple in unlink_list:
        file_name = str(file_name_tuple[0])
        id = file_name_tuple[1]

        logging.debug("Prune[remove]: %s (unlink list)" % file_name)
        # If there's a cloud account at all then we need to unlink the
        # equivalent mp3 file
        if cloud_cutoff and misc.am_i_official():
            "cloud."
            unlink(file_name)

            # After we remove these streams then we delete them from the db.
            db['c'].execute('delete from streams where id = %d' % id)
            db['conn'].commit()

        # now only after we've deleted from the cloud can we delete the local file
        if os.path.exists(file_name):
            os.unlink(file_name)
            count += 1

    logging.info("Found %d files older than %s days." %
                 (count, misc.config['archivedays']))
    lockMap['prune'].release()
Example #16
0
    def cback(data):
        global g_download_kill_pid
        """
      if len(data):
        catchall('download', json.dumps([g_download_kill_pid, nl['pid'], len(data)]))
      else:
        catchall('download', json.dumps([g_download_kill_pid, 'no data']))
    """
        # print nl['pid'], g_download_kill_pid
        if nl['pid'] <= g_download_kill_pid or not data:
            logging.info("Stopping download #%d" % nl['pid'])
            return False

        # misc.params can fail based on a shutdown sequence.
        if misc is None or misc.params is None or not misc.manager_is_running(
        ):
            # if misc is not None:
            #  misc.shutdown()
            return False

        elif not misc.params['shutdown_time']:
            if not misc.download_ipc.empty():
                what, value = misc.download_ipc.get(False)
                if what == 'shutdown_time':
                    misc.params['shutdown_time'] = value

        elif TS.unixtime('dl') > misc.params['shutdown_time']:
            raise TypeError("Download Stop")

        if misc.params['isFirst'] == True:
            misc.params['isFirst'] = False

            if len(data) < 800:
                try:
                    data_string = data.decode('utf-8')

                    if re.match('https?://', data_string):
                        # If we are getting a redirect then we don't mind, we
                        # just put it in the stream and then we leave
                        misc.queue.put(('stream', data_string.strip()))
                        return False

                    # A pls style playlist
                    elif re.findall('File\d', data_string, re.M):
                        logging.info(
                            '%d: Found a pls, using the File1 parameter' %
                            (nl['pid'], ))
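                        # A .pls playlist typically looks like:
                        #   [playlist]
                        #   File1=http://<host>/<stream>
                        # so we pull the stream URL out of the File1 entry.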
                        matches = re.findall('File1=(.*)\n', data_string, re.M)
                        misc.queue.put(('stream', matches[0].strip()))
                        return False

                # If it gets here it's binary ... I guess that's fine.
                except:
                    pass

        # This provides a reliable way to determine bitrate.  We look at how much
        # data we've received between two time periods
        misc.queue.put(
            ('heartbeat', (TS.unixtime('hb'), nl['pid'], len(data))))

        if not nl['stream']:
            try:
                nl['stream'] = open(file_name, 'wb')

            except Exception as exc:
                logging.critical(
                    "%d: Unable to open %s. Can't record. Must exit." %
                    (nl['pid'], file_name))
                return False

        nl['stream'].write(data)