Example 1
  def live(start, offset_min=0):
    """ 
    Sends off a live-stream equivalent.  Two formats are supported:

     * duration - In the form of strings such as "1pm" or "2:30pm"
     * offset - starting with a negative "-", this means "from the present".
        For instance, to start the stream from 5 minutes ago, you can do "-5"

    """
    DB.incr('hits-live')
    if start[0] == '-' or start.endswith('min'):
      # strip unit suffixes like "min" or "m"
      start = re.sub('[a-z]', '', start)
      return redirect('/live/m%f' % (float(TS.minute_now() - abs(float(start)))), code=302)

    # The start is expressed in times like "11:59am ..." We utilize the
    # library we wrote for streaming to get the minute of day this is.
    if start[0] == 'm':
      requested_minute = float(start[1:]) % TS.ONE_DAY_MINUTE 

    else:
      candidate = start
      requested_minute = TS.to_utc('mon', candidate) - offset_min

    offset_sec = 0
    range_header = request.headers.get('Range', None)
    if range_header:
      m = re.search(r'(\d+)-', range_header)
      if m and m.group(1):
        byte1 = int(m.group(1))

        # We use the byte to compute the offset
        offset_sec = float(byte1) / ((int(DB.get('bitrate')) or 128) * (1000 / 8.0))
    

    #print "--- REQUEST @ ", start, range_header, offset_sec
    current_minute = TS.minute_now() % TS.ONE_DAY_MINUTE

    now_time = TS.now()
    requested_time = now_time - timedelta(minutes=current_minute) + timedelta(minutes=requested_minute)

    # print requested_time, now_time, requested_minute, current_minute
    # If the requested minute is greater than the current one, then we can presume that
    # the requested minute refers to yesterday ... as in, someone wants 11pm
    # and now it's 1am.
    if requested_minute > current_minute:
      requested_time -= timedelta(days=1)

    # It's important to do this AFTER the operation above otherwise we wrap around to yesterday
    requested_time += timedelta(seconds=offset_sec)

    # Get the info for the file that contains this timestamp
    start_info, requested_time_available = cloud.get_file_for_ts(target_time=requested_time, bias=-1)

    if start_info is None or requested_time_available is None:
      return do_error("Can't find any matching files")

    requested_time = max(requested_time, requested_time_available)
    start_second = (requested_time - start_info['start_date']).total_seconds()

    response = Response(audio.list_slice_stream(start_info, start_second), mimetype=audio.our_mime())

    return response
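
The subtle part of `live` is the day wrap-around: a requested minute-of-day later than the current one is taken to mean yesterday's broadcast. Below is a minimal, self-contained sketch of that arithmetic using plain `datetime`, standing in for the `TS` helpers (the function name and the fixed "now" are made up for illustration):

from datetime import datetime, timedelta

def resolve_requested_time(requested_minute, now):
  # Minute-of-day for the present moment.
  current_minute = now.hour * 60 + now.minute

  # Rewind to midnight, then advance to the requested minute.
  requested = now - timedelta(minutes=current_minute) + timedelta(minutes=requested_minute)

  # A minute-of-day "ahead" of the present means the caller wants
  # yesterday's broadcast, e.g. asking for 11pm when it's 1am.
  if requested_minute > current_minute:
    requested -= timedelta(days=1)

  return requested

# Asking for 11pm (minute 1380) at 1am resolves to the previous evening.
print(resolve_requested_time(1380, datetime(2015, 8, 2, 1, 0)))
# -> 2015-08-01 23:00:00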
Example 2
  def send_stream(path, download_name=None):
    """
    Downloads a stream from the server. The path is callsign-date_duration.mp3

      * callsign: The callsign returned by /stats
      * date: in the format YYYYMMDDHHMM such as 201508011005 for 
        2015-08-01 10:05
      * duration: A value, in minutes, to return.

    The mp3 extension should be used regardless of the actual format of the stream -
    although the audio returned will be in the stream's native format.
    
    The streams are created and sent on-demand, so there may be a slight delay before
    it starts.
    """
    DB.incr('hits-dl')
    base_dir = "%s%s/" % (config['storage'], misc.DIR_SLICES)

    if not path.startswith(config['callsign']):
      path = "%s-%s" % (config['callsign'], path)

    if not path.endswith('.mp3'):
      path = "%s.mp3" % path

    file_name = base_dir + path

    # If the file doesn't exist, then we need to slice it and create it based on our query.
    # Also, if it's a zero byte file, then we try to create it again.
    if not os.path.isfile(file_name) or os.path.getsize(file_name) == 0:
      cloud.register_stream_list()

      # This tells us that if it were to exist, it would be something
      # like this.
      request_info = audio.stream_info(file_name)
      logging.info(("expected value", request_info))

      # we can do something rather specific here ... 
      #
      # first we get our generic stream list using our start_minute from the info.
      stream_list, episode_list = cloud.find_streams(start_list=[request_info['start_minute']], duration_min=request_info['duration_sec'] / 60.0)
      
      for ep in episode_list:
        episode = ep[0]
        first_slice = episode[0]

        if first_slice['week_number'] == request_info['week_number']:
          # This means that we've found the episode that we want
          # We will block on this.
          relative_start_minute = request_info['start_minute'] - first_slice['start_minute']

          logging.info(episode)
          audio.stitch_and_slice_process(file_list=episode, relative_start_minute=relative_start_minute, duration_minute=request_info['duration_sec'] / 60.0, destination_path=path)

          # And break out of our loop ... now everything should exist.
          break

    return send_file_partial("%s/%s" % (base_dir, path), requested_path=path, file_name=download_name)
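
Going by the docstring, the request path for `send_stream` is just formatted text. A hedged example of constructing one client-side (the callsign here is made up; substitute whatever `/stats` reports):

from datetime import datetime

def stream_path(callsign, start, duration_min):
  # callsign-YYYYMMDDHHMM_duration.mp3, per the docstring above.
  return '%s-%s_%d.mp3' % (callsign, start.strftime('%Y%m%d%H%M'), duration_min)

# e.g. 'kpcc-201508011005_30.mp3' for 30 minutes starting 2015-08-01 10:05
print(stream_path('kpcc', datetime(2015, 8, 1, 10, 5), 30))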
Example 3
    def live(start, offset_min=0):
        """ 
    Sends off a live-stream equivalent.  Two formats are supported:

     * duration - In the form of strings such as "1pm" or "2:30pm"
     * offset - starting with a negative "-", this means "from the present".
        For instance, to start the stream from 5 minutes ago, you can do "-5"

    """
        DB.incr('hits-live')
        if start[0] == '-' or start.endswith('min'):
            # strip unit suffixes like "min" or "m"
            start = re.sub('[a-z]', '', start)
            return redirect('/live/m%f' %
                            (float(TS.minute_now() - abs(float(start)))),
                            code=302)

        # The start is expressed in times like "11:59am ..." We utilize the
        # library we wrote for streaming to get the minute of day this is.
        if start[0] == 'm':
            requested_minute = float(start[1:]) % TS.ONE_DAY_MINUTE

        else:
            candidate = start
            requested_minute = TS.to_utc('mon', candidate) - offset_min

        offset_sec = 0
        range_header = request.headers.get('Range', None)
        if range_header:
            m = re.search(r'(\d+)-', range_header)
            if m and m.group(1):
                byte1 = int(m.group(1))

                # We use the byte to compute the offset
                offset_sec = float(byte1) / ((int(DB.get('bitrate')) or 128) *
                                             (1000 / 8.0))

        #print "--- REQUEST @ ", start, range_header, offset_sec
        current_minute = TS.minute_now() % TS.ONE_DAY_MINUTE

        now_time = TS.now()
        requested_time = now_time - timedelta(
            minutes=current_minute) + timedelta(minutes=requested_minute)

        # print requested_time, now_time, requested_minute, current_minute
        # If the requested minute is greater than the current one, then we can presume that
        # the requested minute refers to yesterday ... as in, someone wants 11pm
        # and now it's 1am.
        if requested_minute > current_minute:
            requested_time -= timedelta(days=1)

        # It's important to do this AFTER the operation above otherwise we wrap around to yesterday
        requested_time += timedelta(seconds=offset_sec)

        # Get the info for the file that contains this timestamp
        start_info, requested_time_available = cloud.get_file_for_ts(
            target_time=requested_time, bias=-1)

        if start_info is None or requested_time_available is None:
            return do_error("Can't find any matching files")

        requested_time = max(requested_time, requested_time_available)
        start_second = (requested_time -
                        start_info['start_date']).total_seconds()

        response = Response(audio.list_slice_stream(start_info, start_second),
                            mimetype=audio.our_mime())

        return response
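
The Range handling above converts the first byte of the requested range into a seek offset in seconds, using the stored bitrate. Pulled out into a standalone sketch (the 128 kbps fallback mirrors the code above; the function name is mine):

import re

def range_to_offset_sec(range_header, bitrate_kbps=128):
    """ Convert an HTTP Range header such as 'bytes=2048000-' into seconds. """
    m = re.search(r'(\d+)-', range_header or '')
    if not m or not m.group(1):
        return 0.0
    byte1 = int(m.group(1))
    # kbps -> bytes per second: multiply by 1000 bits, divide by 8.
    return float(byte1) / (bitrate_kbps * (1000 / 8.0))

# 2,048,000 bytes into a 128 kbps stream is 128 seconds.
print(range_to_offset_sec('bytes=2048000-'))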
Example 4
    def send_stream(path, download_name=None):
        """
    Downloads a stream from the server. The path is callsign-date_duration.mp3

      * callsign: The callsign returned by /stats
      * date: in the format YYYYMMDDHHMM such as 201508011005 for 
        2015-08-01 10:05
      * duration: A value, in minutes, to return.

    The mp3 extension should be used regardless of the actual format of the stream -
    although the audio returned will be in the stream's native format.
    
    The streams are created and sent on-demand, so there may be a slight delay before
    it starts.
    """
        DB.incr('hits-dl')
        base_dir = "%s%s/" % (config['storage'], misc.DIR_SLICES)

        if not path.startswith(config['callsign']):
            path = "%s-%s" % (config['callsign'], path)

        if not path.endswith('.mp3'):
            path = "%s.mp3" % path

        file_name = base_dir + path

        # If the file doesn't exist, then we need to slice it and create it based on our query.
        # Also, if it's a zero byte file, then we try to create it again.
        if not os.path.isfile(file_name) or os.path.getsize(file_name) == 0:
            cloud.register_stream_list()

            # This tells us that if it were to exist, it would be something
            # like this.
            request_info = audio.stream_info(file_name)
            logging.info(("expected value", request_info))

            # we can do something rather specific here ...
            #
            # first we get our generic stream list using our start_minute from the info.
            stream_list, episode_list = cloud.find_streams(
                start_list=[request_info['start_minute']],
                duration_min=request_info['duration_sec'] / 60.0)

            for ep in episode_list:
                episode = ep[0]
                first_slice = episode[0]

                if first_slice['week_number'] == request_info['week_number']:
                    # This means that we've found the episode that we want
                    # We will block on this.
                    relative_start_minute = request_info[
                        'start_minute'] - first_slice['start_minute']

                    logging.info(episode)
                    audio.stitch_and_slice_process(
                        file_list=episode,
                        relative_start_minute=relative_start_minute,
                        duration_minute=request_info['duration_sec'] / 60.0,
                        destination_path=path)

                    # And break out of our loop ... now everything should exist.
                    break

        return send_file_partial("%s/%s" % (base_dir, path),
                                 requested_path=path,
                                 file_name=download_name)
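
`send_file_partial` itself isn't shown in these examples. As a rough idea of what such a helper typically does - honor a byte-range request with a 206 Partial Content response, falling back to a whole-file send - here is a hedged Flask sketch (a guess at the shape, not the project's actual implementation):

import os
import re
from flask import request, Response, send_file

def send_file_partial_sketch(path):
    range_header = request.headers.get('Range', None)
    if not range_header:
        return send_file(path)

    size = os.path.getsize(path)
    byte1, byte2 = 0, None

    m = re.search(r'(\d+)-(\d*)', range_header)
    if m.group(1):
        byte1 = int(m.group(1))
    if m.group(2):
        byte2 = int(m.group(2))

    length = (size - byte1) if byte2 is None else (byte2 - byte1 + 1)
    with open(path, 'rb') as f:
        f.seek(byte1)
        data = f.read(length)

    rv = Response(data, 206, mimetype='audio/mpeg', direct_passthrough=True)
    rv.headers.add('Content-Range', 'bytes %d-%d/%d' % (byte1, byte1 + length - 1, size))
    return rv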
Example 5
def read_config(config):
  import configparser
  # Reads a configuration file. 
  # Currently documented at https://github.com/kristopolous/DRR/wiki/Join-the-Federation
  Config = configparser.ConfigParser()
  Config.read(config)
  misc.config = misc.config_section_map('Main', Config)
  misc.PROCESS_PATH = os.path.dirname(os.path.realpath(__file__))
  
  defaults = {
    # The log level to be put into the indycast.log file.
    'loglevel': 'DEBUG',

    #
    # The relative, or absolute directory to put things in
    # The default goes into the home directory to try to avoid a situation
    # where we can't read or write something on default startup - also we keep
    # it out of a dot directory intentionally so that we don't fill up a home
    # directory in some hidden path - that's really dumb.
    #
    'storage': "%s/radio" % os.path.expanduser('~'),

    # The time to prolong a download to make sure that 
    # a restart or upgrade is seamless, in seconds.
    'restart_overlap': 15,

    # The TCP port to run the server on
    'port': 5000,

    # The interval, in seconds, between checks that our stream is running
    'cycle_time': 7,

    # The time to start a stream BEFORE the lapse of the cascade-time
    'cascade_buffer': 15,

    # The time between cascaded streams
    'cascade_time': '15m',

    # Cloud credentials (ec2, azure etc)
    'cloud': None,

    #
    # When to get things off local disk and store to the cloud
    # This means that after this many days data is sent remote and then 
    # retained for `archivedays`.  This makes the entire user-experience
    # a bit slower of course, and has an incurred throughput cost - but
    # it does save pricey VPS disk space, which seems to come at an unusual
    # premium.
    #
    'disk_archive': '1.20d',

    # The duration, in days, that we keep things archived in the cloud.
    'cloud_archive': '14d',
    
    # Run the pruning every this many days (float)
    'prune_every': '0.5d'
  }

  for k, v in list(defaults.items()):
    if k not in misc.config:
      misc.config[k] = v

    misc.config[k] = unit_convert_to_sec(misc.config[k])

  misc.config['port'] = int(misc.config['port'])

  # In case someone is specifying ~/radio 
  misc.config['storage'] = os.path.expanduser(misc.config['storage'])
  misc.config['_private'] = {}

  if misc.config['cloud']:
    misc.config['cloud'] = os.path.expanduser(misc.config['cloud'])

    if os.path.exists(misc.config['cloud']):
      # If there's a cloud configuration file then we read that too
      cloud_config = configparser.ConfigParser()
      cloud_config.read(misc.config['cloud'])

      # Things stored in the _private directory don't get reported back in a status
      # query.
      #
      # see https://github.com/kristopolous/DRR/issues/73 for what this is about.
      misc.config['_private']['azure'] = misc.config_section_map('Azure', cloud_config)

  if not os.path.isdir(misc.config['storage']):
    try:
      # If I can't do this, that's fine.
      os.mkdir(misc.config['storage'])

    except Exception as exc:
      # We make it from the current directory
      misc.config['storage'] = defaults['storage']

      if not os.path.isdir(misc.config['storage']):
        os.mkdir(misc.config['storage'])

  # Go to the callsign level in order to store multiple station feeds on a single
  # server in a single parent directory without forcing the user to decide what goes
  # where.
  misc.config['storage'] += '/%s/' % misc.config['callsign']
  misc.config['storage'] = re.sub(r'/+', '/', misc.config['storage'])

  if not os.path.isdir(misc.config['storage']):
    os.mkdir(misc.config['storage'])

  # We have a few sub directories for storing things
  for subdir in [misc.DIR_STREAMS, misc.DIR_SLICES, misc.DIR_BACKUPS]:
    if not os.path.isdir(misc.config['storage'] + subdir):
      os.mkdir(misc.config['storage'] + subdir)

  # Now we try to do all this stuff again
  if os.path.isdir(misc.config['storage']):
    #
    # There's a bug after we chdir, where the multiprocessing is trying to grab the same 
    # invocation as the initial argv[0] ... so we need to make sure that if a user did 
    # ./blah this will be maintained.
    #
    if not os.path.isfile(misc.config['storage'] + __file__):
      os.symlink(os.path.abspath(__file__), misc.config['storage'] + __file__)

    conf_path = misc.config['storage'] + "config"
    if os.path.exists(conf_path):
      os.unlink(conf_path)

    os.symlink(os.path.abspath(config), conf_path)
    os.chdir(misc.config['storage'])

  else:
    logging.warning("Can't find %s. Using current directory." % misc.config['storage'])

  misc.PIDFILE_MANAGER = '%s/%s' % (os.getcwd(), 'pid-manager')
  # If there is an existing pid-manager, that means that 
  # there is probably another version running.
  if os.path.isfile(misc.PIDFILE_MANAGER):
    with open(misc.PIDFILE_MANAGER, 'r') as f:
      oldserver = f.readline()

      try:  
        logging.info("Replacing our old image")
        os.kill(int(oldserver), signal.SIGUSR1)
        # We give it a few seconds to shut everything down
        # before trying to proceed
        time.sleep(misc.PROCESS_DELAY / 2)

      except Exception:
        pass
   
  # From https://docs.python.org/2/howto/logging.html
  numeric_level = getattr(logging, misc.config['loglevel'].upper(), None)
  if not isinstance(numeric_level, int):
    raise ValueError('Invalid log level: %s' % misc.config['loglevel'])

  logger = logging.getLogger()
  formatter = logging.Formatter(str(os.getpid()) + ':%(asctime)s:%(message)s', '%m%d_%H%M_%S')
  misc.handler = RotatingFileHandler('indycast.log', maxBytes=5000000, backupCount=2)
  misc.handler.setFormatter(formatter)
  misc.handler.setLevel(numeric_level)
  logger.setLevel(numeric_level)
  logger.addHandler(misc.handler)

  # Increment the number of times this has been run so we can track the stability of remote 
  # servers and instances.
  DB.upgrade()
  del DB.upgrade
  DB.incr('runcount')

  # This is how we discover if we are the official server or not.
  # Look at the /uuid endpoint to see how this magic works.
  misc.config['uuid'] = os.popen('uuidgen').read().strip()

  signal.signal(signal.SIGINT, misc.shutdown_handler)
  signal.signal(signal.SIGUSR1, misc.shutdown_handler)
  signal.signal(signal.SIGHUP, misc.do_nothing)
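
`unit_convert_to_sec` isn't shown here, but the defaults above ('15m', '1.20d', '0.5d') imply it passes bare values through and expands unit-suffixed strings. A hypothetical implementation consistent with those values:

def unit_convert_to_sec(value):
  # Hypothetical: numbers (and anything unsuffixed) pass through untouched;
  # 's'/'m'/'h'/'d' suffixed strings become seconds.
  units = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400}
  if isinstance(value, str) and value and value[-1] in units:
    return float(value[:-1]) * units[value[-1]]
  return value

print(unit_convert_to_sec('15m'))    # 900.0
print(unit_convert_to_sec('1.20d'))  # 103680.0
print(unit_convert_to_sec('DEBUG'))  # 'DEBUG' (non-durations are untouched)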
Example 6
def read_config(config):
    import ConfigParser
    # Reads a configuration file.
    # Currently documented at https://github.com/kristopolous/DRR/wiki/Join-the-Federation
    Config = ConfigParser.ConfigParser()
    Config.read(config)
    misc.config = misc.config_section_map('Main', Config)
    misc.PROCESS_PATH = os.path.dirname(os.path.realpath(__file__))

    defaults = {
        # The log level to be put into the indycast.log file.
        'loglevel': 'DEBUG',

        #
        # The relative, or absolute directory to put things in
        # The default goes into the home directory to try to avoid a situation
        # where we can't read or write something on default startup - also we keep
        # it out of a dot directory intentionally so that we don't fill up a home
        # directory in some hidden path - that's really dumb.
        #
        'storage': "%s/radio" % os.path.expanduser('~'),

        # The (day) time after which an intent to record expires
        'expireafter': 45,

        # The time to prolong a download to make sure that
        # a restart or upgrade is seamless, in seconds.
        'restart_overlap': 15,

        # The TCP port to run the server on
        'port': 5000,

        # The (day) duration we should be archiving things.
        'archivedays': 28,

        # The (second) interval between checks that our stream is running
        'cycletime': 7,

        # The (second) time to start a stream BEFORE the lapse of the cascade-time
        'cascadebuffer': 15,

        # The (second) time between cascaded streams
        'cascadetime': 60 * 15,

        # Cloud credentials (ec2, azure etc)
        'cloud': None,

        #
        # When to get things off local disk and store to the cloud
        # This means that after this many days data is sent remote and then
        # retained for `archivedays`.  This makes the entire user-experience
        # a bit slower of course, and has an incurred throughput cost - but
        # it does save pricey VPS disk space, which seems to come at an unusual
        # premium.
        #
        'cloudarchive': 1.20,

        # Run the pruning every this many days (float)
        'pruneevery': 0.5
    }

    for k, v in defaults.items():
        if k not in misc.config:
            misc.config[k] = v
        else:
            if type(v) is int: misc.config[k] = int(misc.config[k])
            elif type(v) is long: misc.config[k] = long(misc.config[k])
            elif type(v) is float: misc.config[k] = float(misc.config[k])

    # In case someone is specifying ~/radio
    misc.config['storage'] = os.path.expanduser(misc.config['storage'])
    misc.config['_private'] = {}

    if misc.config['cloud']:
        misc.config['cloud'] = os.path.expanduser(misc.config['cloud'])

        if os.path.exists(misc.config['cloud']):
            # If there's a cloud configuration file then we read that too
            cloud_config = ConfigParser.ConfigParser()
            cloud_config.read(misc.config['cloud'])

            # Things stored in the _private directory don't get reported back in a status
            # query.
            #
            # see https://github.com/kristopolous/DRR/issues/73 for what this is about.
            misc.config['_private']['azure'] = misc.config_section_map(
                'Azure', cloud_config)

    if not os.path.isdir(misc.config['storage']):
        try:
            # If I can't do this, that's fine.
            os.mkdir(misc.config['storage'])

        except Exception as exc:
            # We make it from the current directory
            misc.config['storage'] = defaults['storage']

            if not os.path.isdir(misc.config['storage']):
                os.mkdir(misc.config['storage'])

    # Go to the callsign level in order to store multiple station feeds on a single
    # server in a single parent directory without forcing the user to decide what goes
    # where.
    misc.config['storage'] += '/%s/' % misc.config['callsign']
    misc.config['storage'] = re.sub(r'/+', '/', misc.config['storage'])

    if not os.path.isdir(misc.config['storage']):
        os.mkdir(misc.config['storage'])

    # We have a few sub directories for storing things
    for subdir in [misc.DIR_STREAMS, misc.DIR_SLICES, misc.DIR_BACKUPS]:
        if not os.path.isdir(misc.config['storage'] + subdir):
            os.mkdir(misc.config['storage'] + subdir)

    # Now we try to do all this stuff again
    if os.path.isdir(misc.config['storage']):
        #
        # There's a bug after we chdir, where the multiprocessing is trying to grab the same
        # invocation as the initial argv[0] ... so we need to make sure that if a user did
        # ./blah this will be maintained.
        #
        if not os.path.isfile(misc.config['storage'] + __file__):
            os.symlink(os.path.abspath(__file__),
                       misc.config['storage'] + __file__)

        os.chdir(misc.config['storage'])

    else:
        logging.warning("Can't find %s. Using current directory." %
                        misc.config['storage'])

    misc.PIDFILE_MANAGER = '%s/%s' % (os.getcwd(), 'pid-manager')
    # If there is an existing pid-manager, that means that
    # there is probably another version running.
    if os.path.isfile(misc.PIDFILE_MANAGER):
        with open(misc.PIDFILE_MANAGER, 'r') as f:
            oldserver = f.readline()

            try:
                logging.info("Replacing our old image")
                os.kill(int(oldserver), signal.SIGUSR1)
                # We give it a few seconds to shut everything down
                # before trying to proceed
                time.sleep(misc.PROCESS_DELAY / 2)

            except Exception:
                pass

    # From https://docs.python.org/2/howto/logging.html
    numeric_level = getattr(logging, misc.config['loglevel'].upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % misc.config['loglevel'])

    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s',
                                  '%Y%m%d_%H%M_%S')
    handler = RotatingFileHandler('indycast.log',
                                  maxBytes=2000000,
                                  backupCount=5)
    handler.setFormatter(formatter)
    handler.setLevel(numeric_level)
    logger.setLevel(numeric_level)
    logger.addHandler(handler)

    # Increment the number of times this has been run so we can track the stability of remote
    # servers and instances.
    DB.upgrade()
    del DB.upgrade
    DB.incr('runcount')

    # This is how we discover if we are the official server or not.
    # Look at the /uuid endpoint to see how this magic works.
    misc.config['uuid'] = os.popen('uuidgen').read().strip()

    signal.signal(signal.SIGINT, misc.shutdown_handler)
    signal.signal(signal.SIGUSR1, misc.shutdown_handler)
    signal.signal(signal.SIGHUP, misc.do_nothing)
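
Both versions rely on `misc.config_section_map`, which also isn't included here. A plausible sketch (Python 3 spelling), assuming it simply flattens one INI section into a plain dict:

import configparser

def config_section_map(section, parser):
    # Hypothetical: one INI section as a dict of option -> raw string value.
    return {key: parser.get(section, key) for key in parser.options(section)}

parser = configparser.ConfigParser()
parser.read_string("[Main]\ncallsign = kpcc\nport = 5000\n")
print(config_section_map('Main', parser))  # {'callsign': 'kpcc', 'port': '5000'}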
Example 7
def stream_manager():
    import random

    # Manager process which makes sure that the
    # streams are running appropriately.
    callsign = misc.config['callsign']

    #
    # AAC bitrate is some non-trivial thing that even ffprobe doesn't
    # do a great job at. This solution looks at number of bits that
    # transit over the wire given a duration of time, and then uses
    # that to compute the bitrate, since in practice, that's what
    # bitrate effectively means, and why it's such an important metric.
    #
    # This is to compute a format agnostic bitrate
    # (see heartbeat for more information)
    #
    has_bitrate = DB.get('bitrate')
    first_time = 0
    total_bytes = 0
    normalize_delay = 6

    cascade_time = misc.config['cascadetime']
    cascade_buffer = misc.config['cascadebuffer']
    cascade_margin = cascade_time - cascade_buffer

    last_prune = 0
    last_success = 0

    change_state = None
    SHUTDOWN = 1
    RESTART = 2
    shutdown_time = None
    misc.download_ipc = Queue()

    # Number of seconds to be cycling
    cycle_time = misc.config['cycletime']

    process = None
    process_next = None

    # The manager will be the one that starts this.
    misc.pid_map['webserver'] = Process(target=server.manager,
                                        args=(misc.config, ))
    misc.pid_map['webserver'].start()

    file_name = None

    # A wrapper function to start a download process
    def download_start(file_name):
        """ Starts a process that manages the downloading of a stream. """
        global g_download_pid

        g_download_pid += 1
        logging.info('Starting cascaded downloader #%d. Next up in %ds' %
                     (g_download_pid, cascade_margin))

        #
        # There may be a multi-second lapse time from the naming of the file to
        # the actual start of the download so we should err on that side by putting it
        # in the future by some margin
        #
        file_name = '%s/%s-%s.mp3' % (
            misc.DIR_STREAMS, callsign,
            TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
        process = Process(target=stream_download,
                          args=(callsign, misc.config['stream'],
                                g_download_pid, file_name))
        process.start()
        return [file_name, process]

    # see https://github.com/kristopolous/DRR/issues/91:
    # Randomize prune to offload disk peaks
    prune_duration = misc.config['pruneevery'] + (1 / 8.0 -
                                                  random.random() / 4.0)

    while True:
        #
        # We cycle this to off for every run. By the time we go through the queue, so long
        # as we aren't supposed to be shutting down, this should be toggled to true.
        #
        flag = False

        if last_prune < (TS.unixtime('prune') -
                         TS.ONE_DAY_SECOND * prune_duration):
            prune_duration = misc.config['pruneevery'] + (
                1 / 8.0 - random.random() / 4.0)
            # We just assume it can do its business in under a day
            misc.pid_map['prune'] = cloud.prune()
            last_prune = TS.unixtime('prune')

        TS.get_offset()

        lr_set = False
        while not misc.queue.empty():
            flag = True
            what, value = misc.queue.get(False)

            # The curl process discovered a new stream to be
            # used instead.
            if what == 'stream':
                misc.config['stream'] = value
                logging.info("Using %s as the stream now" % value)
                # We deliberately don't toggle the flag here, so the old
                # process gets shut down and a new one started.

            elif what == 'db-debug':
                DB.debug()

            elif what == 'shutdown':
                change_state = SHUTDOWN

            elif what == 'restart':
                logging.info(DB.get('runcount', use_cache=False))
                cwd = os.getcwd()
                os.chdir(misc.PROCESS_PATH)
                Popen(sys.argv)
                os.chdir(cwd)

                change_state = RESTART

                # Try to record for another restart_overlap seconds - make sure that
                # we don't perpetually put this in the future due to some bug.
                if not shutdown_time:
                    shutdown_time = TS.unixtime(
                        'dl') + misc.config['restart_overlap']
                    logging.info(
                        "Restart requested ... shutting down downloader at %s"
                        % TS.ts_to_name(shutdown_time, with_seconds=True))

                    while True:
                        time.sleep(20)
                        #logging.info(DB.get('runcount', use_cache=False))
                        logging.info(
                            os.popen(
                                'ps axf | grep [%c]%s | grep python | wc -l' %
                                (misc.config['callsign'][0],
                                 misc.config['callsign'][1:])).read().strip())
                        ps_out = int(
                            os.popen(
                                'ps axf | grep [%c]%s | grep python | wc -l' %
                                (misc.config['callsign'][0],
                                 misc.config['callsign'][1:])).read().strip())

                        if ps_out > 1:
                            logging.info(
                                "Found %d potential candidates (need at least 2)"
                                % ps_out)
                            # This makes it a restricted soft shutdown
                            misc.shutdown_real(do_restart=True)
                            misc.download_ipc.put(
                                ('shutdown_time', shutdown_time))
                            break

                        else:
                            Popen(sys.argv)
                            logging.warning(
                                "Couldn't find a replacement process ... not going anywhere."
                            )

            elif what == 'heartbeat':
                if not lr_set and value[1] > 100:
                    lr_set = True
                    DB.set('last_recorded', time.time())

                if not has_bitrate:

                    # Keep track of the first time this stream started (this is where our total
                    # byte count is derived from)
                    if not first_time:
                        first_time = value[0]

                    #
                    # Otherwise we give a large (in computer time) margin of time to confidently
                    # guess the bitrate.  I didn't do great at stats in college, but in my experiments,
                    # the estimation falls within 98% of the destination.  I'm pretty sure it's really
                    # unlikely this will come out erroneous, but I really can't do the math, it's probably
                    # a T value, but I don't know. Anyway, whatevs.
                    #
                    # The normalize_delay here is both for he-aac+ streams, which need to put in some frames
                    # before the quantizing pushes itself up, and for other stations which sometimes put a canned
                    # message at the beginning of the stream, like "Live streaming supported by ..."
                    #
                    # When we discount the first half-dozen seconds as not being part of the total, we get a
                    # stabilizing convergence far quicker.
                    #
                    elif (value[0] - first_time > normalize_delay):
                        # If we haven't determined this stream's bitrate (which we use to estimate
                        # the amount of content in a given archived stream), then we compute it
                        # here instead of asking the parameters of a given block and then presuming.
                        total_bytes += value[1]

                        # We still give it a time period after the normalizing delay in order to build enough
                        # samples to make a solid guess at what this number should be.
                        if (value[0] - first_time > (normalize_delay + 60)):
                            # We take the total bytes and average them over the elapsed sample time.
                            est = total_bytes / (value[0] - first_time -
                                                 normalize_delay)

                            # We find the nearest 8Kb increment this matches and then scale out.
                            # Then we multiply out by 8 (for _K_ B) and 8 again for K _b_.
                            bitrate = int(round(est / 1000) * 8)
                            DB.set('bitrate', bitrate)

        # Check for our management process
        if not misc.manager_is_running():
            logging.info("Manager isn't running")
            change_state = SHUTDOWN

        # The only way for the bool to be toggled off is if we are not in full-mode ...
        # we get here if we should NOT be recording.  So we make sure we aren't.
        if change_state == SHUTDOWN or (change_state == RESTART
                                        and TS.unixtime('dl') > shutdown_time):
            process = my_process_shutdown(process)
            process_next = my_process_shutdown(process_next)
            misc.shutdown_real()

        else:
            # Didn't respond in cycle_time seconds so kill it
            if not flag:
                process = my_process_shutdown(process)

            if not process and not change_state:
                file_name, process = download_start(file_name)
                last_success = TS.unixtime('dl')

            # If we've hit the time when we ought to cascade
            elif TS.unixtime('dl') - last_success > cascade_margin:

                # And we haven't created the next process yet, then we start it now.
                if not process_next:
                    file_name, process_next = download_start(file_name)

            # If our last_success stream was more than cascade_time - cascade_buffer
            # then we start our process_next

            # If there is still no process then we should definitely bail.
            if not process:
                misc.shutdown_real()

        #
        # This needs to be on the outside loop in case we are doing a cascade
        # outside of a full mode. In this case, we will need to shut things down
        #
        # If we are past the cascade_time and we have a process_next, then
        # we should shutdown our previous process and move the pointers around.
        #
        if not change_state and TS.unixtime(
                'dl') - last_success > cascade_time and process:
            logging.info("Stopping cascaded downloader")
            process.terminate()

            # If the process_next is running then we move our last_success forward to the present
            last_success = TS.unixtime('dl')

            # we rename our process_next AS OUR process
            process = process_next

            # and then clear out the old process_next pointer
            process_next = None

        # Increment the amount of time this has been running
        DB.incr('uptime', cycle_time)

        time.sleep(cycle_time)
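
The heartbeat branch above boils down to a running average: ignore the first normalize_delay seconds, accumulate bytes for at least another minute, then snap bytes-per-second to the nearest 8 kbps step. Condensed into a self-contained sketch (the function and the synthetic ticks are mine, for illustration):

def estimate_bitrate(samples, normalize_delay=6, settle_sec=60):
    """ samples: chronological (timestamp_sec, byte_count) heartbeat pairs. """
    first_time = samples[0][0]
    last_time = samples[-1][0]
    if last_time - first_time <= normalize_delay + settle_sec:
        return None  # not enough data to make a solid guess yet

    total_bytes = sum(b for t, b in samples if t - first_time > normalize_delay)
    est = total_bytes / (last_time - first_time - normalize_delay)

    # Same rounding as above: nearest 1000 bytes/sec step, times 8 for kbps.
    return int(round(est / 1000) * 8)

# A steady 16,000 bytes/sec feed estimates out to 128 kbps.
ticks = [(t, 16000) for t in range(0, 120)]
print(estimate_bitrate(ticks))  # 128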
Example 8
def stream_manager():
  import random

  # Manager process which makes sure that the
  # streams are running appropriately.
  callsign = misc.config['callsign']

  #
  # AAC bitrate is some non-trivial thing that even ffprobe doesn't
  # do a great job at. This solution looks at number of bits that
  # transit over the wire given a duration of time, and then uses
  # that to compute the bitrate, since in practice, that's what
  # bitrate effectively means, and why it's such an important metric.
  #
  # This is to compute a format agnostic bitrate
  # (see heartbeat for more information)
  #
  has_bitrate = DB.get('bitrate') 
  first_time = 0
  total_bytes = 0
  normalize_delay = 6

  cascade_time = misc.config['cascadetime']
  cascade_buffer = misc.config['cascadebuffer']
  cascade_margin = cascade_time - cascade_buffer

  last_prune = 0
  last_success = 0

  change_state = None
  SHUTDOWN = 1
  RESTART = 2
  shutdown_time = None
  misc.download_ipc = Queue()

  # Number of seconds to be cycling
  cycle_time = misc.config['cycletime']

  process = None
  process_next = None

  # The manager will be the one that starts this.
  misc.pid_map['webserver'] = Process(target=server.manager, args=(misc.config,))
  misc.pid_map['webserver'].start()

  file_name = None

  # A wrapper function to start a download process
  def download_start(file_name):
    """ Starts a process that manages the downloading of a stream. """
    global g_download_pid

    g_download_pid += 1
    logging.info('Starting cascaded downloader #%d. Next up in %ds' % (g_download_pid, cascade_margin))

    #
    # There may be a multi-second lapse time from the naming of the file to
    # the actual start of the download so we should err on that side by putting it
    # in the future by some margin
    #
    file_name = '%s/%s-%s.mp3' % (misc.DIR_STREAMS, callsign, TS.ts_to_name(TS.now(offset_sec=misc.PROCESS_DELAY / 2)))
    process = Process(target=stream_download, args=(callsign, misc.config['stream'], g_download_pid, file_name))
    process.start()
    return [file_name, process]


  # see https://github.com/kristopolous/DRR/issues/91:
  # Randomize prune to offload disk peaks
  prune_duration = misc.config['pruneevery'] + (1 / 8.0 - random.random() / 4.0)

  while True:
    #
    # We cycle this to off for every run. By the time we go through the queue, so long
    # as we aren't supposed to be shutting down, this should be toggled to true.
    #
    flag = False

    if last_prune < (TS.unixtime('prune') - TS.ONE_DAY_SECOND * prune_duration):
      prune_duration = misc.config['pruneevery'] + (1 / 8.0 - random.random() / 4.0)
      # We just assume it can do its business in under a day
      misc.pid_map['prune'] = cloud.prune()
      last_prune = TS.unixtime('prune')

    TS.get_offset()

    lr_set = False
    while not misc.queue.empty():
      flag = True
      what, value = misc.queue.get(False)

      # The curl process discovered a new stream to be
      # used instead.
      if what == 'stream':
        misc.config['stream'] = value
        logging.info("Using %s as the stream now" % value)
        # We deliberately don't toggle the flag here, so the old
        # process gets shut down and a new one started.

      elif what == 'db-debug':
        DB.debug()

      elif what == 'shutdown':
        change_state = SHUTDOWN

      elif what == 'restart':
        logging.info(DB.get('runcount', use_cache=False))
        cwd = os.getcwd()
        os.chdir(misc.PROCESS_PATH)
        Popen(sys.argv)
        os.chdir(cwd)

        change_state = RESTART

        # Try to record for another restart_overlap seconds - make sure that
        # we don't perpetually put this in the future due to some bug.
        if not shutdown_time:
          shutdown_time = TS.unixtime('dl') + misc.config['restart_overlap']
          logging.info("Restart requested ... shutting down downloader at %s" % TS.ts_to_name(shutdown_time, with_seconds=True))

          while True:
            time.sleep(20)
            #logging.info(DB.get('runcount', use_cache=False))
            logging.info(os.popen('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:])).read().strip())
            ps_out = int(os.popen('ps axf | grep [%c]%s | grep python | wc -l' % (misc.config['callsign'][0], misc.config['callsign'][1:])).read().strip())

            if ps_out > 1: 
              logging.info("Found %d potential candidates (need at least 2)" % ps_out)
              # This makes it a restricted soft shutdown
              misc.shutdown_real(do_restart=True)
              misc.download_ipc.put(('shutdown_time', shutdown_time))
              break

            else:
              Popen(sys.argv)
              logging.warning("Couldn't find a replacement process ... not going anywhere.")

      elif what == 'heartbeat':
        if not lr_set and value[1] > 100:
          lr_set = True
          DB.set('last_recorded', time.time())

        if not has_bitrate: 

          # Keep track of the first time this stream started (this is where our total
          # byte count is derived from)
          if not first_time: 
            first_time = value[0]

          #
          # Otherwise we give a large (in computer time) margin of time to confidently
          # guess the bitrate.  I didn't do great at stats in college, but in my experiments,
          # the estimation falls within 98% of the destination.  I'm pretty sure it's really
          # unlikely this will come out erroneous, but I really can't do the math, it's probably
          # a T value, but I don't know. Anyway, whatevs.
          #
          # The normalize_delay here is both for he-aac+ streams, which need to put in some frames
          # before the quantizing pushes itself up, and for other stations which sometimes put a canned
          # message at the beginning of the stream, like "Live streaming supported by ..."
          #
          # When we discount the first half-dozen seconds as not being part of the total, we get a
          # stabilizing convergence far quicker.
          #
          elif (value[0] - first_time > normalize_delay):
            # If we haven't determined this stream's bitrate (which we use to estimate 
            # the amount of content in a given archived stream), then we compute it
            # here instead of asking the parameters of a given block and then presuming.
            total_bytes += value[1]

            # We still give it a time period after the normalizing delay in order to build enough
            # samples to make a solid guess at what this number should be.
            if (value[0] - first_time > (normalize_delay + 60)):
              # We take the total bytes and average them over the elapsed sample time.
              est = total_bytes / (value[0] - first_time - normalize_delay)

              # We find the nearest 8Kb increment this matches and then scale out.
              # Then we multiply out by 8 (for _K_ B) and 8 again for K _b_.
              bitrate = int(round(est / 1000) * 8)
              DB.set('bitrate', bitrate)

    # Check for our management process
    if not misc.manager_is_running():
      logging.info("Manager isn't running");
      change_state = SHUTDOWN

    # The only way for the bool to be toggled off is if we are not in full-mode ... 
    # we get here if we should NOT be recording.  So we make sure we aren't.
    if change_state == SHUTDOWN or (change_state == RESTART and TS.unixtime('dl') > shutdown_time):
      process = my_process_shutdown(process)
      process_next = my_process_shutdown(process_next)
      misc.shutdown_real()

    else:
      # Didn't respond in cycle_time seconds so kill it
      if not flag:
        process = my_process_shutdown(process)

      if not process and not change_state:
        file_name, process = download_start(file_name)
        last_success = TS.unixtime('dl')

      # If we've hit the time when we ought to cascade
      elif TS.unixtime('dl') - last_success > cascade_margin:

        # And we haven't created the next process yet, then we start it now.
        if not process_next:
          file_name, process_next = download_start(file_name)

      # If our last_success stream was more than cascade_time - cascade_buffer
      # then we start our process_next
      
      # If there is still no process then we should definitely bail.
      if not process:
        misc.shutdown_real()

    #
    # This needs to be on the outside loop in case we are doing a cascade
    # outside of a full mode. In this case, we will need to shut things down
    #
    # If we are past the cascade_time and we have a process_next, then
    # we should shutdown our previous process and move the pointers around.
    #
    if not change_state and TS.unixtime('dl') - last_success > cascade_time and process:
      logging.info("Stopping cascaded downloader")
      process.terminate()

      # If the process_next is running then we move our last_success forward to the present
      last_success = TS.unixtime('dl')

      # we rename our process_next AS OUR process
      process = process_next

      # and then clear out the old process_next pointer
      process_next = None

    # Increment the amount of time this has been running
    DB.incr('uptime', cycle_time)

    time.sleep(cycle_time)
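
Finally, the cascade arithmetic is easy to misread: a fresh downloader starts cascade_buffer seconds before the running one's cascade_time lapses, so consecutive recordings always overlap. An idealized schedule using the defaults from Example 6 (15-minute cascade, 15-second buffer; real timings drift with the queue):

cascade_time = 60 * 15   # how long each downloader records, in seconds
cascade_buffer = 15      # desired overlap between consecutive downloaders
cascade_margin = cascade_time - cascade_buffer

# Downloader k+1 starts once downloader k has run for cascade_margin
# seconds, so each adjacent pair overlaps by cascade_buffer seconds.
for k in range(3):
  start = k * cascade_margin
  print('downloader %d: %5ds .. %5ds' % (k, start, start + cascade_time))

# downloader 0:     0s ..   900s
# downloader 1:   885s ..  1785s
# downloader 2:  1770s ..  2670s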