Ejemplo n.º 1
Archivo: audio.py Proyecto: EQ4/DRR
def list_slice_stream(start_info, start_sec):
  # This is part of the /live/time feature ... this streams files hopping from one to the next
  # in a live manner ... it constructs things while running ... hopping to the next stream in real time.
  #
  # Generator: yields blocks read from the stream named by start_info['name'] and,
  # when a following file is reported by cloud.get_next(), carries on with that one.
  #
  #   start_info - dict describing the first file; 'name' and 'size' are read here.
  #   start_sec  - offset in seconds into the first file.  It is converted to a
  #                frame index with _FRAME_LENGTH and clamped to the offset map.
  #
  # NOTE(review): this listing appears to be missing statements (for example the
  # body of `if times_none > 20:` and the branch that belongs to the
  # "Live stitching failed" warning), so it does not parse as shown.  Compare with
  # the original audio.py before relying on any of the notes below.
  pid = misc.change_proc_name("%s-audiostream" % misc.config['callsign'])
  block_count = 0

  current_info = start_info

  # get the regular map so we know where to start from
  # (siglist is not referenced again in the visible code)
  siglist, offset = signature(current_info['name'])
  # Clamp the requested frame to [0, len(offset) - 1] before indexing.
  start_frame = min(max(int(start_sec / _FRAME_LENGTH), 0), len(offset) - 1)
  start_byte = offset[start_frame]

  # Outer loop: one iteration per file being streamed.
  while True:
    stream_handle = cloud.get(current_info['name'])
    # offset is rebound here to the map computed from the open handle.
    sig, offset = signature(stream_handle)
    # NOTE(review): start_byte is logged here but no seek to it is visible in this listing.
    logging.debug("-- opening %s %d %d %d" % (current_info['name'], current_info['size'], stream_handle.tell(), start_byte) )

    # This helps us determine when we are at EOF ... which
    # we basically define as a number of seconds without any
    # valid read.
    times_none = 0
    block_count = 0
    read_size = 0

    # Inner loop: read and yield blocks from the current file.
    while True:
      # So we want to make sure that we only send out valid, 
      # non-corrupt mp3 blocks that start and end
      # at reasonable intervals.
      if len(offset) > 1:
        # Read exactly the span between the first two entries of the offset map.
        read_size = offset[1] - offset[0]

        block = stream_handle.read(read_size)
        block_count += 1
        times_none = 0 
        yield block

        # NOTE(review): times_none is reset a few lines up and incremented here in
        # the same branch; presumably the increment belonged to a separate
        # "no data" branch in the original - confirm.
        times_none += 1
        # NOTE(review): this `if` has no body in the listing.
        if times_none > 20:
        elif times_none > 1:
          #print stream_handle.tell(), current_info['size'], times_none, len(block)
          # See if there's a next file that we can immediately go to
          # NOTE(review): this rebinds `offset` to the second value returned by cloud.get_next().
          next_info, offset = cloud.get_next(current_info)
          if next_info: break

        # We wait 1/2 second and then try this process again, hopefully
        # the disk has sync'd and we have more data
        # NOTE(review): no sleep call is visible here; only the map is recomputed.
        sig, offset = signature(stream_handle)

    # The last logged value divides the bytes read since start_byte by 16000 and
    # then by 60.0 - presumably minutes of audio at 128 kbit/s; confirm.
    logging.debug("-- closing %s %d %d %d %d" % (current_info['name'], current_info['size'], stream_handle.tell(), block_count, (stream_handle.tell() - start_byte) / (128000 / 8) / 60.0))
    pos = stream_handle.tell() 

    # If we are here that means that we ran out of data on our current
    # file.  The first things we should do is see if there is a next file
    next_info, offset = cloud.get_next(current_info)

    if next_info:

      # If there is we find the stitching point
      args = stitch([current_info, next_info], force_stitch=True)
      # Python 2 print statement (debug output of the stitch result and read position).
      print args, pos

      # We make it our current file
      current_info = next_info

      # Now we can assume that our args[1] is going to have all
      # the information pertaining to where the new file should pick
      # up from - all we really need is the start_byte
      if len(args) == 2:
        start_byte = args[1]['start_byte'] 
        # print "Starting at ", start_byte

        # NOTE(review): as listed, this warning is emitted on the success path;
        # presumably an `else:` line was lost - confirm.
        logging.warn("Live stitching failed")

      # Otherwise we have to bail
Archivo: cloud.py Proyecto: EQ4/DRR
def prune_process(lockMap, reindex=False, force=False):
  # This is internal, call prune() directly. This is a normally blocking
  # process that is prepared by prune(), making it easily callable asynchronously 
  # If another prune is running then we just bail
  #
  #   lockMap - mapping whose 'prune' entry exposes acquire(); presumably a lock
  #             shared between processes - confirm against the caller.
  #   reindex - not referenced anywhere in the visible code.
  #   force   - when true, continue even if the 'prune' lock could not be acquired.
  #
  # Returns True when it bails because the lock is held, and None when it bails
  # because the manager is no longer running.
  #
  # NOTE(review): this listing appears to be missing statements (the actual
  # unlink/removal calls and at least one guard are not shown), so it does not
  # parse as-is.  Compare with the original cloud.py.
  if not lockMap['prune'].acquire(False) and not force:
    logging.warn("Tried to run another prune whilst one is running. Aborting")
    return True

  # If we are the first process then we need to make sure that the webserver is up before
  # we do this to check to see if we are official

  pid = misc.change_proc_name("%s-cleanup" % misc.config['callsign'])

  # We want to run the am_i_official here since it could block on a DNS lookup


    # NOTE(review): the condition guarding this return is not present in the listing.
    return None

  db = DB.connect()

  # Everything with a ctime older than `cutoff` falls outside the configured archive window.
  archive_duration = misc.config['archivedays'] * TS.ONE_DAY_SECOND
  cutoff = TS.unixtime('prune') - archive_duration

  # Remove all slices older than 4 hours.
  slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND

  # cloud_cutoff stays None when no cloud is configured; the checks below rely on that.
  cloud_cutoff = None
  if misc.config['cloud']:
    cloud_cutoff = TS.unixtime('prune') - misc.config['cloudarchive'] * TS.ONE_DAY_SECOND

  # Put thingies into the cloud.
  count = 0
  for file_name in glob('*/*.mp3'):
    # Depending on many factors this could be running for hours
    # or even days.  We want to make sure this isn't a blarrrghhh
    # zombie process or worse yet, still running and competing with
    # other instances of itself.
    if not misc.manager_is_running():
      return None

    ctime = os.path.getctime(file_name)

    # print "Looking at ", file_name, ctime, cutoff, archive_duration,  misc.config['archivedays'], misc.am_i_official()
    # We observe the rules set up in the config.
    # `and` binds tighter than `or`: (is a slice AND older than slice_cutoff) OR older than cutoff.
    if file_name.startswith('slices') and ctime < slice_cutoff or ctime < cutoff:
      logging.debug("Prune[remove]: %s (ctime)" % file_name)
      # NOTE(review): only the log line and the counter are visible; the removal call is not shown.
      count += 1 

    # We want to make sure we aren't archiving the slices
    # NOTE(review): the prefix tested here is 'slice' while the branch above tests 'slices'.
    elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith('slice') and misc.am_i_official():
      logging.debug("Prune[cloud]: %s" % file_name)

      # Only unlink the file if I can successfully put it into the cloud.
      if put(file_name):

          # NOTE(review): the unlink attempt that would precede this failure message is not shown.
          logging.debug("Prune[cloud]: Couldn't remove %s" % file_name)

  # Second pass: gzip backups under misc.DIR_BACKUPS.
  for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS):
    ctime = os.path.getctime(file_name)

    # We observe the rules set up in the config.
    if ctime < cutoff:
      logging.debug("Prune: %s" % file_name)
      count += 1 

  # The map names are different since there may or may not be a corresponding
  # cloud thingie associated with it.
  # NOTE(review): `db` was already bound by DB.connect() earlier in this function.
  db = DB.connect()

  # Don't do this f*****g shit at all because f**k this so hard.
  #logging.info('select name, id from streams where end_unix < date("now", "-%d seconds") or (end_minute - start_minute < 0.05 and start_unix < date("now", "%d seconds"))' % (archive_duration, TS.get_offset() * 60 - 1200))

  # The query text is built with % formatting; the only interpolated value is
  # archive_duration, which is computed above from misc.config['archivedays'].
  unlink_list = db['c'].execute('select name, id from streams where end_unix < date("now", "-%d seconds")' % (archive_duration)).fetchall()

  # Each row is (name, id), matching the select list above.
  for file_name_tuple in unlink_list:
    file_name = str(file_name_tuple[0])
    # NOTE(review): `id` shadows the builtin of the same name inside this function.
    id = file_name_tuple[1]

    logging.debug("Prune[remove]: %s (unlink list)" % file_name)
    # If there's a cloud account at all then we need to unlink the 
    # equivalent mp3 file
    # NOTE(review): as listed, the db delete below is the body of this `if`;
    # presumably the cloud unlink call was lost from the listing - confirm.
    if cloud_cutoff and misc.am_i_official():

      # After we remove these streams then we delete them from the db.
      db['c'].execute('delete from streams where id = %d' % id)

    # now only after we've deleted from the cloud can we delete the local file
    if os.path.exists(file_name):
      count += 1

  logging.info("Found %d files older than %s days." % (count, misc.config['archivedays']))
Archivo: audio.py Proyecto: EQ4/DRR
def list_slice(list_in, name_out, duration_sec, start_sec=0, do_confirm=False):
    # Takes some stitch list, list_in and then create a new one based on the start and end times
    # by finding the closest frames and just doing an extraction.
    # Setting the duration as None is equivalent to a forever stream
    #
    #   list_in      - sequence of dicts; 'name', 'duration_sec' and 'start_offset'
    #                  are the keys read in the visible code.
    #   name_out     - path of the output file, opened with mode 'wb+'.
    #   duration_sec - seconds of audio wanted; decremented as items are consumed.
    #   start_sec    - offset in seconds into the first item.
    #   do_confirm   - when true, bytes at the join between consecutive items are
    #                  compared and a warning is logged on mismatch.
    #
    # NOTE(review): this listing appears to be missing statements (several
    # `else:` lines, the write to `out`, and the end of the last logging.warn
    # call), so it does not parse as shown.  Compare with the original audio.py.
    pid = misc.change_proc_name("%s-audioslice" % misc.config['callsign'])

    # NOTE(review): no write to or close of `out` is visible in this listing.
    out = open(name_out, 'wb+')
    buf_confirm = None

    # print 'slice', duration_sec, start_sec
    for ix in range(0, len(list_in)):
        item = list_in[ix]

        # get the regular map
        siglist, offset = signature(item['name'])

        # Last item: stop at the frame matching the remaining duration, clamped to the map.
        if ix == len(list_in) - 1:
            frame_end = min(int(ceil(duration_sec / _FRAME_LENGTH)),
                            len(offset) - 1)

            # NOTE(review): as listed this overwrites the value computed just above;
            # presumably it sat under an `else:` in the original - confirm.
            frame_end = len(offset) - 1

        # First item: start at the frame matching start_sec, clamped to the map.
        if ix == 0:
            frame_start = min(max(int(start_sec / _FRAME_LENGTH), 0),
                              len(offset) - 1)
            duration_sec -= (item['duration_sec'] - start_sec)

            # NOTE(review): same pattern - these two lines overwrite frame_start and
            # subtract the duration a second time; presumably an `else:` was lost.
            frame_start = item['start_offset']
            duration_sec -= item['duration_sec']

        # try and get the mp3
        fin = cloud.get(item['name'])

        if fin:

            if do_confirm and buf_confirm:
                # Step back 16 bytes from the current position and re-read them.
                fin.seek(-16, 1)
                buf = fin.read(16)
                if buf != buf_confirm:
                    logging.warn("Slicing error at %d of %s" %
                                 (fin.tell(), item['name']))

            # print 'off---',frame_end, frame_start, len(offset)
            # NOTE(review): no seek to offset[frame_start] is visible before this read.
            buf = fin.read(offset[frame_end] - offset[frame_start])

            if do_confirm:
                # NOTE(review): buf[-16] is a single element, while the check above
                # compares against a 16-byte read; a slice (buf[-16:]) may have been
                # intended - confirm.
                buf_confirm = buf[-16]


        # If we fail to get the mp3 file then we can suppose that
        # the map file is bad so we just wince and remove it.
            # NOTE(review): the `else:` line and the rest of this call are not in the listing.
            logging.warn("Unable to find %s's corresponding mp3, deleting" %


    # If we failed to do anything this is a tragedy
    # and we just dump the file
    # We take files under some really nominal threshold as being invalid.
    # Anything under 1000 bytes on disk is treated as a failed slice.
    if os.path.getsize(name_out) < 1000:
        logging.warn("Unable to create %s - no valid slices" % name_out)
Archivo: server.py Proyecto: EQ4/DRR
def manager(config):
    # Main flask process that manages the end points.
    #
    #   config - mapping; 'storage', 'callsign' and 'port' are the keys read
    #            directly in the visible code.
    #
    # The nested functions below are the request handlers and their helpers;
    # the trailing section starts the web server with a retry loop.
    #
    # NOTE(review): this listing is Python 2 (print statements, `long`) and
    # appears to have lost many lines: docstring quotes, `try:`/`else:` lines,
    # call continuations and closing brackets.  No route registrations are
    # visible either; presumably decorators were dropped as well.  It does not
    # parse as shown - compare with the original server.py before acting on any
    # of the notes below.
    app = Flask(__name__)

    # Signal-handler signature (signal number, frame); only logs in the visible code.
    def webserver_shutdown(signal=15, frame=None):
        title = SP.getproctitle()
        logging.info('[%s:%d] Shutting down' % (title, os.getpid()))

    # JSON body {'res': True, 'message': ...} with HTTP status 200.
    def success(message):
        return jsonify({'res': True, 'message': message}), 200

    # JSON body {'res': False, 'message': ...} with HTTP status 500.
    def fail(message):
        return jsonify({'res': False, 'message': message}), 500

    # from http://blog.asgaard.co.uk/2012/08/03/http-206-partial-content-for-flask-python
    def after_request(response):
        # Supports 206 partial content requests for podcast streams.
        response.headers.add('Accept-Ranges', 'bytes')
        logging.info('ua - %s' % request.headers.get('User-Agent'))
        return response

    def send_file_partial(path, requested_path, file_name=None):
        # Wrapper around send_file which handles HTTP 206 Partial Content
        # (byte ranges)
        #
        #   path           - file on disk to send.
        #   requested_path - only used in the 404 message.
        #   file_name      - optional download name for Content-Disposition.

        # If we requested something that isn't around, then we bail.
        if not os.path.exists(path):
            return 'File %s not found. Perhaps the stream is old?' % requested_path, 404

        # No Range header: the whole file is read into memory and sent.
        range_header = request.headers.get('Range', None)
        if not range_header:
            with open(path, 'rb') as f:
                data = f.read()

            # NOTE(review): the remaining arguments of this Response(...) call are not in the listing.
            rv = Response(data,
            if not file_name:
                file_name = os.path.basename(path)

                           'attachment; filename="%s"' % file_name)
            return rv

        size = os.path.getsize(path)
        byte1, byte2 = 0, None

        # NOTE(review): re.search returns None when the header does not match,
        # in which case m.groups() raises AttributeError.
        m = re.search('(\d+)-(\d*)', range_header)
        g = m.groups()

        if g[0]:
            byte1 = int(g[0])

        if g[1]:
            byte2 = int(g[1])

        # Default to "from byte1 to the end"; narrowed when an end byte was given.
        length = size - byte1
        if byte2 is not None:
            length = byte2 - byte1

        data = None
        # NOTE(review): no seek to byte1 is visible before this read.
        with open(path, 'rb') as f:
            data = f.read(length + 1)

        # NOTE(review): the remaining arguments of this Response(...) call are not in the listing.
        rv = Response(data,
        disposition = 'attachment;'

        # NOTE(review): the parameter is spelled file_name= here but filename= in the
        # non-range branch above; confirm which one clients are expected to see.
        if file_name:
            disposition += ' file_name="%s"' % file_name

        rv.headers.add('Content-Disposition', disposition)
                       'bytes {0}-{1}/{2}'.format(byte1, byte1 + length, size))

        return rv

    # From http://stackoverflow.com/questions/13317536/get-a-list-of-all-routes-defined-in-the-app
    def site_map():
    # NOTE(review): the next two lines read like docstring text whose quotes were
    # lost; the same pattern repeats in the handlers below.
    Shows all the end points supported by the current server, the options 
    and the documentation.
        output = [
            '-=#| Welcome to indycast %s API help |#=-' % misc.__version__, ''

        # One line per registered rule, skipping Flask's built-in static endpoint.
        for rule in app.url_map.iter_rules():

            if rule.endpoint == 'static': continue

            # Fill every URL argument with a "[name]" placeholder so url_for can build it.
            options = {}
            for arg in rule.arguments:
                options[arg] = "[{0}]".format(arg)

            url = url_for(rule.endpoint, **options)
            # NOTE(review): `line` is built but no append to `output` is visible.
            line = "{:15s} {}".format(
                url, app.view_functions[rule.endpoint].__doc__)

        return Response('\n'.join(output), mimetype='text/plain')

    def my_uuid():
    Returns this server's uuid which is generated each time it is run.
    This is used to determine whether this is the official server or not.
        return misc.config['uuid']

    def database():
    Backs up the current sqlite3 db and sends a gzipped version of it as the response.
        filename = '%s/%s-%s.gz' % (misc.DIR_BACKUPS, misc.config['callsign'],
        # NOTE(review): the pipe returned by os.popen is neither read nor closed
        # before send_file, so the dump may not be complete at that point - confirm.
        os.popen('sqlite3 config.db .dump | gzip -9 > %s' % filename)
        return send_file(filename)

    def reindex():
    Starts the prune process which cleans up and offloads audio files but also re-index 
    the database.

    This is useful in the cases where bugs have led to improper registration of the 
    streams and a busted building of the database.  It's fairly expensive in I/O costs 
    so this shouldn't be done as the default.
        # NOTE(review): only the response is visible; the call that starts the work is not shown.
        return success('Reindexing...')

    def prune():
    Starts the prune sub-process which cleans up and offloads audio files 
    following the rules outlined in the configuration file (viewable with the stats call)
        # NOTE(review): only the response is visible; the call that starts the work is not shown.
        return success('Pruning...')

    def send_named_stream(time, name):
    Similar to the /slices/path endpoint, this end point sends a stream that is at time <time> with
    name <name>.
        return send_stream(time, download_name=name)

    def send_stream(path, download_name=None):
    Downloads a stream from the server. The path is callsign-date_duration.mp3

      * callsign: The callsign returned by /stats
      * date: in the format YYYYMMDDHHMM such as 201508011005 for 
        2015-08-01 10:05
      * duration: A value, in minutes, to return.

    The mp3 extension should be used regardless of the actual format of the stream -
    although the audio returned will be in the streams' native format.
    The streams are created and sent on-demand, so there may be a slight delay before
    it starts.
        base_dir = "%s%s/" % (config['storage'], misc.DIR_SLICES)

        # Normalise the request to "<callsign>-...mp3" before looking on disk.
        if not path.startswith(config['callsign']):
            path = "%s-%s" % (config['callsign'], path)

        if not path.endswith('.mp3'):
            path = "%s.mp3" % path

        file_name = base_dir + path

        # If the file doesn't exist, then we need to slice it and create it based on our query.
        if not os.path.isfile(file_name):

            # This tells us that if it were to exist, it would be something
            # like this.
            request_info = audio.stream_info(file_name)

            # we can do something rather specific here ...
            # first we get our generic stream list using our start_minute from the info.
            stream_list, episode_list = cloud.find_streams(
                duration_min=request_info['duration_sec'] / 60.0)

            for ep in episode_list:
                episode = ep[0]
                first_slice = episode[0]

                if first_slice['week_number'] == request_info['week_number']:
                    # This means that we've found the episode that we want
                    # We will block on this.
                    relative_start_minute = request_info[
                        'start_minute'] - first_slice['start_minute']

                        # NOTE(review): the call this keyword argument belongs to is not in the listing.
                        duration_minute=request_info['duration_sec'] / 60.0)

                    # And break out of our loop ... now everything should exist.

        # base_dir already ends with "/", so this path contains a doubled slash.
        return send_file_partial("%s/%s" % (base_dir, path),

    def restart():
    Restarts an instance. This does so in a gapless non-overlapping way.
        # NOTE(review): only the response is visible; the restart call itself is not shown.
        return success('restarting...')

    def upgrade():
    Goes to the source directory, pulls down the latest from git
    and if the versions are different, the application restarts.
        # cwd is not referenced again in the visible code.
        cwd = os.getcwd()

        os.system('git pull')

        # See what the version is after the pull
        newversion = os.popen("git describe").read().strip()

        if newversion != misc.__version__:
            os.system('pip install --user -r requirements.txt')

            # from http://blog.petrzemek.net/2014/03/23/restarting-a-python-script-within-itself/
            return success("Upgrading from %s to %s" %
                           (misc.__version__, newversion))

        return success('Version %s is current' % misc.__version__)

    def heartbeat():
    A low resource version of the /stats call ... this is invoked
    by the server health check.  Only the vitals are reported.
    It helps us see if disk space is going nuts or if we aren't recording
    right now.
    This allows us to check if a restart happened between invocations.
        return jsonify(misc.base_stats()), 200

    def stats():
    Reports various statistical metrics on a particular server.  
    Use this with the graph.py tool to see station coverage.
        db = DB.connect()

        stats = misc.base_stats()

            # NOTE(review): the three expressions below look like values of a dict
            # literal whose keys and enclosing call were lost from the listing.
            db['c'].execute('select sum(read_count) from intents').fetchone()
            os.popen("df -h / | tail -1").read().strip(),
            # Reporting the list as fractional GB is more useful.
            DB.all('streams', sort_by='start_unix'),

        return jsonify(stats), 200

    # Using http://flask.pocoo.org/docs/0.10/patterns/streaming/ as a reference.
    def live(start, offset_min=0):
    Sends off a live-stream equivalent.  Two formats are supported:

     * duration - In the form of strings such as "1pm" or "2:30pm"
     * offset - starting with a negative "-", this means "from the present".
        For instance, to start the stream from 5 minutes ago, you can do "-5"

        # Relative form ("-5", "5min"): redirect to the absolute /live/m<minute> form.
        if start[0] == '-' or start.endswith('min'):
            # dump things like min or m
            start = re.sub('[a-z]', '', start)
            return redirect('/live/m%f' %
                            (float(TS.minute_now() - abs(float(start)))),

        # The start is expressed in times like "11:59am ..." We utilize the
        # library we wrote for streaming to get the minute of day this is.
        if start[0] == 'm':
            requested_minute = float(start[1:]) % TS.ONE_DAY_MINUTE

            # NOTE(review): as listed these two lines overwrite the value computed
            # just above; presumably they sat under an `else:` - confirm.
            candidate = start
            requested_minute = TS.to_utc('mon', candidate) - offset_min

        offset_sec = 0
        range_header = request.headers.get('Range', None)
        if range_header:
            # NOTE(review): re.search returns None when the header does not match,
            # in which case m.groups() raises AttributeError.
            m = re.search('(\d+)-', range_header)
            g = m.groups()
            if g[0]:
                byte1 = int(g[0])

                # We use the byte to compute the offset
                # The stored bitrate (128 when the stored value is 0) times 125 is
                # used as bytes per second - presumably the bitrate is in kbit/s; confirm.
                offset_sec = float(byte1) / ((int(DB.get('bitrate')) or 128) *
                                             (1000 / 8.0))

        #print "--- REQUEST @ ", start, range_header, offset_sec
        current_minute = TS.minute_now() % TS.ONE_DAY_MINUTE

        # Rewind "now" by current_minute, then add the requested minute.
        now_time = TS.now()
        requested_time = now_time - timedelta(
            minutes=current_minute) + timedelta(minutes=requested_minute)

        # print requested_time, now_time, requested_minute, current_minute
        # If the requested minute is greater than the current one, then we can presume that
        # the requested minute refers to yesterday ... as in, someone wants 11pm
        # and now it's 1am.
        if requested_minute > current_minute:
            requested_time -= timedelta(days=1)

        # It's important to do this AFTER the operation above otherwise we wrap around to yesterday
        requested_time += timedelta(seconds=offset_sec)

        # Get the info for the file that contains this timestamp
        start_info, requested_time_available = cloud.get_file_for_ts(
            target_time=requested_time, bias=-1)
        # Never start earlier than the earliest time the lookup reports as available.
        requested_time = max(requested_time, requested_time_available)
        # NOTE(review): the right-hand side of this subtraction is not in the listing.
        start_second = (requested_time -

        # The generator from audio.list_slice_stream is handed to Response as the body.
        response = Response(audio.list_slice_stream(start_info, start_second),

        return response

    def at(start, duration_string='1hr'):
    Sends a stream using a human-readable (and human-writable) definition 
    at start time.  This uses the dateutils.parser library and so strings 
    such as "Monday 2pm" are accepted.

    Because the space, 0x20 is such a pain in HTTP, you can use "_", 
    "-" or "+" to signify it.  For instance,


    Will work fine
        dt = TS.str_to_time(start)
        duration_min = TS.duration_parse(duration_string)
        # Build the "<callsign>-<timestamp>_<minutes>.mp3" name and delegate to send_stream.
        endpoint = '%s-%s_%d.mp3' % (misc.config['callsign'],
                                     TS.ts_to_name(dt), duration_min)
        return send_stream(endpoint, download_name=endpoint)

    def at_method2(weekday, start, duration_string):
    This is identical to the stream syntax, but instead it is similar to
    /at ... it uses the same notation but instead returns an audio file

    You must specify a single weekday ... I know, total bummer.
        # Three-letter abbreviations mapped to the full weekday names passed to at().
        weekday_map = {
            'mon': 'monday',
            'tue': 'tuesday',
            'wed': 'wednesday',
            'thu': 'thursday',
            'fri': 'friday',
            'sat': 'saturday',
            'sun': 'sunday'

        # The alternative form for this is something like
        # /tuesday_8pm/1hr/showname.xml
        # A "." in the third path component is taken to mean the alternative form.
        if duration_string.count('.') > 0:
            dt = TS.str_to_time(weekday)

            # order is a little incompatible.
            return stream(weekday=TS.to_minute(dt),

        if weekday not in weekday_map:
            return 'The first parameter, %s, is not a recognized weekday.' % weekday

        return at("%s_%s" % (weekday_map[weekday], start), duration_string)

    def stream(weekday, start, duration_string, showname):
    Returns a podcast, m3u, or pls file based on the weekday, start and duration.
    This is designed to be read by podcasting software such as podkicker, 
    itunes, and feedburner.

    weekdays are defined as mon, tue, wed, thu, fri, sat, sun.

    If a show occurs multiple times per week, this can be specified with
    a comma.  for instance,

    The showname should be followed by an xml, pls, or m3u extension.

    It should also be viewable in a modern web browser.

    If you can find a podcaster that's not supported, please send an email 
    to [email protected].

        # A float weekday is treated as a minute offset and divided by the minutes
        # in a day to index TS.WEEKDAY_LIST.  (float) is just float, not a tuple.
        if isinstance(weekday, (float)):
            start_time_list = [weekday]
            weekday_list = [TS.WEEKDAY_LIST[int(weekday / (60 * 24))]]

            # NOTE(review): as listed the two lines below overwrite the values set
            # just above; presumably they sat under an `else:` - confirm.
            # Supports multiple weekdays
            weekday_list = weekday.split(',')
            start_time_list = [TS.to_utc(day, start) for day in weekday_list]

        duration_min = TS.duration_parse(duration_string)

        # This means we failed to parse
        if not duration_min:
            return server.do_error("duration '%s' is not set correctly" %

        # `long` exists only in Python 2.
        if not isinstance(start_time_list[0], (int, long, float)):
            return server.do_error(
                'weekday and start times are not set correctly')

        # In #22 We're going to add 2 minutes to the duration to make sure that we get
        # the entire episode.
        duration_min += 2

        # And according to #149 we also go a minute back for the start time ...
        # we need to do a little math to make sure we don't get a -1 edge case
        # Adding MINUTES_PER_WEEK before the modulo keeps an offset of 0 from going negative.
        start_time_list = [
            (TS.MINUTES_PER_WEEK + offset - 1) % TS.MINUTES_PER_WEEK
            for offset in start_time_list

        # If we are here then it looks like our input is probably good.

        # Strip the .xml from the showname ... this will be used in our xml.
        # The slicing assumes a three-character extension preceded by one separator character.
        file_type = showname[-3:]
        showname = showname[:-4]

        # We come in with spaces as underscores so here we translate that back
        showname = re.sub('_', ' ', showname)

        # This will register the intent if needed for future recordings
        # (that is if we are in ondemand mode)
        DB.register_intent(start_time_list, duration_min)

        # Make sure that we have all of our streams registered before trying
        # to infer what we can send to the user.

        # Look for streams that we have which match this query and duration.
        # This will also create slices if necessary in a sub process.
        # The list of files that returns will include this not-yet-created
        # file-name as essentially a "promise" to when it will be made.
        feed_list = cloud.find_and_make_slices(start_time_list, duration_min)
        # print feed_list

        # Then, taking those two things, make a feed list from them.
        # NOTE(review): the remaining arguments of this call are not in the listing.
        return server.generate_feed(file_type=file_type,

    print __name__
    # The server is only started when this module was imported as 'lib.server'.
    if __name__ == 'lib.server':
        pid = misc.change_proc_name("%s-webserver" % config['callsign'])

        signal.signal(signal.SIGUSR1, webserver_shutdown)
        # When we do an upgrade or a restart, there's a race condition of getting to start this server
        # before the previous one has cleaned up all the socket work.  So if the time is under our
        # patience threshold then we sleep a second and just try again, hoping that it will work.
        patience = misc.PROCESS_DELAY * 2
        attempt = 1

        start = TS.unixtime('delay')
        # Keep trying until patience + 3 has elapsed since `start`.
        while TS.unixtime('delay') - start < (patience + 3):
                # NOTE(review): the `try:` that pairs with the `except` below is not in the listing.
                print 'Listening on %s' % config['port']
                app.run(threaded=True, port=config['port'], host='')

            except Exception as exc:
                # First phase: still within `patience`, so report and retry after a short sleep.
                if TS.unixtime('delay') - start < patience:
                    print '[attempt: %d] Error, can not start server ... perhaps %s is already in use?' % (
                        attempt, config['port'])
                    attempt += 1
                    time.sleep(misc.PROCESS_DELAY / 4)

                # Second phase: look up which process holds the port via netstat.
                elif TS.unixtime('delay') - start < (patience + 4):
                    pid = os.popen(
                        "netstat -anlp | grep :%s | awk ' { print $NF }' | sed 's/\/.*//'"
                        % config['port']).read().strip().split('\n')[0]

                        # NOTE(review): the surrounding `try:` and the kill call itself are
                        # not in the listing; only the message is visible.
                        pid_numeric = int(pid)
                        print "F**k it, I'm killing %s." % pid


                    time.sleep(misc.PROCESS_DELAY / 4)