Example #1
class RequestRateLimitScheduler(ThrottledScheduler):
    bucket_size = settings.get('rate_limit_bucket_size', 50)
    refill_rate = settings.get('rate_limit_refill_rate', 50)  # 50/s
    available_token = 0  # the bucket starts empty; the first refill() fills it
    last_refill = 0

    @classmethod
    def refill(cls):
        # Add tokens for the elapsed time, capping at the bucket size.
        now = time.time()
        cls.available_token = int(round(min(
            cls.available_token + (now - cls.last_refill) * cls.refill_rate,
            cls.bucket_size)))
        cls.last_refill = now

    @classmethod
    def get_quota(cls, pipeline):
        cls.refill()
        return cls.available_token

    @classmethod
    def consume_quota(cls, pipeline, n):
        cls.refill()
        consumed = min(cls.available_token, n)  # consume at most what is available
        cls.available_token -= consumed
        return consumed

    @classmethod
    def task_gen(cls, pipeline, n):
        raise NotImplementedError()
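
# A minimal standalone token-bucket sketch (hypothetical names, not part of the
# scheduler API above) that mirrors the refill/consume arithmetic for quick testing:
import time

class TokenBucket(object):
    def __init__(self, size, rate):
        self.size, self.rate = size, rate
        self.tokens = 0.0
        self.last = time.time()

    def consume(self, n):
        now = time.time()
        # refill for the elapsed time, capped at the bucket size
        self.tokens = min(self.tokens + (now - self.last) * self.rate, self.size)
        self.last = now
        taken = min(int(self.tokens), n)
        self.tokens -= taken
        return taken

# bucket = TokenBucket(size=50, rate=50)  # matches the 50-token, 50/s defaults above
# bucket.consume(10)  # 0 right after creation; grows as time elapses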
Example #2
class InitStateTemplate(CommandListState):
    extra = "(init)"
    commandlist = [ ("OK:HELLO", "seti:nonblock:0")
                  , "seti:threadpool_s3:%d" % settings.get("s3_threadpool_size", 1) # s3 conn threadpool size
                  , "set:straggler_configs:%s" % settings.get("straggler_configs", '0.9 2 1') #
                  , "run:mkdir -p ##TMPDIR##"
                  , None
                  ]

    def __init__(self, prevState, **kwargs):
        super(InitStateTemplate, self).__init__(prevState, trace_func=kwargs.get('trace_func', default_trace_func), **kwargs)
        logging.debug('in_events: %s', kwargs['in_events'])
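
# Each commandlist entry pairs the reply expected from the worker with the next
# command to send (e.g. "OK:HELLO" arrives, then "seti:nonblock:0" goes out);
# a bare string appears to mean "send without waiting on a specific reply", and
# None terminates the list. A hypothetical normalization of such a list:
raw = [("OK:HELLO", "seti:nonblock:0"), "run:mkdir -p ##TMPDIR##", None]
normalized = [c if isinstance(c, tuple) else (None, c) for c in raw]
# -> [('OK:HELLO', 'seti:nonblock:0'), (None, 'run:mkdir -p ##TMPDIR##'), (None, None)]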
Example #3
class CollectState(CommandListState):
    nextState = ExtractTarState if settings.get('use_tar') else RunState
    commandlist = [(None, 'run:mkdir -p ##TMPDIR##/in_0/'),
                   ('OK:RETVAL(0)', 'collect:{pair_list} ##TMPDIR##/in_0'),
                   ('OK:COLLECT', None)]

    def __init__(self, prevState):
        super(CollectState, self).__init__(prevState)
        # Interleave each input key with its numbered local destination path.
        pair_list = []
        for i, key in enumerate(self.in_events['frame']['key_list']):
            pair_list.append(key)
            pair_list.append('##TMPDIR##/in_0/%08d.%s' %
                             (i + 1, self.in_events['frame']['type']))

        self.local['dir'] = '##TMPDIR##/out_0/'

        params = {
            'pair_list': ' '.join(pair_list),
            'boundingbox': self.in_events['frame']['metadata']['boundingbox']
        }
        logging.debug('params: %s', params)
        self.commands = [
            s.format(**params) if s is not None else None
            for s in self.commands
        ]
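
# The `s.format(**params) if s is not None else None` idiom above fills the
# {placeholders} in each command template while keeping the None sentinels.
# A minimal illustration with made-up values:
commands = [None, 'collect:{pair_list} ##TMPDIR##/in_0', None]
params = {'pair_list': 's3://bucket/k1 ##TMPDIR##/in_0/00000001.png'}
filled = [c.format(**params) if c is not None else None for c in commands]
# filled[1] == 'collect:s3://bucket/k1 ##TMPDIR##/in_0/00000001.png ##TMPDIR##/in_0'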
Example #4
class RunState(CommandListState):
    extra = "(run)"
    nextState = CreateTarState if settings.get('use_tar') else GetOutputState
    commandlist = [ (None, 'run:mkdir -p ##TMPDIR##/in_0/')
                  , ('OK:RETVAL(0)', 'run: python rek.py '
                          '"{person}" {key_list} {bucket} 300 70 10 0.1 > ##TMPDIR##/in_0/temp.txt')
                  , ('OK:RETVAL(0)', 'run:cat ##TMPDIR##/in_0/temp.txt')
                    # the output is fetched in the next stage
                    ]

    def __init__(self, prevState):
        super(RunState, self).__init__(prevState)

        # Extract the bucket name from a base like 's3://bucket/prefix/'.
        self.local['bucket'] = settings['storage_base'].split('s3://')[1].split('/')[0]
        key_list = list(self.in_events['scene_list']['key_list'])
        self.local['key_list'] = key_list

        # Strip everything up to and including the bucket name so the key list
        # matches what this stage expects.
        key_list = [k.split(self.local['bucket'])[1][1:] for k in key_list]

        params = {'key_list': ' '.join(key_list),
                  'person': self.pipe['person'],
                  'bucket': self.local['bucket']}
        logging.debug('params: %s', params)
        self.commands = [ s.format(**params) if s is not None else None for s in self.commands ]
Example #5
    def __init__(self, prevState):
        super(TryEmitState, self).__init__(prevState)
        if settings.get('hash_bucket'):
            self.local['out_key'] = settings['temp_storage_base'] + rand_str(1) + '/' + rand_str(16) + '/'
        else:
            self.local['out_key'] = settings['storage_base'] + rand_str(16) + '/'
        params = {'out_key': self.local['out_key']}
        self.commands = [ s.format(**params) if s is not None else None for s in self.commands ]
Example #6
    def __init__(self, prevState):
        super(RunState, self).__init__(prevState)
        if settings.get('hash_bucket'):
            self.local['out_key'] = settings['temp_storage_base'] + rand_str(1) + '/' + rand_str(16) + '/'
        else:
            self.local['out_key'] = settings['storage_base'] + rand_str(16) + '/'

        params = {'in_key_0': self.in_events['frames_0']['key'], 'in_key_1': self.in_events['frames_1']['key'],
                  'out_key': self.local['out_key'], 'filter_complex': self.config['filter_complex']}
        logging.debug('params: %s', params)
        self.commands = [ s.format(**params) if s is not None else None for s in self.commands ]
Example #7
class CollectState(CommandListState):
    nextState = ExtractTarState if settings.get('use_tar') else RunState
    commandlist = [
        (None, 'run:mkdir -p ##TMPDIR##/in_0/')
        , ('OK:RETVAL(0)', 'collect:{in_key} ##TMPDIR##/in_0')
        , ('OK:COLLECT', None)
    ]

    def __init__(self, prevState):
        super(CollectState, self).__init__(prevState)
        params = {'in_key': self.in_events['frames']['key']}
        self.commands = [s.format(**params) if s is not None else None for s in self.commands]
Example #8
class RunState(CommandListState):
    extra = "(run)"
    nextState = CreateTarState if settings.get('use_tar') else TryEmitState
    commandlist = [ (None, 'run:mkdir -p ##TMPDIR##/out_0/')
                  , ('OK:RETVAL(0)', 'run:time ./ffmpeg -start_number 1 -i ##TMPDIR##/in_0/%08d.png '
                                     '-vf hue=s=0 -c:a copy -safe 0 -start_number 1 ##TMPDIR##/out_0/%08d.png')
                  , ('OK:RETVAL(0)', None)
                    ]

    def __init__(self, prevState):
        super(RunState, self).__init__(prevState)

        params = {'in_key': self.in_events['frames']['key']}
        logging.debug('params: %s', params)
        self.commands = [ s.format(**params) if s is not None else None for s in self.commands ]
Example #9
class ConcurrencyLimitScheduler(ThrottledScheduler):
    concurrency_limit = settings.get('concurrency_limit', 1500)

    @classmethod
    def get_quota(cls, pipeline):
        # Every task that has not reached a terminal or error state counts
        # against the concurrency limit.
        running = [t for t in pipeline.tasks
                   if not isinstance(t.current_state, (TerminalState, ErrorState))]
        return cls.concurrency_limit - len(running)

    @classmethod
    def consume_quota(cls, pipeline, n):
        # Quota is recomputed from the live task count, so there is nothing to record.
        pass

    @classmethod
    def task_gen(cls, pipeline, n):
        raise NotImplementedError()
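
# A rough sketch of how a throttled scheduler's quota hooks might be driven.
# The loop below is hypothetical; only get_quota/consume_quota/task_gen come
# from the classes above, so it is left as comments:
#
#     n = Scheduler.get_quota(pipeline)            # how many tasks may start now
#     if n > 0:
#         tasks = Scheduler.task_gen(pipeline, n)  # provided by subclasses
#         Scheduler.consume_quota(pipeline, len(tasks))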
Example #10
class CheckOutputState(IfElseState):
    extra = "(check output)"
    expect = 'OK:RETVAL('
    consequentState = CreateTarState if settings.get('use_tar') else TryEmitState
    alternativeState = FinalState

    def testfn(self):
        self.local['output_count'] = int(
            get_output_from_message(self.messages[-1]))
        # Default fps to the output count if the upstream stage didn't set it.
        metadata = self.in_events['chunks']['metadata']
        metadata['fps'] = metadata.get('fps', self.local['output_count'])
        return self.local['output_count'] > 0

    def __init__(self, prevState):
        super(CheckOutputState, self).__init__(prevState)
Example #11
class RunState(CommandListState):
    extra = "(run)"
    nextState = CreateTarState if settings.get('use_tar') else TryEmitState
    commandlist = [(None, 'run:mkdir -p ##TMPDIR##/out_0/'),
                   ('OK:RETVAL(0)', 'run:python PODS/PodLine.py'),
                   ('OK:RETVAL(0)', None)]

    def __init__(self, prevState):
        super(RunState, self).__init__(prevState)

        params = {'in_key': self.in_events['frames']['key']}
        logging.debug('params: %s', params)
        self.commands = [
            s.format(**params) if s is not None else None
            for s in self.commands
        ]
Example #12
    def __init__(self, prevState):
        super(RunState, self).__init__(prevState,
                                       trace_func=default_trace_func)
        if settings.get('hash_bucket'):
            self.local['out_key'] = settings['temp_storage_base'] + rand_str(
                1) + '/' + rand_str(16) + '/'
        else:
            self.local['out_key'] = settings['storage_base'] + rand_str(
                16) + '/'
        params = {
            'key': self.in_events['chunks']['key'],
            'out_key': self.local['out_key']
        }
        logging.debug('params: %s', params)
        self.commands = [
            s.format(**params) if s is not None else None
            for s in self.commands
        ]
Example #13
class RunState(CommandListState):
    extra = "(run)"
    nextState = CreateTarState if settings.get('use_tar') else GetOutputState

    commandlist = [ (None, 'run:mkdir -p ##TMPDIR##/out_0/')
                  , ('OK:RETVAL(0)', 'run: python draw_box.py '
                          '"{boundingbox}" ##TMPDIR##/in_0/*.png ##TMPDIR##/out_0/ 25')
                    # list the output files (sorted) so the next state can
                    # check whether only the txt file is there
                  , ('OK:RETVAL(0)', 'run:find ##TMPDIR##/out_0/ -type f | sort')
                    # the output is fetched in the next stage
                    ]

    def __init__(self, prevState):
        super(RunState, self).__init__(prevState)

        params = {'boundingbox': self.in_events['frame']['metadata']['boundingbox']}
        logging.debug('params: %s', params)
        self.commands = [ s.format(**params) if s is not None else None for s in self.commands ]
Example #14
    def __init__(self, prevState):
        super(RunState, self).__init__(prevState)
        if settings.get('hash_bucket'):
            self.local['out_key'] = settings['temp_storage_base'] + rand_str(
                1) + '/' + rand_str(16) + '/'
        else:
            self.local['out_key'] = settings['storage_base'] + rand_str(
                16) + '/'

        params = {
            'starttime': self.in_events['chunked_link']['starttime'],
            'frames': self.in_events['chunked_link']['frames'],
            'URL': self.in_events['chunked_link']['key'],
            'selector': self.in_events['chunked_link']['selector'],
            'out_key': self.local['out_key']
        }
        logging.debug('params: %s', params)
        self.commands = [
            s.format(**params) if s is not None else None
            for s in self.commands
        ]
Example #15
    def Submit(self, request, context):
        logging.info('PipelineServer handling submit request')
        try:
            pipe = create_from_spec(json.loads(request.pipeline_spec))

            for instream in request.inputstreams:
                for inp in instream.inputs:
                    # Edge case: the initial event arrives with an empty lineage.
                    lineage = inp.lineage if inp.lineage != '' else '1'

                    in_event = {
                        'key': inp.uri,
                        'metadata': {
                            'pipe_id': pipe.pipe_id,
                            'lineage': lineage
                        }
                    }
                    # Queue the event on this input stage's buffer.
                    pipe.inputs[instream.name][1].put(
                        {instream.type: in_event})
                    # put events to the buffer queue of all input stages

            pipe_dir = 'logs/' + pipe.pipe_id
            os.system('mkdir -p ' + pipe_dir)

            # handler = logging.FileHandler(pipe_dir + '/log.csv')
            # handler.setLevel(logging.DEBUG)
            # handler.setFormatter(logging.Formatter('%(created)f, %(message)s'))
            #memhandler = logging.handlers.MemoryHandler(1024**2*10, target=handler)
            #memhandler.shouldflush = lambda _: False

            logger = lightlog.getLogger(pipe.pipe_id)
            logger.add_metadata(
                'pipespec:\n%s\ninput:\n%s\n...\nsettings:\n%s' %
                (request.pipeline_spec, request.inputstreams[0].inputs[:1],
                 settings))
            # logger = logging.getLogger(pipe.pipe_id)
            # logger.propagate = False
            # logger.setLevel(logging.DEBUG)
            # logger.addHandler(memhandler)
            # logger.addHandler(handler)

            conf_sched = settings.get('scheduler', 'SimpleScheduler')
            candidates = [
                s for s in dir(sprocket.scheduler)
                if hasattr(vars(sprocket.scheduler)[s], conf_sched)
            ]
            if len(candidates) == 0:
                logging.error("scheduler %s not found", conf_sched)
                raise ValueError("scheduler %s not found" % conf_sched)
            sched = getattr(
                vars(sprocket.scheduler)[candidates[0]],
                conf_sched)  # only consider the first match

            logger.info(ts=time.time(), msg='start pipeline')
            sched.schedule(pipe)
            logger.info(ts=time.time(), msg='finish pipeline')

            logging.info("pipeline: %s finished", pipe.pipe_id)
            with open(pipe_dir + '/log_pb', 'wb') as f:
                f.write(logger.serialize())

            #memhandler.flush()
            # There should be only one output queue.
            result_queue = pipe.outputs.values()[0][1]

            num_m4s = 0
            out_key = None
            logging.debug("length of output queue: %s", result_queue.qsize())

            duration = 0.0
            while not result_queue.empty():
                chunk = result_queue.get(block=False)[
                    'chunks']  # TODO: should be named 'chunks' or 'm4schunks'
                num_m4s += 1
                # duration += chunk['duration']
                #if int(chunk['metadata']['lineage']) == 1:
                #    out_key = chunk['key']

            logging.info("number of m4s chunks: %d", num_m4s)
            logging.info("total duration: %f", duration)

            return pipeline_pb2.SubmitReply(success=True, mpd_url=None)

            # return pipeline_pb2.SubmitReply(success=False, error_msg='no output is found')

        except Exception as e:
            logging.error("Received exception: {}".format(
                traceback.format_exc()))
            if 'pipe_dir' in vars():
                with open(pipe_dir + '/log_pb', 'wb') as f:
                    f.write(logger.serialize())
            return pipeline_pb2.SubmitReply(success=False,
                                            error_msg=traceback.format_exc())
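
# The scheduler lookup in Submit scans the submodules of sprocket.scheduler for
# the first one defining the configured class name. A standalone sketch of that
# reflection pattern (the package and class names here are illustrative):
import types

def find_class(package, class_name):
    for name in dir(package):
        member = vars(package)[name]
        if isinstance(member, types.ModuleType) and hasattr(member, class_name):
            return getattr(member, class_name)  # only the first match is used
    raise ValueError("class %s not found" % class_name)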
Example #16
class GetOutput(GetOutputStateTemplate):
    nextState = CreateTarState if settings.get('use_tar') else TryEmitState
Example #17
def get_output_key():
    if settings.get('hash_bucket'):
        return settings['temp_storage_base'] + rand_str(1) + '/' + rand_str(16) + '/'
    else:
        return settings['storage_base'] + rand_str(16) + '/'
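
# A stand-in for rand_str (hypothetical; the real helper lives elsewhere in the
# codebase) to show the shape of the generated keys. With hash_bucket enabled,
# the single random leading character spreads keys across many prefixes, which
# historically helped S3 spread load across partitions:
import random, string

def _rand_str(n):
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(n))

# e.g. 's3://tmp-bucket/' + _rand_str(1) + '/' + _rand_str(16) + '/'
#   -> 's3://tmp-bucket/q/hcqzjkwxbfupmnra/'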
Example #18
    def _invocation_loop(cls):
        Tracker.logger.debug("Tracker _invocation_loop")
        # Learn our outbound address by opening a test connection
        # (this picks the wrong interface when running on EC2).
        testsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        testsock.connect(("lambda.us-east-1.amazonaws.com", 443))
        addr = testsock.getsockname()[0]
        testsock.close()

        platform_name = settings.get('platform', 'aws_lambda')
        launcher_module = importlib.import_module('sprocket.platform.' +
                                                  platform_name + '.launcher')
        launcher_cls = launcher_module.Launcher
        launch_queue = multiprocessing.Queue()

        pid = os.fork()
        if pid == 0:
            # child process will run the launcher
            launcher_cls.initialize(launch_queue)
            sys.exit(0)

        cls.launcher_pid = pid

        # cls._invoc_pr = cProfile.Profile()
        # cls._invoc_pr.enable()

        while not cls.should_stop:
            pending = {}  # function_name -> task list

            # Tracker.logger.debug("Tracker _invocation_loop blocking for task")

            try:
                t = cls.submitted_queue.get(block=False)
            except Queue.Empty:
                time.sleep(0.001)
                continue

            while True:
                # drain `submitted_queue` into the per-function `pending` map
                lst = pending.get(t.lambda_func, [])
                lst.append(t)
                pending[t.lambda_func] = lst
                try:
                    t = cls.submitted_queue.get(block=False)
                except Queue.Empty:
                    break

            # move pending tasks to wait queue
            for pending_function, pending_task_list in pending.iteritems():
                with cls.waiting_queues_lock:
                    wq = cls.waiting_queues.get(pending_function, [])
                    wq.extend(pending_task_list)
                    cls.waiting_queues[pending_function] = wq

            # For each function, reuse an idle (waiting) lambda if one exists;
            # otherwise hand the batch to the launcher.
            for function_name, task_list in pending.iteritems():
                task_list[0].event['addr'] = settings['daemon_addr']
                start = time.time()

                # payload is the event info in the first task on the task list
                payload = json.dumps(task_list[0].event)

                if cls.tasks_waiting_for_input:
                    waiter = cls.tasks_waiting_for_input.pop(0)
                    Tracker.logger.debug(
                        "Found waiting lambda with socket {}".format(
                            waiter.current_state.sock.fileno()))
                    cls.tasks.remove(waiter)

                    new_task = TaskStarter(SocketNB(waiter.current_state.sock))
                    cls.tasks.append(new_task)
                    Tracker.logger.debug("Updated Task array: {}".format(
                        cls.tasks))
                    cls.fd_to_task_map[
                        new_task.current_state.fileno()] = new_task

                    waiter.current_state.enqueue(
                        "run_new_task:{}".format(payload))
                    waiter.current_state.do_write()

                else:
                    Tracker.logger.debug(
                        "No waiting lambda found; launching new one")
                    Tracker.logger.debug(
                        "Putting {} invocation on launch_queue".format(
                            function_name))
                    # launcher is waiting on this queue and will launch events asynchronously
                    launch_queue.put(
                        LaunchEvent(nlaunch=len(task_list),
                                    fn_name=function_name,
                                    akid=cls.akid,
                                    secret=cls.secret,
                                    payload=payload,
                                    regions=task_list[0].regions))

                for task in task_list:
                    # Tracker.logger = logging.getTracker.logger(p.kwargs['in_events'].values()[0]['metadata']['pipe_id'])
                    # Tracker.logger.debug('%s, %s', p.kwargs['in_events'].values()[0]['metadata']['lineage'], 'send, request')
                    lightlogger = lightlog.getLogger(
                        task.kwargs['in_events'].values()[0]['metadata']
                        ['pipe_id'])
                    lightlogger.debug(
                        ts=time.time(),
                        lineage=task.kwargs['in_events'].values()[0]
                        ['metadata']['lineage'],
                        op='send',
                        msg='invocation')

                Tracker.logger.debug(
                    "Invoking {} worker(s) takes {} ms".format(
                        len(task_list), (time.time() - start) * 1000))

            Tracker.logger.debug("Tracker _invocation_loop sleep")
            time.sleep(0.01)
        Tracker.logger.debug("Tracker _invocation_loop end")
Example #19
    def _invocation_loop(cls):
        testsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        testsock.connect(("lambda.us-east-1.amazonaws.com",
                          443))  # incorrect when running on EC2
        addr = testsock.getsockname()[0]
        testsock.close()
        platform_name = settings.get('platform', 'aws_lambda')
        launcher_module = importlib.import_module('sprocket.platform.' +
                                                  platform_name + '.launcher')
        launcher_cls = launcher_module.Launcher
        launch_queue = multiprocessing.Queue()

        pid = os.fork()
        if pid == 0:
            launcher_cls.initialize(launch_queue)
            sys.exit(0)

        cls.launcher_pid = pid

        # cls._invoc_pr = cProfile.Profile()
        # cls._invoc_pr.enable()

        while not cls.should_stop:
            pending = {}  # function name -> tasklist

            t = cls.submitted_queue.get(block=True)
            lst = pending.get(t.lambda_func, [])
            lst.append(t)
            pending[t.lambda_func] = lst

            while True:
                try:
                    t = cls.submitted_queue.get(block=False)
                    lst = pending.get(t.lambda_func, [])
                    lst.append(t)
                    pending[t.lambda_func] = lst
                except Queue.Empty:
                    break

            for k, v in pending.iteritems():
                with cls.waiting_queues_lock:
                    wq = cls.waiting_queues.get(k, [])
                    wq.extend(v)
                    cls.waiting_queues[k] = wq

            for func, lst in pending.iteritems():
                lst[0].event['addr'] = settings['daemon_addr']
                start = time.time()
                payload = json.dumps(lst[0].event)
                launch_queue.put(
                    LaunchEvent(nlaunch=len(lst),
                                fn_name=func,
                                akid=cls.akid,
                                secret=cls.secret,
                                payload=payload,
                                regions=lst[0].regions))

                for p in lst:
                    # logger = logging.getLogger(p.kwargs['in_events'].values()[0]['metadata']['pipe_id'])
                    # logger.debug('%s, %s', p.kwargs['in_events'].values()[0]['metadata']['lineage'], 'send, request')
                    logger = lightlog.getLogger(p.kwargs['in_events'].values()
                                                [0]['metadata']['pipe_id'])
                    logger.debug(ts=time.time(),
                                 lineage=p.kwargs['in_events'].values()[0]
                                 ['metadata']['lineage'],
                                 op='send',
                                 msg='invocation')

                logging.debug("invoking " + str(len(lst)) + ' workers takes ' +
                              str((time.time() - start) * 1000) + ' ms')

            time.sleep(0.001)