Example #1
def default_trace_func(in_events, msg, op, **kwargs):
    """Log every command message sent/recv by the state machine.
    op includes send/recv/undo_recv/kick
    """
    # logger = logging.getLogger(in_events.values()[0]['metadata']['pipe_id'])
    # logger.debug('%s, %s, %s', in_events.values()[0]['metadata']['lineage'], op, escape_for_csv(msg))
    logger = lightlog.getLogger(in_events.values()[0]['metadata']['pipe_id'])
    logger.debug(ts=time.time(), lineage=in_events.values()[0]['metadata']['lineage'], op=op, msg=msg, **kwargs)
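For orientation, here is a minimal sketch of the in_events shape this trace function expects, inferred from the metadata lookups above; the event name and key are hypothetical, and the code is Python 2, where dict.values() returns a list:

# Hypothetical payload matching in_events.values()[0]['metadata'][...] above.
in_events = {
    'chunks': {                                # event name (assumed)
        'key': 'media/seg-00001.mp4',          # input key (assumed)
        'metadata': {'pipe_id': 'pipe-42',     # selects the lightlog logger
                     'lineage': '1'},          # position within the pipeline
    }
}
default_trace_func(in_events, msg='invocation', op='send')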
Example #2
    def process_starter_tasks(cls, starter_tasks):
        should_append = []
        removable = []

        Tracker.logger.debug("Processing starter tasks")
        for tsk in starter_tasks:
            Tracker.logger.debug("Starter task with fd {}".format(
                tsk.current_state.fileno()))
            # init msg lets us know which lambda function it's from

            while True:
                try:
                    init_msg = tsk.current_state.recv_queue.popleft()
                    init_data = json.loads(init_msg)
                except ValueError as e:
                    Tracker.logger.debug(
                        "Throwing out message {}".format(init_msg))
                    continue
                break

            Tracker.logger.debug("{}:{} has init_msg {}".format(
                tsk.current_state.fileno(), tsk, init_msg))
            with Tracker.waiting_queues_lock:
                # so that we can get Task from the corresponding list
                try:
                    func_queue = Tracker.waiting_queues.get(
                        init_data['lambda_function'], [])
                    #if not len(func_queue):
                    # nothing in function queue yet
                    #    continue

                    # get first task off function queue
                    real_task = func_queue.pop(0)
                    if len(func_queue) == 0:
                        # if no more tasks on queue, can delete queue
                        del Tracker.waiting_queues[
                            init_data['lambda_function']]  # garbage collection
                    if 'lambda_start_ts' in init_data:
                        lightlogger = lightlog.getLogger(
                            real_task.kwargs['in_events'].values()[0]
                            ['metadata']['pipe_id'])
                        lightlogger.debug(
                            ts=init_data['lambda_start_ts'],
                            lineage=real_task.kwargs['in_events'].values()[0]
                            ['metadata']['lineage'],
                            op='recv',
                            msg='lambda_start_ts')
                except IndexError:
                    real_task = OrphanedTask()  # task doesn't exist
                    Tracker.logger.info(
                        "get an orphaned lambda function, sending quit")

            real_task.rewire(tsk.current_state)  # transition to a Task
            cls.fd_to_task_map[tsk.current_state.fileno()] = real_task
            tsk.current_state.update_flags()
            should_append.append(real_task)
            removable.append(tsk)
        return should_append, removable
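The returned pair mirrors the bookkeeping done inline in Example #6. A hedged sketch of the presumed call site, where promoted Tasks replace their spent TaskStarters in the main task list (the tasks variable is assumed):

should_append, removable = Tracker.process_starter_tasks(starter_tasks)
tasks.extend(should_append)    # rewired Tasks join the poll loop
for r in removable:
    tasks.remove(r)            # spent TaskStarters drop out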
Example #3
def staged_trace_func(stage, num_frames, worker_called, in_events, msg, op):
    """Log every command message sent/recv by the state machine.
    op includes send/recv/undo_recv/kick
    """
    logger = lightlog.getLogger(in_events.values()[0]['metadata']['pipe_id'])
    logger.debug(stage=stage,
                 num_frames=num_frames,
                 worker_called=worker_called,
                 lineage=in_events.values()[0]['metadata']['lineage'],
                 op=op,
                 msg=msg)
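Because the state machine presumably invokes a trace function with only (in_events, msg, op), the three stage-specific arguments would need to be bound beforehand. One plausible way, using functools.partial, with hypothetical stage values and an in_events dict shaped as in the Example #1 sketch:

import functools

# Bind the stage-specific arguments up front; the result has the same
# (in_events, msg, op) signature as default_trace_func in Example #1.
trace = functools.partial(staged_trace_func, 'decode', 6, True)
trace(in_events, msg='chunk ready', op='recv')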
Example #4
    def _invocation_loop(cls):
        Tracker.logger.debug("Tracker _invocation_loop")
        # make sure network is working
        testsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        testsock.connect(("lambda.us-east-1.amazonaws.com",
                          443))  # incorrect when running on EC2
        addr = testsock.getsockname()[0]
        testsock.close()

        platform_name = settings.get('platform', 'aws_lambda')
        launcher_module = importlib.import_module('sprocket.platform.' +
                                                  platform_name + '.launcher')
        launcher_cls = launcher_module.Launcher
        launch_queue = multiprocessing.Queue()

        pid = os.fork()
        if pid == 0:
            # child process will run the launcher
            launcher_cls.initialize(launch_queue)
            sys.exit(0)

        cls.launcher_pid = pid

        # cls._invoc_pr = cProfile.Profile()
        # cls._invoc_pr.enable()

        while not cls.should_stop:
            pending = {}  # function_name -> task list

            # Tracker.logger.debug("Tracker _invocation_loop blocking for task")

            try:
                t = cls.submitted_queue.get(block=False)
            except Queue.Empty:
                time.sleep(0.001)
                continue

            while True:
                # drain submitted_queue into the pending map until no tasks remain
                lst = pending.get(t.lambda_func, [])
                lst.append(t)
                pending[t.lambda_func] = lst
                try:
                    t = cls.submitted_queue.get(block=False)
                except Queue.Empty:
                    break

            # move pending tasks to wait queue
            for pending_function, pending_task_list in pending.iteritems():
                with cls.waiting_queues_lock:
                    wq = cls.waiting_queues.get(pending_function, [])
                    wq.extend(pending_task_list)
                    cls.waiting_queues[pending_function] = wq

            # launch the first task of each function's pending list
            for function_name, task_list in pending.iteritems():
                task_list[0].event['addr'] = settings['daemon_addr']
                start = time.time()

                # payload is the event info in the first task on the task list
                payload = json.dumps(task_list[0].event)

                if cls.tasks_waiting_for_input:
                    waiter = cls.tasks_waiting_for_input.pop(0)
                    Tracker.logger.debug(
                        "Found waiting lambda with socket {}".format(
                            waiter.current_state.sock.fileno()))
                    cls.tasks.remove(waiter)

                    new_task = TaskStarter(SocketNB(waiter.current_state.sock))
                    cls.tasks.append(new_task)
                    Tracker.logger.debug("Updated Task array: {}".format(
                        cls.tasks))
                    cls.fd_to_task_map[
                        new_task.current_state.fileno()] = new_task

                    waiter.current_state.enqueue(
                        "run_new_task:{}".format(payload))
                    waiter.current_state.do_write()

                else:
                    Tracker.logger.debug(
                        "No waiting lambda found; launching new one")
                    Tracker.logger.debug(
                        "Putting {} invocation on launch_queue".format(
                            function_name))
                    # launcher is waiting on this queue and will launch events asynchronously
                    launch_queue.put(
                        LaunchEvent(nlaunch=len(task_list),
                                    fn_name=function_name,
                                    akid=cls.akid,
                                    secret=cls.secret,
                                    payload=payload,
                                    regions=task_list[0].regions))

                for task in task_list:
                    # Tracker.logger = logging.getTracker.logger(p.kwargs['in_events'].values()[0]['metadata']['pipe_id'])
                    # Tracker.logger.debug('%s, %s', p.kwargs['in_events'].values()[0]['metadata']['lineage'], 'send, request')
                    lightlogger = lightlog.getLogger(
                        task.kwargs['in_events'].values()[0]['metadata']
                        ['pipe_id'])
                    lightlogger.debug(
                        ts=time.time(),
                        lineage=task.kwargs['in_events'].values()[0]
                        ['metadata']['lineage'],
                        op='send',
                        msg='invocation')

                Tracker.logger.debug(
                    "Invoking {} worker(s) takes {} ms".format(
                        len(task_list), (time.time() - start) * 1000))

            Tracker.logger.debug("Tracker _invocation_loop sleep")
            time.sleep(0.01)
        Tracker.logger.debug("Tracker _invocation_loop end")
Example #5
    def Submit(self, request, context):
        logging.info('PipelineServer handling submit request')
        try:
            pipe = create_from_spec(json.loads(request.pipeline_spec))

            for instream in request.inputstreams:
                for input in instream.inputs:

                    # edge case: the initial event arrives with an empty lineage
                    lineage = input.lineage
                    if input.lineage == '':
                        lineage = '1'

                    in_event = {
                        'key': input.uri,
                        'metadata': {
                            'pipe_id': pipe.pipe_id,
                            'lineage': lineage
                        }
                    }
                    pipe.inputs[instream.name][1].put(
                        {instream.type: in_event})
                    # put events on the buffer queue of each input stage

            pipe_dir = 'logs/' + pipe.pipe_id
            os.system('mkdir -p ' + pipe_dir)

            # handler = logging.FileHandler(pipe_dir + '/log.csv')
            # handler.setLevel(logging.DEBUG)
            # handler.setFormatter(logging.Formatter('%(created)f, %(message)s'))
            #memhandler = logging.handlers.MemoryHandler(1024**2*10, target=handler)
            #memhandler.shouldflush = lambda _: False

            logger = lightlog.getLogger(pipe.pipe_id)
            logger.add_metadata(
                'pipespec:\n%s\ninput:\n%s\n...\nsettings:\n%s' %
                (request.pipeline_spec, request.inputstreams[0].inputs[:1],
                 settings))
            # logger = logging.getLogger(pipe.pipe_id)
            # logger.propagate = False
            # logger.setLevel(logging.DEBUG)
            # logger.addHandler(memhandler)
            # logger.addHandler(handler)

            conf_sched = settings.get('scheduler', 'SimpleScheduler')
            candidates = [
                s for s in dir(sprocket.scheduler)
                if hasattr(vars(sprocket.scheduler)[s], conf_sched)
            ]
            if len(candidates) == 0:
                logging.error("scheduler %s not found", conf_sched)
                raise ValueError("scheduler %s not found" % conf_sched)
            sched = getattr(
                vars(sprocket.scheduler)[candidates[0]],
                conf_sched)  # only consider the first match

            logger.info(ts=time.time(), msg='start pipeline')
            sched.schedule(pipe)
            logger.info(ts=time.time(), msg='finish pipeline')

            logging.info("pipeline: %s finished", pipe.pipe_id)
            with open(pipe_dir + '/log_pb', 'wb') as f:
                f.write(logger.serialize())

            #memhandler.flush()
            result_queue = pipe.outputs.values()[0][
                1]  # there should be only one output queue

            num_m4s = 0
            out_key = None
            logging.debug("length of output queue: %s", result_queue.qsize())

            duration = 0.0
            while not result_queue.empty():
                chunk = result_queue.get(block=False)[
                    'chunks']  # TODO: should be named chunks or m4schunks
                num_m4s += 1
                # duration += chunk['duration']
                #if int(chunk['metadata']['lineage']) == 1:
                #    out_key = chunk['key']

            logging.info("number of m4s chunks: %d", num_m4s)
            logging.info("total duration: %f", duration)

            return pipeline_pb2.SubmitReply(success=True, mpd_url=None)

            # return pipeline_pb2.SubmitReply(success=False, error_msg='no output is found')

        except Exception as e:
            logging.error("Received exception: {}".format(
                traceback.format_exc()))
            if 'pipe_dir' in vars() and 'logger' in vars():
                with open(pipe_dir + '/log_pb', 'wb') as f:
                    f.write(logger.serialize())
            return pipeline_pb2.SubmitReply(success=False,
                                            error_msg=traceback.format_exc())
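The scheduler lookup above scans the submodules of sprocket.scheduler for one defining a class named by the scheduler setting, then takes the first match. A self-contained toy of the same reflection pattern, using a stand-in package built from ModuleType:

import types

pkg = types.ModuleType('pkg')                 # stand-in for sprocket.scheduler
pkg.simple = types.ModuleType('pkg.simple')   # stand-in submodule

class SimpleScheduler(object):
    @staticmethod
    def schedule(pipe):
        print('scheduling %s' % pipe)

pkg.simple.SimpleScheduler = SimpleScheduler

conf_sched = 'SimpleScheduler'
candidates = [s for s in dir(pkg) if hasattr(vars(pkg)[s], conf_sched)]
sched = getattr(vars(pkg)[candidates[0]], conf_sched)  # first match only
sched.schedule('demo-pipe')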
Example #6
    def _main_loop(cls):
        logging.info("tracker listening to port: %d" %
                     settings['tracker_port'])
        lsock = listen_socket('0.0.0.0', settings['tracker_port'], cls.cacert,
                              cls.srvcrt, cls.srvkey,
                              settings['tracker_backlog'])
        lsock_fd = lsock.fileno()

        tasks = []
        fd_task_map = {}
        poll_obj = select.poll()
        poll_obj.register(lsock_fd, select.POLLIN)
        npasses_out = 0

        while True:
            if cls.should_stop:
                if lsock is not None:
                    try:
                        lsock.shutdown(0)
                        lsock.close()
                    except Exception:
                        logging.warning("failure shutting down the lsock")
                    lsock = None
                    # os.kill(cls.pylaunch_pid, signal.SIGKILL)

            dflags = []
            for idx, tsk in enumerate(tasks):
                st = tsk.current_state
                val = 0
                if st.sock is not None:
                    if not isinstance(
                            st, sprocket.controlling.tracker.machine_state.
                            TerminalState):  # always listening
                        val = val | select.POLLIN

                    if st.ssl_write or st.want_write:
                        val = val | select.POLLOUT

                    if val != tsk.rwflag:
                        tsk.rwflag = val
                        dflags.append(idx)

                else:
                    tsk.rwflag = 0
                    dflags.append(idx)
                    if not isinstance(
                            st, sprocket.controlling.tracker.machine_state.
                            TerminalState):
                        tsk.current_state = sprocket.controlling.tracker.machine_state.ErrorState(
                            tsk.current_state, "sock closed in %s" % str(tsk))
                        logging.warning("socket closed abnormally: %s" %
                                        str(tsk))

            for idx in dflags:
                if tasks[idx].rwflag != 0:
                    poll_obj.register(tasks[idx].current_state,
                                      tasks[idx].rwflag)
                else:
                    try:
                        poll_obj.unregister(tasks[idx].current_state)
                    except Exception as e:
                        logging.error("unregister: " + str(e))

            pfds = poll_obj.poll(2000)
            npasses_out += 1

            if len(pfds) == 0:
                if cls.should_stop:
                    break
                continue

            # look for readable FDs
            for (fd, ev) in pfds:
                if (ev & select.POLLIN) != 0:
                    if lsock is not None and fd == lsock_fd:
                        logging.debug("listening sock got conn in")
                        cls._handle_server_sock(lsock, tasks, fd_task_map)

                    else:
                        logging.debug("conn sock %d got buffer readable", fd)
                        task = fd_task_map[fd]
                        task.do_read()

            for (fd, ev) in pfds:
                if (ev & select.POLLOUT) != 0:
                    logging.debug("conn sock %d got buffer writable", fd)
                    task = fd_task_map[fd]
                    task.do_write()

            for tsk in [
                    t for t in tasks if isinstance(
                        t.current_state, sprocket.controlling.tracker.
                        machine_state.TerminalState)
            ]:
                try:
                    poll_obj.unregister(tsk.current_state)
                except Exception as e:
                    logging.warning(e.message)
                try:
                    tsk.current_state.close()
                except Exception as e:
                    logging.warning(e.message)
                del fd_task_map[tsk.current_state.fileno()]

            should_append = []
            removable = []
            tasks = [
                t for t in tasks if not isinstance(
                    t.current_state,
                    sprocket.controlling.tracker.machine_state.TerminalState)
            ]
            for tsk in tasks:
                if tsk.current_state.want_handle:
                    if isinstance(tsk, TaskStarter):
                        # init msg lets us know which lambda function it's from
                        init_msg = tsk.current_state.recv_queue.popleft()
                        init_data = json.loads(init_msg)
                        with Tracker.waiting_queues_lock:
                            # so that we can get Task from the corresponding list
                            try:
                                func_queue = Tracker.waiting_queues.get(
                                    init_data['lambda_function'], [])
                                real_task = func_queue.pop(0)
                                if len(func_queue) == 0:
                                    del Tracker.waiting_queues[
                                        init_data['lambda_function']]  # GC
                                if 'lambda_start_ts' in init_data:
                                    logger = lightlog.getLogger(
                                        real_task.kwargs['in_events'].values()
                                        [0]['metadata']['pipe_id'])
                                    logger.debug(
                                        ts=init_data['lambda_start_ts'],
                                        lineage=real_task.kwargs['in_events'].
                                        values()[0]['metadata']['lineage'],
                                        op='recv',
                                        msg='lambda_start_ts')
                            except IndexError:
                                # task doesn't exist
                                real_task = OrphanedTask()
                                logging.info(
                                    "got an orphaned lambda function, "
                                    "sending quit")

                        real_task.rewire(
                            tsk.current_state)  # transition to a Task
                        fd_task_map[tsk.current_state.fileno()] = real_task
                        tsk.current_state.update_flags()
                        should_append.append(real_task)
                        removable.append(tsk)
            tasks.extend(should_append)
            for r in removable:
                tasks.remove(r)

            for tsk in tasks:
                if tsk.current_state.want_handle:
                    if not isinstance(tsk, TaskStarter):
                        tsk.do_handle()
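Note that poll_obj.register(...) is handed the state object itself rather than a raw file descriptor; select.poll accepts any object exposing fileno(), which SocketNB evidently does. A minimal, Unix-only demonstration of that duck-typing:

import select
import socket

class HasFileno(object):
    def __init__(self, sock):
        self.sock = sock

    def fileno(self):              # the only method select.poll needs
        return self.sock.fileno()

a, b = socket.socketpair()
poller = select.poll()
poller.register(HasFileno(a), select.POLLIN)
b.send(b'x')
print(poller.poll(1000))           # [(a.fileno(), select.POLLIN)]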
Example #7
    def _invocation_loop(cls):
        testsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        testsock.connect(("lambda.us-east-1.amazonaws.com",
                          443))  # incorrect when running on EC2
        addr = testsock.getsockname()[0]
        testsock.close()
        platform_name = settings.get('platform', 'aws_lambda')
        launcher_module = importlib.import_module('sprocket.platform.' +
                                                  platform_name + '.launcher')
        launcher_cls = launcher_module.Launcher
        launch_queue = multiprocessing.Queue()

        pid = os.fork()
        if pid == 0:
            launcher_cls.initialize(launch_queue)
            sys.exit(0)

        cls.launcher_pid = pid

        # cls._invoc_pr = cProfile.Profile()
        # cls._invoc_pr.enable()

        while not cls.should_stop:
            pending = {}  # function_name -> task list

            t = cls.submitted_queue.get(block=True)
            lst = pending.get(t.lambda_func, [])
            lst.append(t)
            pending[t.lambda_func] = lst

            while True:
                try:
                    t = cls.submitted_queue.get(block=False)
                    lst = pending.get(t.lambda_func, [])
                    lst.append(t)
                    pending[t.lambda_func] = lst
                except Queue.Empty:
                    break

            for k, v in pending.iteritems():
                with cls.waiting_queues_lock:
                    wq = cls.waiting_queues.get(k, [])
                    wq.extend(v)
                    cls.waiting_queues[k] = wq

            for func, lst in pending.iteritems():
                lst[0].event['addr'] = settings['daemon_addr']
                start = time.time()
                payload = json.dumps(lst[0].event)
                launch_queue.put(
                    LaunchEvent(nlaunch=len(lst),
                                fn_name=func,
                                akid=cls.akid,
                                secret=cls.secret,
                                payload=payload,
                                regions=lst[0].regions))

                for p in lst:
                    # logger = logging.getLogger(p.kwargs['in_events'].values()[0]['metadata']['pipe_id'])
                    # logger.debug('%s, %s', p.kwargs['in_events'].values()[0]['metadata']['lineage'], 'send, request')
                    logger = lightlog.getLogger(p.kwargs['in_events'].values()
                                                [0]['metadata']['pipe_id'])
                    logger.debug(ts=time.time(),
                                 lineage=p.kwargs['in_events'].values()[0]
                                 ['metadata']['lineage'],
                                 op='send',
                                 msg='invocation')

                logging.debug("invoking " + str(len(lst)) + ' workers takes ' +
                              str((time.time() - start) * 1000) + ' ms')

            time.sleep(0.001)
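Finally, the LaunchEvent handed to the launcher child is built with the same keywords in Examples #4 and #7. A plausible definition consistent with that usage; that it is a plain namedtuple is an assumption:

from collections import namedtuple

# Field names are taken from the keyword arguments used above.
LaunchEvent = namedtuple('LaunchEvent',
                         ['nlaunch', 'fn_name', 'akid', 'secret',
                          'payload', 'regions'])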