def default_trace_func(in_events, msg, op, **kwargs):
    """Log every command message sent/recv by the state machine.

    op includes send/recv/undo_recv/kick.
    """
    logger = lightlog.getLogger(in_events.values()[0]['metadata']['pipe_id'])
    logger.debug(ts=time.time(),
                 lineage=in_events.values()[0]['metadata']['lineage'],
                 op=op, msg=msg, **kwargs)
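# A minimal usage sketch of the trace function above. The in_events shape is
# inferred from how this file indexes it (a dict mapping a stream type to an
# event carrying metadata.pipe_id and metadata.lineage); the concrete values
# below are illustrative only:
_example_in_events = {
    'chunks': {
        'key': 'input/chunk0.mp4',
        'metadata': {'pipe_id': 'pipe-1234', 'lineage': '1'},
    }
}
# default_trace_func(_example_in_events, msg='run:...', op='send')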
@classmethod
def process_starter_tasks(cls, starter_tasks):
    """Pair each starter task (a freshly connected lambda) with a real Task
    from the corresponding waiting queue."""
    should_append = []
    removable = []
    Tracker.logger.debug("Processing starter tasks")
    for tsk in starter_tasks:
        Tracker.logger.debug("Starter task with fd {}".format(
            tsk.current_state.fileno()))
        # the init msg lets us know which lambda function it's from
        while True:
            try:
                init_msg = tsk.current_state.recv_queue.popleft()
                init_data = json.loads(init_msg)
            except ValueError:
                # not valid JSON; discard it and try the next message
                Tracker.logger.debug(
                    "Throwing out message {}".format(init_msg))
                continue
            break
        Tracker.logger.debug("{}:{} has init_msg {}".format(
            tsk.current_state.fileno(), tsk, init_msg))
        with Tracker.waiting_queues_lock:
            # so that we can get the Task from the corresponding list
            try:
                func_queue = Tracker.waiting_queues.get(
                    init_data['lambda_function'], [])
                # get the first task off the function queue
                real_task = func_queue.pop(0)
                if len(func_queue) == 0:
                    # no more tasks on this queue: garbage-collect it
                    del Tracker.waiting_queues[init_data['lambda_function']]
                if 'lambda_start_ts' in init_data:
                    lightlogger = lightlog.getLogger(
                        real_task.kwargs['in_events'].values()[0]
                        ['metadata']['pipe_id'])
                    lightlogger.debug(
                        ts=init_data['lambda_start_ts'],
                        lineage=real_task.kwargs['in_events'].values()[0]
                        ['metadata']['lineage'],
                        op='recv', msg='lambda_start_ts')
            except IndexError:
                # no task is waiting for this function
                real_task = OrphanedTask()
                Tracker.logger.info(
                    "got an orphaned lambda function, sending quit")
        real_task.rewire(tsk.current_state)  # transition to a Task
        cls.fd_to_task_map[tsk.current_state.fileno()] = real_task
        tsk.current_state.update_flags()
        should_append.append(real_task)
        removable.append(tsk)
    return should_append, removable
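# For reference, a sketch of the init message a newly started lambda sends on
# connect. Only the 'lambda_function' and 'lambda_start_ts' keys are consumed
# above; the concrete values are illustrative:
_example_init_msg = json.dumps({
    'lambda_function': 'encode',      # selects the waiting queue to pair with
    'lambda_start_ts': 1520000000.0,  # optional; logged with op='recv'
})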
def staged_trace_func(stage, num_frames, worker_called, in_events, msg, op):
    """Log every command message sent/recv by the state machine.

    op includes send/recv/undo_recv/kick.
    """
    logger = lightlog.getLogger(in_events.values()[0]['metadata']['pipe_id'])
    logger.debug(stage=stage, num_frames=num_frames,
                 worker_called=worker_called,
                 lineage=in_events.values()[0]['metadata']['lineage'],
                 op=op, msg=msg)
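# staged_trace_func takes three extra leading arguments compared with
# default_trace_func, so it presumably gets bound before being installed
# wherever an (in_events, msg, op) trace function is expected. A hypothetical
# sketch with functools.partial (the stage name and counts are made up):
import functools

_trace = functools.partial(staged_trace_func, 'encode', 6, 'encode_worker')
# _trace(in_events, msg='run:...', op='send') now logs with stage context.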
@classmethod
def _invocation_loop(cls):
    Tracker.logger.debug("Tracker _invocation_loop")
    # make sure the network is working
    testsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    testsock.connect(("lambda.us-east-1.amazonaws.com", 443))
    addr = testsock.getsockname()[0]  # incorrect when running on EC2
    testsock.close()

    platform_name = settings.get('platform', 'aws_lambda')
    launcher_module = importlib.import_module('sprocket.platform.' +
                                              platform_name + '.launcher')
    launcher_cls = launcher_module.Launcher
    launch_queue = multiprocessing.Queue()

    pid = os.fork()
    if pid == 0:
        # the child process runs the launcher
        launcher_cls.initialize(launch_queue)
        sys.exit(0)
    cls.launcher_pid = pid

    while not cls.should_stop:
        pending = {}  # function_name -> task list
        try:
            t = cls.submitted_queue.get(block=False)
        except Queue.Empty:
            time.sleep(0.001)
            continue

        # drain submitted_queue into `pending` until no tasks remain
        while True:
            lst = pending.get(t.lambda_func, [])
            lst.append(t)
            pending[t.lambda_func] = lst
            try:
                t = cls.submitted_queue.get(block=False)
            except Queue.Empty:
                break

        # move pending tasks onto the wait queues
        for pending_function, pending_task_list in pending.iteritems():
            with cls.waiting_queues_lock:
                wq = cls.waiting_queues.get(pending_function, [])
                wq.extend(pending_task_list)
                cls.waiting_queues[pending_function] = wq

        # for each function, reuse an idle lambda if one is waiting,
        # otherwise launch new workers
        for function_name, task_list in pending.iteritems():
            task_list[0].event['addr'] = settings['daemon_addr']
            start = time.time()
            # the payload is the event info of the first task on the list
            payload = json.dumps(task_list[0].event)
            if len(cls.tasks_waiting_for_input):
                waiter = cls.tasks_waiting_for_input.pop(0)
                Tracker.logger.debug(
                    "Found waiting lambda with socket {}".format(
                        waiter.current_state.sock.fileno()))
                cls.tasks.remove(waiter)
                new_task = TaskStarter(SocketNB(waiter.current_state.sock))
                cls.tasks.append(new_task)
                Tracker.logger.debug(
                    "Updated Task array: {}".format(cls.tasks))
                cls.fd_to_task_map[new_task.current_state.fileno()] = new_task
                waiter.current_state.enqueue(
                    "run_new_task:{}".format(payload))
                waiter.current_state.do_write()
            else:
                Tracker.logger.debug(
                    "No waiting lambda found; launching new one")
                Tracker.logger.debug(
                    "Putting {} invocation on launch_queue".format(
                        function_name))
                # the launcher blocks on this queue and launches
                # events asynchronously
                launch_queue.put(
                    LaunchEvent(nlaunch=len(task_list),
                                fn_name=function_name,
                                akid=cls.akid,
                                secret=cls.secret,
                                payload=payload,
                                regions=task_list[0].regions))
            for task in task_list:
                lightlogger = lightlog.getLogger(
                    task.kwargs['in_events'].values()[0]['metadata']
                    ['pipe_id'])
                lightlogger.debug(
                    ts=time.time(),
                    lineage=task.kwargs['in_events'].values()[0]
                    ['metadata']['lineage'],
                    op='send', msg='invocation')
            Tracker.logger.debug(
                "Invoking {} worker(s) took {} ms".format(
                    len(task_list), (time.time() - start) * 1000))

        Tracker.logger.debug("Tracker _invocation_loop sleep")
        time.sleep(0.01)
    Tracker.logger.debug("Tracker _invocation_loop end")
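# For orientation, a hedged sketch of the LaunchEvent record the launcher
# consumes. Assumption: the real definition ships with the platform launcher
# code; only the field names, taken from the call site above, are grounded:
import collections

_ExampleLaunchEvent = collections.namedtuple(
    'LaunchEvent',
    ['nlaunch', 'fn_name', 'akid', 'secret', 'payload', 'regions'])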
def Submit(self, request, context):
    logging.info('PipelineServer handling submit request')
    try:
        pipe = create_from_spec(json.loads(request.pipeline_spec))
        for instream in request.inputstreams:
            for input in instream.inputs:
                # edge case: the initial event has no lineage yet
                lineage = input.lineage
                if input.lineage == '':
                    lineage = str(1)
                in_event = {
                    'key': input.uri,
                    'metadata': {
                        'pipe_id': pipe.pipe_id,
                        'lineage': lineage
                    }
                }
                # put events on the buffer queue of every input stage
                pipe.inputs[instream.name][1].put({instream.type: in_event})

        pipe_dir = 'logs/' + pipe.pipe_id
        os.system('mkdir -p ' + pipe_dir)
        logger = lightlog.getLogger(pipe.pipe_id)
        logger.add_metadata(
            'pipespec:\n%s\ninput:\n%s\n...\nsettings:\n%s' %
            (request.pipeline_spec, request.inputstreams[0].inputs[:1],
             settings))

        conf_sched = settings.get('scheduler', 'SimpleScheduler')
        candidates = [
            s for s in dir(sprocket.scheduler)
            if hasattr(vars(sprocket.scheduler)[s], conf_sched)
        ]
        if len(candidates) == 0:
            logging.error("scheduler %s not found", conf_sched)
            raise ValueError("scheduler %s not found" % conf_sched)
        # only consider the first match
        sched = getattr(vars(sprocket.scheduler)[candidates[0]], conf_sched)

        logger.info(ts=time.time(), msg='start pipeline')
        sched.schedule(pipe)
        logger.info(ts=time.time(), msg='finish pipeline')
        logging.info("pipeline: %s finished", pipe.pipe_id)
        with open(pipe_dir + '/log_pb', 'wb') as f:
            f.write(logger.serialize())

        # there should be only one output queue
        result_queue = pipe.outputs.values()[0][1]
        num_m4s = 0
        logging.debug("length of output queue: %s", result_queue.qsize())
        duration = 0.0
        while not result_queue.empty():
            # TODO: should be named chunks or m4schunks
            chunk = result_queue.get(block=False)['chunks']
            num_m4s += 1
            # duration += chunk['duration']
        logging.info("number of m4s chunks: %d", num_m4s)
        logging.info("total duration: %f", duration)
        return pipeline_pb2.SubmitReply(success=True, mpd_url=None)
    except Exception:
        logging.error("Received exception: {}".format(
            traceback.format_exc()))
        if 'pipe_dir' in vars():
            with open(pipe_dir + '/log_pb', 'wb') as f:
                f.write(logger.serialize())
        return pipeline_pb2.SubmitReply(success=False,
                                        error_msg=traceback.format_exc())
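# The scheduler lookup above, in isolation: scan the submodules of
# sprocket.scheduler and return the first class named `name`. A minimal
# equivalent sketch (the helper name is hypothetical, not part of the
# codebase):
def _find_scheduler(pkg, name):
    for modname in dir(pkg):
        mod = vars(pkg)[modname]
        if hasattr(mod, name):
            return getattr(mod, name)  # only the first match is considered
    raise ValueError("scheduler %s not found" % name)

# _find_scheduler(sprocket.scheduler, 'SimpleScheduler')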
@classmethod
def _main_loop(cls):
    logging.info("tracker listening to port: %d" % settings['tracker_port'])
    lsock = listen_socket('0.0.0.0', settings['tracker_port'], cls.cacert,
                          cls.srvcrt, cls.srvkey, settings['tracker_backlog'])
    lsock_fd = lsock.fileno()

    tasks = []
    fd_task_map = {}
    poll_obj = select.poll()
    poll_obj.register(lsock_fd, select.POLLIN)
    npasses_out = 0

    while True:
        if cls.should_stop:
            if lsock is not None:
                try:
                    lsock.shutdown(0)
                    lsock.close()
                except:
                    logging.warning("failure shutting down the lsock")
                lsock = None

        # recompute poll flags for every task whose state changed
        dflags = []
        for idx, tsk in enumerate(tasks):
            st = tsk.current_state
            val = 0
            if st.sock is not None:
                if not isinstance(
                        st, sprocket.controlling.tracker.machine_state.
                        TerminalState):
                    # always listening
                    val = val | select.POLLIN
                if st.ssl_write or st.want_write:
                    val = val | select.POLLOUT
                if val != tsk.rwflag:
                    tsk.rwflag = val
                    dflags.append(idx)
            else:
                tsk.rwflag = 0
                dflags.append(idx)
                if not isinstance(
                        st, sprocket.controlling.tracker.machine_state.
                        TerminalState):
                    tsk.current_state = sprocket.controlling.tracker.\
                        machine_state.ErrorState(
                            tsk.current_state, "sock closed in %s" % str(tsk))
                    logging.warning("socket closed abnormally: %s" % str(tsk))

        for idx in dflags:
            if tasks[idx].rwflag != 0:
                poll_obj.register(tasks[idx].current_state, tasks[idx].rwflag)
            else:
                try:
                    poll_obj.unregister(tasks[idx].current_state)
                except Exception as e:
                    logging.error("unregister: " + str(e.message))

        pfds = poll_obj.poll(2000)
        npasses_out += 1

        if len(pfds) == 0:
            if cls.should_stop:
                break
            continue

        # look for readable FDs
        for (fd, ev) in pfds:
            if (ev & select.POLLIN) != 0:
                if lsock is not None and fd == lsock_fd:
                    logging.debug("listening sock got conn in")
                    cls._handle_server_sock(lsock, tasks, fd_task_map)
                else:
                    logging.debug("conn sock %d got buffer readable", fd)
                    task = fd_task_map[fd]
                    task.do_read()

        # look for writable FDs
        for (fd, ev) in pfds:
            if (ev & select.POLLOUT) != 0:
                logging.debug("conn sock %d got buffer writable", fd)
                task = fd_task_map[fd]
                task.do_write()

        # clean up tasks that have reached a terminal state
        for tsk in [
                t for t in tasks
                if isinstance(t.current_state, sprocket.controlling.tracker.
                              machine_state.TerminalState)
        ]:
            try:
                poll_obj.unregister(tsk.current_state)
            except Exception as e:
                logging.warning(e.message)
            try:
                tsk.current_state.close()
            except Exception as e:
                logging.warning(e.message)
            del fd_task_map[tsk.current_state.fileno()]

        should_append = []
        removable = []
        tasks = [
            t for t in tasks
            if not isinstance(t.current_state, sprocket.controlling.tracker.
                              machine_state.TerminalState)
        ]

        for tsk in tasks:
            if tsk.current_state.want_handle:
                if isinstance(tsk, TaskStarter):
                    # the init msg lets us know which lambda function
                    # it's from
                    init_msg = tsk.current_state.recv_queue.popleft()
                    init_data = json.loads(init_msg)
                    with Tracker.waiting_queues_lock:
                        # so that we can get the Task from the
                        # corresponding list
                        try:
                            func_queue = Tracker.waiting_queues.get(
                                init_data['lambda_function'], [])
                            real_task = func_queue.pop(0)
                            if len(func_queue) == 0:
                                # garbage-collect the empty queue
                                del Tracker.waiting_queues[
                                    init_data['lambda_function']]
                            if 'lambda_start_ts' in init_data:
                                logger = lightlog.getLogger(
                                    real_task.kwargs['in_events'].values()
                                    [0]['metadata']['pipe_id'])
                                logger.debug(
                                    ts=init_data['lambda_start_ts'],
                                    lineage=real_task.kwargs['in_events'].
                                    values()[0]['metadata']['lineage'],
                                    op='recv', msg='lambda_start_ts')
                        except IndexError:
                            # no task is waiting for this function
                            real_task = OrphanedTask()
                            logging.info(
                                "got an orphaned lambda function, "
                                "sending quit")
                    real_task.rewire(tsk.current_state)  # transition to a Task
                    fd_task_map[tsk.current_state.fileno()] = real_task
                    tsk.current_state.update_flags()
                    should_append.append(real_task)
                    removable.append(tsk)

        tasks.extend(should_append)
        for r in removable:
            tasks.remove(r)

        for tsk in tasks:
            if tsk.current_state.want_handle:
                if not isinstance(tsk, TaskStarter):
                    tsk.do_handle()
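# The loop above drives task.current_state objects through poll(). For
# orientation, the interface it relies on, as a hypothetical sketch (the
# real classes live in sprocket.controlling.tracker.machine_state):
class _MachineStateInterface(object):
    sock = None          # underlying socket; None once it has been closed
    ssl_write = False    # True while the TLS layer needs a write to proceed
    want_write = False   # True while the send queue is non-empty
    want_handle = False  # True when a complete message is ready to handle

    def fileno(self):
        # poll() registers and dispatches states by file descriptor
        raise NotImplementedError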
@classmethod
def _invocation_loop(cls):
    # make sure the network is working
    testsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    testsock.connect(("lambda.us-east-1.amazonaws.com", 443))
    addr = testsock.getsockname()[0]  # incorrect when running on EC2
    testsock.close()

    platform_name = settings.get('platform', 'aws_lambda')
    launcher_module = importlib.import_module('sprocket.platform.' +
                                              platform_name + '.launcher')
    launcher_cls = launcher_module.Launcher
    launch_queue = multiprocessing.Queue()

    pid = os.fork()
    if pid == 0:
        # the child process runs the launcher
        launcher_cls.initialize(launch_queue)
        sys.exit(0)
    cls.launcher_pid = pid

    while not cls.should_stop:
        pending = {}  # function name -> task list
        # block for the first task, then drain the rest without blocking
        t = cls.submitted_queue.get(block=True)
        lst = pending.get(t.lambda_func, [])
        lst.append(t)
        pending[t.lambda_func] = lst
        while True:
            try:
                t = cls.submitted_queue.get(block=False)
                lst = pending.get(t.lambda_func, [])
                lst.append(t)
                pending[t.lambda_func] = lst
            except Queue.Empty:
                break

        # move pending tasks onto the wait queues
        for k, v in pending.iteritems():
            with cls.waiting_queues_lock:
                wq = cls.waiting_queues.get(k, [])
                wq.extend(v)
                cls.waiting_queues[k] = wq

        # launch workers for each function's task list
        for func, lst in pending.iteritems():
            lst[0].event['addr'] = settings['daemon_addr']
            start = time.time()
            payload = json.dumps(lst[0].event)
            launch_queue.put(
                LaunchEvent(nlaunch=len(lst),
                            fn_name=func,
                            akid=cls.akid,
                            secret=cls.secret,
                            payload=payload,
                            regions=lst[0].regions))
            for p in lst:
                logger = lightlog.getLogger(
                    p.kwargs['in_events'].values()[0]['metadata']['pipe_id'])
                logger.debug(ts=time.time(),
                             lineage=p.kwargs['in_events'].values()[0]
                             ['metadata']['lineage'],
                             op='send', msg='invocation')
            logging.debug("invoking %d workers took %f ms",
                          len(lst), (time.time() - start) * 1000)
        time.sleep(0.001)