def agent_checker(cls):
    """Terminate timed-out agents, requeue their commands and re-arm the timer.

    An agent counts as timed out when the timestamp stored for it in
    ``cls.agent_status`` is more than ``cls.agent_time_out`` seconds old.
    Only timed-out agents that still own an entry in ``cls.running_cmds``
    are acted on: the agent is killed through ``GCFEngine.kill_agent``, all
    of its bookkeeping entries are dropped, and its command is handed back
    to ``cls.push_cmd`` for the same visitor.

    Before returning, a new ``Timer`` is stored in ``cls.timer`` and started
    so that this check runs again after ``cls.time_out`` seconds.
    """
    # Iterate over a snapshot of the keys: entries of cls.agent_status are
    # deleted inside the loop, and mutating a dict while iterating it raises
    # "RuntimeError: dictionary changed size during iteration".
    # NOTE(review): push_cmd is not visible here; if it registers agents it
    # would mutate the dict as well -- the snapshot covers that case too.
    for agent_id in list(cls.agent_status):
        if (time() - cls.agent_status[agent_id]) <= cls.agent_time_out:
            continue
        if agent_id not in cls.running_cmds:
            continue

        logger.warning('agent {} time out and terminated'.format(agent_id))
        GCFEngine.kill_agent(agent_id)

        cmd_spec = cls.running_cmds[agent_id]
        v = cls.agent_visitor[agent_id]
        del cls.visitor_agent[v]
        del cls.agent_visitor[agent_id]
        del cls.agent_status[agent_id]
        del cls.running_cmds[agent_id]

        # Give the interrupted command back to its visitor.
        cls.push_cmd(v, cmd_spec)

    # The class is passed explicitly as the callback's only argument.
    cls.timer = Timer(cls.time_out, cls.agent_checker, args=(cls,))
    cls.timer.start()
def agent_checker(cls):
    """Terminate timed-out agents, requeue their commands and re-arm the timer.

    An agent counts as timed out when the timestamp stored for it in
    ``cls.agent_status`` is more than ``cls.agent_time_out`` seconds old.
    Only timed-out agents that still own an entry in ``cls.running_cmds``
    are acted on: the agent is killed through ``GCFEngine.kill_agent``, all
    of its bookkeeping entries are dropped, and its command is handed back
    to ``cls.push_cmd`` for the same visitor.

    Before returning, a new ``Timer`` is stored in ``cls.timer`` and started
    so that this check runs again after ``cls.time_out`` seconds.
    """
    # Iterate over a snapshot of the keys: entries of cls.agent_status are
    # deleted inside the loop, and mutating a dict while iterating it raises
    # "RuntimeError: dictionary changed size during iteration".
    # NOTE(review): push_cmd is not visible here; if it registers agents it
    # would mutate the dict as well -- the snapshot covers that case too.
    for agent_id in list(cls.agent_status):
        if (time() - cls.agent_status[agent_id]) <= cls.agent_time_out:
            continue
        if agent_id not in cls.running_cmds:
            continue

        logger.warning('agent {} time out and terminated'.format(agent_id))
        GCFEngine.kill_agent(agent_id)

        cmd_spec = cls.running_cmds[agent_id]
        v = cls.agent_visitor[agent_id]
        del cls.visitor_agent[v]
        del cls.agent_visitor[agent_id]
        del cls.agent_status[agent_id]
        del cls.running_cmds[agent_id]

        # Give the interrupted command back to its visitor.
        cls.push_cmd(v, cmd_spec)

    # The class is passed explicitly as the callback's only argument.
    cls.timer = Timer(cls.time_out, cls.agent_checker, args=(cls,))
    cls.timer.start()
def get_agent(cls, v):
    """Return the id of the agent bound to visitor ``v``, binding one if needed.

    A visitor that already has an agent gets that agent back.  Otherwise an
    idle agent is taken over when one is available (its previous bindings
    are removed and it is recorded in ``cls.reused_agents``); failing that,
    a new agent id is generated and spawned through ``GCFEngine.spawn_agent``.
    In both cases the agent is bound to ``v`` and its ``cls.agent_status``
    timestamp is set to the current time.
    """
    # Fast path: the visitor is already served by an agent.
    if v in cls.visitor_agent:
        return cls.visitor_agent[v]

    if not cls.idle_agents:
        # Nothing to recycle: mint a fresh id and spawn a new agent.
        # NOTE: in Python 3 bytes(n) is n zero bytes, not the digits of n;
        # ids still differ between calls because every update() adds to the
        # same running hash (assumes agent_md5 is a hashlib-style object --
        # TODO confirm).
        cls.agent_count += 1
        cls.agent_md5.update(bytes(cls.agent_count))
        agent_id = cls.agent_md5.hexdigest()
        GCFEngine.spawn_agent(agent_id)
    else:
        # Recycle an idle agent and detach it from its previous visitor.
        agent_id = cls.idle_agents.pop()
        cls.reused_agents.add(agent_id)
        previous_visitor = cls.agent_visitor[agent_id]
        del cls.visitor_agent[previous_visitor]
        del cls.agent_visitor[agent_id]
        del cls.agent_status[agent_id]

    # Bind the agent to the requesting visitor and stamp its status.
    cls.visitor_agent[v] = agent_id
    cls.agent_visitor[agent_id] = v
    cls.agent_status[agent_id] = time()
    return agent_id
def require_cmd(cls, data):
    """Answer an agent's request for work by sending it one command.

    ``data`` must provide ``'agent_id'``, ``'agent_host'`` and
    ``'agent_port'``.  If a command is pending for the agent it is moved to
    ``cls.running_cmds`` (with a ``'logfile'`` entry added); otherwise the
    agent is sent ``{'cmd': 'terminate'}``.  The command is serialised with
    ``marshal`` and written to a TCP connection to the agent; up to 10
    attempts are made before the last error is re-raised.

    Afterwards, an agent that is neither running a command nor marked as
    reused is removed from ``cls.idle_agents`` and, if it is still bound to
    a visitor, killed and dropped from the bookkeeping dicts.

    Raises:
        Exception: whatever the final failed connection attempt raised.
    """
    agent_id = data['agent_id']
    agent_host = data['agent_host']
    agent_port = data['agent_port']

    # Default answer when nothing is queued for this agent.
    cmd_spec = {'cmd': 'terminate'}
    if agent_id in cls.pending_cmds:
        cmd_spec = cls.pending_cmds.pop(agent_id)
        cmd_spec['logfile'] = path.join(cls.out_dir, 'cmds', agent_id)
        if agent_id in cls.reused_agents:
            cls.reused_agents.remove(agent_id)
        cls.running_cmds[agent_id] = cmd_spec

    out_data = marshal.dumps(cmd_spec)
    attempts_left = 10
    while True:
        try:
            # The context manager closes the socket even when connect()
            # fails; sendall() keeps writing until the whole payload is out
            # (plain send() may transmit only part of it).
            with socket.socket() as s:
                s.connect((agent_host, agent_port))
                s.sendall(out_data)
            break
        except Exception as e:
            attempts_left -= 1
            logger.warning(str(e))
            if attempts_left <= 0:
                raise

    # The agent was told to terminate and nobody is waiting to reuse it:
    # forget about it.
    if agent_id not in cls.running_cmds and agent_id not in cls.reused_agents:
        if agent_id in cls.idle_agents:
            cls.idle_agents.remove(agent_id)
        if agent_id in cls.agent_visitor:
            GCFEngine.kill_agent(agent_id)
            v = cls.agent_visitor[agent_id]
            del cls.visitor_agent[v]
            del cls.agent_visitor[agent_id]
            del cls.agent_status[agent_id]
def require_cmd(cls, data):
    """Answer an agent's request for work by sending it one command.

    ``data`` must provide ``'agent_id'``, ``'agent_host'`` and
    ``'agent_port'``.  If a command is pending for the agent it is moved to
    ``cls.running_cmds`` (with a ``'logfile'`` entry added); otherwise the
    agent is sent ``{'cmd': 'terminate'}``.  The command is serialised with
    ``marshal`` and written to a TCP connection to the agent; up to 10
    attempts are made before the last error is re-raised.

    Afterwards, an agent that is neither running a command nor marked as
    reused is removed from ``cls.idle_agents`` and, if it is still bound to
    a visitor, killed and dropped from the bookkeeping dicts.

    Raises:
        Exception: whatever the final failed connection attempt raised.
    """
    agent_id = data['agent_id']
    agent_host = data['agent_host']
    agent_port = data['agent_port']

    # Default answer when nothing is queued for this agent.
    cmd_spec = {'cmd': 'terminate'}
    if agent_id in cls.pending_cmds:
        cmd_spec = cls.pending_cmds.pop(agent_id)
        cmd_spec['logfile'] = path.join(cls.out_dir, 'cmds', agent_id)
        if agent_id in cls.reused_agents:
            cls.reused_agents.remove(agent_id)
        cls.running_cmds[agent_id] = cmd_spec

    out_data = marshal.dumps(cmd_spec)
    attempts_left = 10
    while True:
        try:
            # The context manager closes the socket even when connect()
            # fails; sendall() keeps writing until the whole payload is out
            # (plain send() may transmit only part of it).
            with socket.socket() as s:
                s.connect((agent_host, agent_port))
                s.sendall(out_data)
            break
        except Exception as e:
            attempts_left -= 1
            logger.warning(str(e))
            if attempts_left <= 0:
                raise

    # The agent was told to terminate and nobody is waiting to reuse it:
    # forget about it.
    if agent_id not in cls.running_cmds and agent_id not in cls.reused_agents:
        if agent_id in cls.idle_agents:
            cls.idle_agents.remove(agent_id)
        if agent_id in cls.agent_visitor:
            GCFEngine.kill_agent(agent_id)
            v = cls.agent_visitor[agent_id]
            del cls.visitor_agent[v]
            del cls.agent_visitor[agent_id]
            del cls.agent_status[agent_id]
def cleanup(cls):
    """Cancel the pending checker timer, if any, and kill every bound agent.

    Every agent id that appears as a key of ``cls.agent_visitor`` is passed
    to ``GCFEngine.kill_agent``.  The bookkeeping dicts themselves are left
    untouched.
    """
    pending_timer = cls.timer
    if pending_timer:
        pending_timer.cancel()

    for bound_agent in cls.agent_visitor:
        GCFEngine.kill_agent(bound_agent)
def main():
    """Run one dv.py session: start the agent server, run the work, report.

    Steps, in order:
      1. parse the command line with ``args_parse``;
      2. start ``start_agent_server`` in a separate ``Process`` (stored in
         the module-level ``server_p``) and wait for it to report its
         ``(host, port)`` on ``in_q``;
      3. install the GCF engine selected by ``opts.gcf`` ('local' or 'lsf');
      4. configure ``JobEngine`` and load 'loader' plus any patch files;
      5. build the ``top`` visitor, which spawns one body per ``--expr``
         expression and one for ``--test``;
      6. drive ``JobEngine`` and ``Scheduler`` until neither is waiting;
      7. log per-test results and the overall pass/fail verdict.

    ``event.notify('dvpy_done')`` and ``cleanup()`` always run on the way
    out, including on errors.

    Raises:
        Exception: if ``opts.gcf`` names an unsupported engine.
    """
    global server_p

    # parsing arguments
    opts, _args = args_parse()
    out_dir = path.abspath(opts.out_dir)

    in_q = Queue()
    out_q = Queue()
    logger.info('running dv.py')

    # start agent server
    server_p = Process(
        target=start_agent_server,
        args=(in_q, out_q, out_dir, opts.verbose),
    )
    server_p.start()

    try:
        # waiting for server started
        host, port = in_q.get()

        # set gcf engine
        if opts.gcf == 'local':
            GCFEngine.set_imp(Local(host, port, out_dir, opts.verbose))
        elif opts.gcf == 'lsf':
            GCFEngine.set_imp(LSF(host, port, out_dir, opts.verbose))
        else:
            raise Exception('unsupported gcf engine {}'.format(opts.gcf))

        # config job engine
        JobEngine.connect(in_q, out_q)
        JobEngine.out_dir = out_dir
        logger.info('max agents = {}'.format(opts.max_agents))
        JobEngine.max_cmds = opts.max_agents

        # load files
        require('loader')
        if opts.patchfile:
            for f in opts.patchfile:
                require(f)

        # evaluate expressions
        @visitor
        def top():
            @join
            def body(self):
                if opts.expr:
                    for e in opts.expr:
                        # ee=e binds the current expression at definition
                        # time; a plain closure would see only the last one.
                        @spawn(self)
                        def body(ee=e):
                            # NOTE(review): eval of command-line supplied
                            # expressions -- only safe for trusted users.
                            res = eval(ee, get_ns(), get_ns())
                            if isinstance(res, GeneratorType):
                                yield from res
                            return res
                if opts.test:
                    @spawn(self)
                    def body():
                        res = run_test(*opts.test,
                                       action=opts.action,
                                       where=opts.where)
                        if isinstance(res, GeneratorType):
                            yield from res
                        return res
            yield from body()

        # run: keep stepping both engines until neither has work waiting
        while True:
            JobEngine.run()
            Scheduler.run()
            if not (JobEngine.is_waiting() or Scheduler.is_waiting()):
                break

        for t, status in Test.test_status.items():
            if status == 'passed':
                logger.info("*** test '{}' passed".format(t))
            else:
                logger.error("*** test '{}' failed".format(t))

        if top.exception:
            def print_exception(e, indent=0):
                """Log ``e`` and, recursively, the items held in its args."""
                if not isinstance(e, Exception):
                    # Leaf value: report it and stop descending.
                    logger.error((" " * indent) + str(e))
                    return
                for frame in extract_tb(e.__traceback__):
                    logger.debug((" " * indent) + str(frame))
                for item in e.args:
                    children = item if isinstance(item, list) else [item]
                    for child in children:
                        print_exception(child, indent + 2)

            print_exception(top.exception)
            # The failure is only reported through the log; top.exception
            # is not re-raised here.
            logger.error('dv.py failed')
        else:
            logger.info('dv.py passed')
    finally:
        event.notify('dvpy_done')
        cleanup()
def main():
    """Run one dv.py session: start the agent server, run the work, report.

    Steps, in order:
      1. parse the command line with ``args_parse``;
      2. start ``start_agent_server`` in a separate ``Process`` (stored in
         the module-level ``server_p``) and wait for it to report its
         ``(host, port)`` on ``in_q``;
      3. install the GCF engine selected by ``opts.gcf`` ('local' or 'lsf');
      4. configure ``JobEngine`` and load 'loader' plus any patch files;
      5. build the ``top`` visitor, which spawns one body per ``--expr``
         expression and one for ``--test``;
      6. drive ``JobEngine`` and ``Scheduler`` until neither is waiting;
      7. log per-test results and the overall pass/fail verdict.

    ``event.notify('dvpy_done')`` and ``cleanup()`` always run on the way
    out, including on errors.

    Raises:
        Exception: if ``opts.gcf`` names an unsupported engine.
    """
    global server_p

    # parsing arguments
    opts, _args = args_parse()
    out_dir = path.abspath(opts.out_dir)

    in_q = Queue()
    out_q = Queue()
    logger.info('running dv.py')

    # start agent server
    server_p = Process(
        target=start_agent_server,
        args=(in_q, out_q, out_dir, opts.verbose),
    )
    server_p.start()

    try:
        # waiting for server started
        host, port = in_q.get()

        # set gcf engine
        if opts.gcf == 'local':
            GCFEngine.set_imp(Local(host, port, out_dir, opts.verbose))
        elif opts.gcf == 'lsf':
            GCFEngine.set_imp(LSF(host, port, out_dir, opts.verbose))
        else:
            raise Exception('unsupported gcf engine {}'.format(opts.gcf))

        # config job engine
        JobEngine.connect(in_q, out_q)
        JobEngine.out_dir = out_dir
        logger.info('max agents = {}'.format(opts.max_agents))
        JobEngine.max_cmds = opts.max_agents

        # load files
        require('loader')
        if opts.patchfile:
            for f in opts.patchfile:
                require(f)

        # evaluate expressions
        @visitor
        def top():
            @join
            def body(self):
                if opts.expr:
                    for e in opts.expr:
                        # ee=e binds the current expression at definition
                        # time; a plain closure would see only the last one.
                        @spawn(self)
                        def body(ee=e):
                            # NOTE(review): eval of command-line supplied
                            # expressions -- only safe for trusted users.
                            res = eval(ee, get_ns(), get_ns())
                            if isinstance(res, GeneratorType):
                                yield from res
                            return res
                if opts.test:
                    @spawn(self)
                    def body():
                        res = run_test(*opts.test,
                                       action=opts.action,
                                       where=opts.where)
                        if isinstance(res, GeneratorType):
                            yield from res
                        return res
            yield from body()

        # run: keep stepping both engines until neither has work waiting
        while True:
            JobEngine.run()
            Scheduler.run()
            if not (JobEngine.is_waiting() or Scheduler.is_waiting()):
                break

        for t, status in Test.test_status.items():
            if status == 'passed':
                logger.info("*** test '{}' passed".format(t))
            else:
                logger.error("*** test '{}' failed".format(t))

        if top.exception:
            def print_exception(e, indent=0):
                """Log ``e`` and, recursively, the items held in its args."""
                if not isinstance(e, Exception):
                    # Leaf value: report it and stop descending.
                    logger.error((" " * indent) + str(e))
                    return
                for frame in extract_tb(e.__traceback__):
                    logger.debug((" " * indent) + str(frame))
                for item in e.args:
                    children = item if isinstance(item, list) else [item]
                    for child in children:
                        print_exception(child, indent + 2)

            print_exception(top.exception)
            # The failure is only reported through the log; top.exception
            # is not re-raised here.
            logger.error('dv.py failed')
        else:
            logger.info('dv.py passed')
    finally:
        event.notify('dvpy_done')
        cleanup()