def __init__(self):
    """Build the service from command line options.

    Parses the CLI arguments, configures logging and the job report
    file, creates the enabled request interfaces (file and/or ZMQ
    network) and wires up the manager, notifier and receiver.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--net",
                     help="enable network interface",
                     action="store_true")
    cli.add_argument("--net-port",
                     help="port to listen for network interface",
                     type=int,
                     default=5555)
    cli.add_argument("--file",
                     help="enable file interface",
                     action="store_true")
    cli.add_argument("--file-path",
                     help="path to the request file",
                     default="qcg_pm_reqs.json")
    cli.add_argument("--wd",
                     help="working directory for the service",
                     default=".")
    cli.add_argument(
        "--exschema",
        help="execution schema [slurm|direct] (direct by default)",
        default="direct")
    self.__args = cli.parse_args()

    # at least one request interface must be enabled
    if not (self.__args.net or self.__args.file):
        raise InvalidArgument("no interface enabled - finishing")

    self.__wd = self.__args.wd

    self.__setupLogging()
    self.__setupReports()

    # one shared configuration dictionary consumed by the executor and
    # by both interface types
    self.__conf = {
        Executor.EXECUTOR_WD: self.__args.wd,
        Executor.EXECUTION_SCHEMA: self.__args.exschema,
        FileInterface.CONF_FILE_PATH: self.__args.file_path,
        ZMQInterface.CONF_IP_ADDRESS: "*",
        ZMQInterface.CONF_PORT: self.__args.net_port
    }

    # instantiate only the requested interfaces; file interface first,
    # network interface second (same order as before)
    self.__ifaces = []
    for enabled, ifaceClass in ((self.__args.file, FileInterface),
                                (self.__args.net, ZMQInterface)):
        if enabled:
            newIface = ifaceClass()
            newIface.setup(self.__conf)
            self.__ifaces.append(newIface)

    self.__manager = Manager(parse_slurm_resources(), self.__conf)
    self.__notifId = self.__manager.registerNotifier(self.__jobNotify,
                                                     self.__manager)
    self.__receiver = Receiver(self.__manager, self.__ifaces)
def Notest_ZMQInterfacesInit(self):
    """Disabled integration check ("Notest_" prefix keeps the runner
    from picking it up): bring up a ZMQ network interface on SLURM
    resources and run the receiver until it reports finished.
    """
    resources = self.createSlurmResources()
    manager = Manager(resources)

    zmqConf = {ZMQInterface.CONF_IP_ADDRESS: "*",
               ZMQInterface.CONF_PORT: "5555"}
    zmqIface = ZMQInterface()
    zmqIface.setup(zmqConf)

    receiver = Receiver(manager, [zmqIface])
    receiver.run()

    # block until the receiver signals that all interfaces stopped
    loop = asyncio.get_event_loop()
    loop.run_until_complete(
        asyncio.gather(self.__stopInterfaces(zmqConf, receiver)))
    loop.close()
def test_FileInterfaceIterateSubmit(self):
    """Submit an iteratable job through the file interface and verify
    that every iteration produced its own sandbox directory containing
    the expected stdout/stderr files.

    Fix: the per-iteration check previously asserted only that the
    substituted sandbox path was a non-empty string (always true for a
    non-empty template); it now asserts that the directory actually
    exists, consistent with the checks in test_FileInterfacesInit.
    """
    res = self.createLocalResources()
    reqsFilePath = self.__createSubmitIteratableRequests()
    manager = Manager(res)

    ifaces = [FileInterface()]
    ifaces[0].setup({FileInterface.CONF_FILE_PATH: reqsFilePath})

    receiver = Receiver(manager, ifaces)
    receiver.run()

    # run the event loop until all submitted jobs finished
    asyncio.get_event_loop().run_until_complete(asyncio.gather(
        self.__waitForFinish(manager)
    ))
    asyncio.get_event_loop().close()

    # each iteration substitutes '${it}' in the sandbox path template
    for i in range(self.startIter, self.endIter):
        jobWdDir = Template(self.iterJobSandbox).safe_substitute({'it': i})
        self.assertTrue(os.path.exists(jobWdDir))
        self.assertTrue(
            os.path.exists(os.path.join(jobWdDir, self.iterJobStdoutName)))
        self.assertTrue(
            os.path.exists(os.path.join(jobWdDir, self.iterJobStderrName)))
def test_FileInterfacesInit(self):
    """Run a batch of sample requests through the file interface and
    verify the job sandbox and its stdout/stderr files were created.
    """
    resources = self.createLocalResources()
    requestsFile = self.__createSampleRequests()
    manager = Manager(resources)

    fileIface = FileInterface()
    fileIface.setup({FileInterface.CONF_FILE_PATH: requestsFile})

    receiver = Receiver(manager, [fileIface])
    receiver.run()

    # drive the event loop until all jobs are done
    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.gather(self.__waitForFinish(manager)))
    loop.close()

    sandbox = self.jobSandbox
    self.assertTrue(os.path.exists(sandbox))
    for outputName in (self.jobStdoutName, self.jobStderrName):
        self.assertTrue(os.path.exists(os.path.join(sandbox, outputName)))
def test_JobVars(self):
    """Verify that QCG_PM_* resource variables are exported to the job
    environment and that ${...} placeholders in the job's working
    directory and stdout/stderr names are substituted.
    """
    res = self.createLocalResources()
    manager = Manager(res, self.config)

    # (re)create a script that dumps the job's environment to stdout
    scriptFile = join(self.testSandbox, 'script.sh')
    if exists(scriptFile):
        os.remove(scriptFile)
    with open(scriptFile, 'w') as f:
        f.write('''
#!/bin/env bash

echo "*** environment ***"
env
''')
    # make the script readable/writable/executable by the owner
    os.chmod(scriptFile, stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR)

    # wd/stdout/stderr use ${root_wd}, ${nnodes}, ${ncores} and ${jname}
    # placeholders the manager is expected to substitute
    jobs = [
        Job(
            'job1',
            JobExecution(
                'bash',
                args=[scriptFile],
                wd='${root_wd}/job1',
                stdout='${nnodes}.${ncores}-${jname}.stdout',
                stderr='${nnodes}.${ncores}-${jname}.stderr',
            ),
            JobResources(numNodes=ResourceSize(1), numCores=ResourceSize(2)))
    ]

    startTime = datetime.datetime.now()
    asyncio.get_event_loop().run_until_complete(
        asyncio.gather(self.__schedule(jobs, manager)))
    asyncio.get_event_loop().close()
    duration = datetime.datetime.now() - startTime

    # sanity check: the single short job should complete within 4s
    self.assertTrue(
        duration.total_seconds() > 0 and duration.total_seconds() < 4)

    # placeholders substituted: 1 node, 2 cores, job name 'job1'
    job1_wd = join(self.testSandbox, 'job1')
    self.assertTrue(os.path.exists(job1_wd))
    self.assertTrue(
        os.path.exists(os.path.join(job1_wd, '1.2-job1.stdout')))
    self.assertTrue(
        os.path.exists(os.path.join(job1_wd, '1.2-job1.stderr')))

    # the script writes only to stdout, so stderr must be empty
    stderrStat = os.stat(os.path.join(job1_wd, '1.2-job1.stderr'))
    self.assertTrue(stderrStat.st_size == 0)

    # scan the captured environment dump for the QCG_PM_* variables
    # (mmap.PROT_READ is POSIX-only — this test assumes a Unix host)
    with open(os.path.join(job1_wd, '1.2-job1.stdout'), 'r', 1) as file, \
            mmap.mmap(file.fileno(), 0, prot=mmap.PROT_READ) as s:
        self.assertTrue(s.find('QCG_PM_NTASKS=2'.encode('UTF-8')) != -1)
        self.assertTrue(
            s.find('QCG_PM_TASKS_PER_NODE=2'.encode('UTF-8')) != -1)
        self.assertTrue(s.find('QCG_PM_NNODES=1'.encode('UTF-8')) != -1)
        self.assertTrue(s.find('QCG_PM_NPROCS=2'.encode('UTF-8')) != -1)
def Notest_FileInterfacesService(self):
    """Disabled service-style check ("Notest_" prefix keeps the runner
    from picking it up): serve requests from a file interface, with the
    requests path overridable via the QCG_PM_REQS_FILE environment
    variable, and report job state changes through the notifier.
    """
    resources = self.createLocalResources()
    manager = Manager(resources)
    manager.registerNotifier(self.__jobNotify, manager)

    # QCG_PM_REQS_FILE overrides the default requests file location
    fileConf = {
        FileInterface.CONF_FILE_PATH:
            os.environ.get('QCG_PM_REQS_FILE', "reqs.json")
    }

    fileIface = FileInterface()
    fileIface.setup(fileConf)

    receiver = Receiver(manager, [fileIface])
    receiver.run()

    # block until the receiver signals that all interfaces stopped
    loop = asyncio.get_event_loop()
    loop.run_until_complete(
        asyncio.gather(self.__stopInterfaces(fileConf, receiver)))
    loop.close()
class QCGPMService:
    """QCG pilot-job manager service.

    Parses command line options, sets up logging and the job report
    file, creates the requested request interfaces (file and/or ZMQ
    network), and runs a receiver on top of the job manager until all
    interfaces have stopped.
    """

    def __init__(self):
        """Build the service from command line arguments.

        Raises:
            InvalidArgument: when neither --net nor --file is given.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--net",
                            help="enable network interface",
                            action="store_true")
        parser.add_argument("--net-port",
                            help="port to listen for network interface",
                            type=int,
                            default=5555)
        parser.add_argument("--file",
                            help="enable file interface",
                            action="store_true")
        parser.add_argument("--file-path",
                            help="path to the request file",
                            default="qcg_pm_reqs.json")
        parser.add_argument("--wd",
                            help="working directory for the service",
                            default=".")
        parser.add_argument(
            "--exschema",
            help="execution schema [slurm|direct] (direct by default)",
            default="direct")
        self.__args = parser.parse_args()

        # at least one request interface must be enabled
        if not self.__args.net and not self.__args.file:
            raise InvalidArgument("no interface enabled - finishing")

        self.__wd = self.__args.wd

        self.__setupLogging()
        self.__setupReports()

        # shared configuration consumed by the executor and both
        # interface types
        self.__conf = {
            Executor.EXECUTOR_WD: self.__args.wd,
            Executor.EXECUTION_SCHEMA: self.__args.exschema,
            FileInterface.CONF_FILE_PATH: self.__args.file_path,
            ZMQInterface.CONF_IP_ADDRESS: "*",
            ZMQInterface.CONF_PORT: self.__args.net_port
        }

        # instantiate only the interfaces requested on the command line
        self.__ifaces = []
        if self.__args.file:
            iface = FileInterface()
            iface.setup(self.__conf)
            self.__ifaces.append(iface)
        if self.__args.net:
            iface = ZMQInterface()
            iface.setup(self.__conf)
            self.__ifaces.append(iface)

        self.__manager = Manager(parse_slurm_resources(), self.__conf)
        self.__notifId = self.__manager.registerNotifier(
            self.__jobNotify, self.__manager)
        self.__receiver = Receiver(self.__manager, self.__ifaces)

    def __setupReports(self):
        """Remove any stale jobs.report file from the working directory."""
        self.__jobReportFile = join(self.__wd, 'jobs.report')
        if exists(self.__jobReportFile):
            os.remove(self.__jobReportFile)

    def __setupLogging(self):
        """Route the root logger to a fresh service.log in the working
        directory at DEBUG level."""
        self.__logFile = join(self.__wd, 'service.log')
        if exists(self.__logFile):
            os.remove(self.__logFile)
        rootLogger = logging.getLogger()
        handler = logging.FileHandler(filename=self.__logFile,
                                      mode='a',
                                      delay=False)
        handler.setFormatter(logging.Formatter('%(asctime)-15s: %(message)s'))
        rootLogger.addHandler(handler)
        rootLogger.setLevel(logging.DEBUG)

    async def __stopInterfaces(self, receiver):
        """Poll once a second until the receiver reports finished,
        then stop it."""
        while not receiver.isFinished:
            await asyncio.sleep(1)
        logging.info("stopping receiver ...")
        receiver.stop()

    def __jobNotify(self, jobId, state, manager):
        """Append a job's state history and runtime data to the job
        report file once the job reaches a finished state."""
        if self.__jobReportFile is not None:
            if state.isFinished():
                with open(self.__jobReportFile, 'a') as f:
                    job = manager.jobList.get(jobId)
                    # one record per job: id, final state, then the
                    # state-change history and runtime key/value pairs
                    f.write("%s (%s)\n\t%s\n\t%s\n" % (jobId, state.name, "\n\t".join([
                        "%s: %s" % (str(en[1]), en[0].name) for en in job.history
                    ]), "\n\t".join([
                        "%s: %s" % (k, v) for k, v in job.runtime.items()
                    ])))

    def start(self):
        """Run the receiver and block the event loop until all
        interfaces are stopped."""
        self.__receiver.run()
        asyncio.get_event_loop().run_until_complete(
            asyncio.gather(self.__stopInterfaces(self.__receiver)))
        asyncio.get_event_loop().close()
def test_ExecutorSimple(self):
    """Run four simple local jobs (wc with stdin, env, sleep, a bash
    script) and verify each produced its sandbox, stdout/stderr files,
    and an empty stderr.
    """
    res = self.createLocalResources()
    manager = Manager(res)

    # start from a clean test sandbox
    testSandbox = 'test-sandbox'
    if exists(testSandbox):
        shutil.rmtree(testSandbox)
    os.makedirs(testSandbox)

    # NOTE(review): 'dir' shadows the builtin of the same name
    for dir in [
            'hostname.sandbox', 'env.sandbox', 'sleep.sandbox',
            'script.sandbox'
    ]:
        dPath = join(testSandbox, dir)
        if exists(dPath):
            shutil.rmtree(dPath)

    # script that prints its environment and host/cwd/account details
    scriptFile = abspath(join(testSandbox, 'script.sh'))
    if exists(scriptFile):
        os.remove(scriptFile)
    with open(scriptFile, 'w') as f:
        f.write('''
#!/bin/bash

echo "*** environment ***"
env

echo "*** info ***"
echo "host: `hostname --fqdn`"
echo "cwd: `pwd`"
echo "date: `date`"
echo "account: `id`"
echo "taskset: `taskset -p $$`"
''')

    # stdin fixture consumed by the 'wc -m' job
    hostnameStdinFile = abspath(join(testSandbox, 'hostname.stdin'))
    if exists(hostnameStdinFile):
        os.remove(hostnameStdinFile)
    with open(hostnameStdinFile, 'w') as f:
        f.write('some host name')

    # four jobs exercising stdin redirection, environment capture,
    # a timed sleep, and script execution
    # NOTE(review): tool paths are hard-coded (/usr/bin/...) — assumes
    # a Unix host with these binaries at those locations
    jobs = [
        Job(
            'job1',
            JobExecution(
                '/usr/bin/wc',
                args=['-m'],
                wd=abspath(join(testSandbox, 'hostname.sandbox')),
                stdin=hostnameStdinFile,
                stdout='hostname.stdout',
                stderr='hostname.stderr',
            ),
            JobResources(numCores=ResourceSize(2))),
        Job(
            'job2',
            JobExecution(
                '/usr/bin/env',
                wd=abspath(join(testSandbox, 'env.sandbox')),
                stdout='env.stdout',
                stderr='env.stderr',
            ),
            JobResources(numCores=ResourceSize(1))),
        Job(
            'sleep',
            JobExecution(
                '/usr/bin/sleep',
                args=['2s'],
                wd=abspath(join(testSandbox, 'sleep.sandbox')),
                stdout='sleep.stdout',
                stderr='sleep.stderr',
            ),
            JobResources(numCores=ResourceSize(1))),
        Job(
            'script',
            JobExecution(
                '/usr/bin/bash',
                args=[scriptFile],
                wd=abspath(join(testSandbox, 'script.sandbox')),
                stdout='script.stdout',
                stderr='script.stderr',
            ),
            JobResources(numCores=ResourceSize(1)))
    ]

    startTime = datetime.datetime.now()
    asyncio.get_event_loop().run_until_complete(
        asyncio.gather(self.__schedule(jobs, manager)))
    asyncio.get_event_loop().close()
    duration = datetime.datetime.now() - startTime

    # the 2s sleep bounds the run from below; 6s is a generous upper
    # bound for all four jobs to complete
    self.assertTrue(
        duration.total_seconds() > 2 and duration.total_seconds() < 6)

    # every job must have its sandbox, output files, and empty stderr
    for job in jobs:
        self.assertTrue(os.path.exists(job.execution.wd))
        self.assertTrue(
            os.path.exists(
                os.path.join(job.execution.wd, job.execution.stdout)))
        self.assertTrue(
            os.path.exists(
                os.path.join(job.execution.wd, job.execution.stderr)))

        stderrStat = os.stat(
            os.path.join(job.execution.wd, job.execution.stderr))
        self.assertTrue(stderrStat.st_size == 0)