# Example #1
# 0
    def __init__(self):
        """Parse the command line and assemble the service components.

        Options are read with ``argparse``; the log and job report files
        are then prepared, every enabled interface is configured from a
        shared configuration dictionary, and finally the manager, its job
        notifier and the receiver are created.

        Raises:
            InvalidArgument: when neither ``--net`` nor ``--file`` is given.
        """
        cli = argparse.ArgumentParser()
        cli.add_argument("--net",
                         action="store_true",
                         help="enable network interface")
        cli.add_argument("--net-port",
                         type=int,
                         default=5555,
                         help="port to listen for network interface")
        cli.add_argument("--file",
                         action="store_true",
                         help="enable file interface")
        cli.add_argument("--file-path",
                         default="qcg_pm_reqs.json",
                         help="path to the request file")
        cli.add_argument("--wd",
                         default=".",
                         help="working directory for the service")
        cli.add_argument(
            "--exschema",
            default="direct",
            help="execution schema [slurm|direct] (direct by default)")
        self.__args = cli.parse_args()
        opts = self.__args

        # At least one way of receiving requests has to be enabled.
        if not (opts.net or opts.file):
            raise InvalidArgument("no interface enabled - finishing")

        self.__wd = opts.wd

        self.__setupLogging()
        self.__setupReports()

        # One configuration dictionary is shared by the executor settings
        # and by both interface kinds.
        self.__conf = {
            Executor.EXECUTOR_WD: opts.wd,
            Executor.EXECUTION_SCHEMA: opts.exschema,
            FileInterface.CONF_FILE_PATH: opts.file_path,
            ZMQInterface.CONF_IP_ADDRESS: "*",
            ZMQInterface.CONF_PORT: opts.net_port
        }

        # The file interface (if enabled) is registered before the network one.
        self.__ifaces = []
        for enabled, ifaceClass in ((opts.file, FileInterface),
                                    (opts.net, ZMQInterface)):
            if not enabled:
                continue
            iface = ifaceClass()
            iface.setup(self.__conf)
            self.__ifaces.append(iface)

        self.__manager = Manager(parse_slurm_resources(), self.__conf)
        self.__notifId = self.__manager.registerNotifier(
            self.__jobNotify, self.__manager)
        self.__receiver = Receiver(self.__manager, self.__ifaces)
# Example #2
# 0
    def Notest_ZMQInterfacesInit(self):
        """Run a receiver with a single ZMQ interface until it is stopped.

        The name does not start with ``test``, so the default unittest
        discovery prefix does not pick this method up.
        """
        # SLURM resources are used here; self.createLocalResources() is the
        # local alternative.
        manager = Manager(self.createSlurmResources())

        # "*" is passed as the address; a fixed one such as "172.16.16.3"
        # can be configured instead.
        zmqConf = {
            ZMQInterface.CONF_IP_ADDRESS: "*",
            ZMQInterface.CONF_PORT: "5555",
        }

        zmqIface = ZMQInterface()
        zmqIface.setup(zmqConf)

        receiver = Receiver(manager, [zmqIface])
        receiver.run()

        loop = asyncio.get_event_loop()
        loop.run_until_complete(
            asyncio.gather(self.__stopInterfaces(zmqConf, receiver)))
        loop.close()
	def test_FileInterfaceIterateSubmit(self):
		"""Feed iterated submit requests through the file interface.

		After the manager has finished, every iteration in
		``[startIter, endIter)`` must have its sandbox directory together
		with the stdout and stderr files inside it.
		"""
		res = self.createLocalResources()

		reqsFilePath = self.__createSubmitIteratableRequests()

		manager = Manager(res)

		ifaces = [ FileInterface() ]
		ifaces[0].setup( { FileInterface.CONF_FILE_PATH: reqsFilePath } )

		receiver = Receiver(manager, ifaces)

		receiver.run()

		asyncio.get_event_loop().run_until_complete(asyncio.gather(
			self.__waitForFinish(manager)
			))

		asyncio.get_event_loop().close()

		for i in range(self.startIter, self.endIter):
			# The sandbox path is a template with an ``it`` placeholder.
			jobWdDir = Template(self.iterJobSandbox).safe_substitute({ 'it': i })
			# Check the directory on disk: asserting the path string itself
			# is always true for a non-empty string and verifies nothing.
			self.assertTrue(os.path.exists(jobWdDir))
			self.assertTrue(os.path.exists(os.path.join(jobWdDir, self.iterJobStdoutName)))
			self.assertTrue(os.path.exists(os.path.join(jobWdDir, self.iterJobStderrName)))
	def test_FileInterfacesInit(self):
		"""Process the sample requests through a single file interface.

		Once the manager has finished, the job sandbox and the job's
		stdout and stderr files inside it must exist.
		"""
		resources = self.createLocalResources()
		requestsPath = self.__createSampleRequests()
		manager = Manager(resources)

		fileIface = FileInterface()
		fileIface.setup({FileInterface.CONF_FILE_PATH: requestsPath})

		receiver = Receiver(manager, [fileIface])
		receiver.run()

		loop = asyncio.get_event_loop()
		loop.run_until_complete(asyncio.gather(self.__waitForFinish(manager)))
		loop.close()

		# The sandbox itself first, then the two output files within it.
		for parts in ((), (self.jobStdoutName,), (self.jobStderrName,)):
			self.assertTrue(os.path.exists(os.path.join(self.jobSandbox, *parts)))
    def test_JobVars(self):
        res = self.createLocalResources()
        manager = Manager(res, self.config)

        scriptFile = join(self.testSandbox, 'script.sh')
        if exists(scriptFile):
            os.remove(scriptFile)

        with open(scriptFile, 'w') as f:
            f.write('''
#!/bin/env bash

echo "*** environment ***"
env

			''')
        os.chmod(scriptFile, stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR)

        jobs = [
            Job(
                'job1',
                JobExecution(
                    'bash',
                    args=[scriptFile],
                    wd='${root_wd}/job1',
                    stdout='${nnodes}.${ncores}-${jname}.stdout',
                    stderr='${nnodes}.${ncores}-${jname}.stderr',
                ),
                JobResources(numNodes=ResourceSize(1),
                             numCores=ResourceSize(2)))
        ]

        startTime = datetime.datetime.now()

        asyncio.get_event_loop().run_until_complete(
            asyncio.gather(self.__schedule(jobs, manager)))
        asyncio.get_event_loop().close()

        duration = datetime.datetime.now() - startTime
        self.assertTrue(duration.total_seconds() > 0
                        and duration.total_seconds() < 4)

        job1_wd = join(self.testSandbox, 'job1')
        self.assertTrue(os.path.exists(job1_wd))
        self.assertTrue(
            os.path.exists(os.path.join(job1_wd, '1.2-job1.stdout')))
        self.assertTrue(
            os.path.exists(os.path.join(job1_wd, '1.2-job1.stderr')))

        stderrStat = os.stat(os.path.join(job1_wd, '1.2-job1.stderr'))
        self.assertTrue(stderrStat.st_size == 0)

        with open(os.path.join(job1_wd, '1.2-job1.stdout'), 'r', 1) as file, \
             mmap.mmap(file.fileno(), 0, prot=mmap.PROT_READ) as s:
            self.assertTrue(s.find('QCG_PM_NTASKS=2'.encode('UTF-8')) != -1)
            self.assertTrue(
                s.find('QCG_PM_TASKS_PER_NODE=2'.encode('UTF-8')) != -1)
            self.assertTrue(s.find('QCG_PM_NNODES=1'.encode('UTF-8')) != -1)
            self.assertTrue(s.find('QCG_PM_NPROCS=2'.encode('UTF-8')) != -1)
    def Notest_FileInterfacesService(self):
        """Run a receiver with a single file interface until it is stopped.

        The request file defaults to ``reqs.json`` and can be overridden
        with the ``QCG_PM_REQS_FILE`` environment variable. The name does
        not start with ``test``, so the default unittest discovery prefix
        does not pick this method up.
        """
        # Local resources are used here; self.createSlurmResources() is the
        # SLURM alternative.
        manager = Manager(self.createLocalResources())
        manager.registerNotifier(self.__jobNotify, manager)

        fileConf = {
            FileInterface.CONF_FILE_PATH:
            os.environ.get('QCG_PM_REQS_FILE', "reqs.json")
        }

        fileIface = FileInterface()
        fileIface.setup(fileConf)

        receiver = Receiver(manager, [fileIface])
        receiver.run()

        loop = asyncio.get_event_loop()
        loop.run_until_complete(
            asyncio.gather(self.__stopInterfaces(fileConf, receiver)))
        loop.close()
# Example #7
# 0
class QCGPMService:
    """Command-line service tying request interfaces to a job manager.

    The constructor parses the command line, prepares the log and job
    report files in the working directory and creates the enabled request
    interfaces, the manager and the receiver. :meth:`start` then runs the
    receiver until it reports that it has finished.
    """

    def __init__(self):
        """Parse the command line and assemble the service components.

        Raises:
            InvalidArgument: when neither ``--net`` nor ``--file`` is given.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--net",
                            help="enable network interface",
                            action="store_true")
        parser.add_argument("--net-port",
                            help="port to listen for network interface",
                            type=int,
                            default=5555)
        parser.add_argument("--file",
                            help="enable file interface",
                            action="store_true")
        parser.add_argument("--file-path",
                            help="path to the request file",
                            default="qcg_pm_reqs.json")
        parser.add_argument("--wd",
                            help="working directory for the service",
                            default=".")
        parser.add_argument(
            "--exschema",
            help="execution schema [slurm|direct] (direct by default)",
            default="direct")
        self.__args = parser.parse_args()

        # At least one way of receiving requests has to be enabled.
        if not self.__args.net and not self.__args.file:
            raise InvalidArgument("no interface enabled - finishing")

        self.__wd = self.__args.wd

        self.__setupLogging()
        self.__setupReports()

        # One configuration dictionary is shared by the executor settings
        # and by both interface kinds.
        self.__conf = {
            Executor.EXECUTOR_WD: self.__args.wd,
            Executor.EXECUTION_SCHEMA: self.__args.exschema,
            FileInterface.CONF_FILE_PATH: self.__args.file_path,
            ZMQInterface.CONF_IP_ADDRESS: "*",
            ZMQInterface.CONF_PORT: self.__args.net_port
        }

        self.__ifaces = []
        if self.__args.file:
            iface = FileInterface()
            iface.setup(self.__conf)
            self.__ifaces.append(iface)

        if self.__args.net:
            iface = ZMQInterface()
            iface.setup(self.__conf)
            self.__ifaces.append(iface)

        self.__manager = Manager(parse_slurm_resources(), self.__conf)
        self.__notifId = self.__manager.registerNotifier(
            self.__jobNotify, self.__manager)
        self.__receiver = Receiver(self.__manager, self.__ifaces)

    def __setupReports(self):
        """Set the job report path and delete a report left by an earlier run."""
        self.__jobReportFile = join(self.__wd, 'jobs.report')

        if exists(self.__jobReportFile):
            os.remove(self.__jobReportFile)

    def __setupLogging(self):
        """Send DEBUG-level root logging to a fresh ``service.log`` file."""
        self.__logFile = join(self.__wd, 'service.log')

        # Remove the previous log so the append-mode handler starts empty.
        if exists(self.__logFile):
            os.remove(self.__logFile)

        rootLogger = logging.getLogger()
        handler = logging.FileHandler(filename=self.__logFile,
                                      mode='a',
                                      delay=False)
        handler.setFormatter(logging.Formatter('%(asctime)-15s: %(message)s'))
        rootLogger.addHandler(handler)
        rootLogger.setLevel(logging.DEBUG)

    async def __stopInterfaces(self, receiver):
        """Poll ``receiver.isFinished`` once a second, then stop the receiver."""
        while not receiver.isFinished:
            await asyncio.sleep(1)

        logging.info("stopping receiver ...")
        receiver.stop()

    def __jobNotify(self, jobId, state, manager):
        """Append a finished job's history and runtime data to the report.

        Args:
            jobId: identifier of the job whose state changed.
            state: new state; must offer ``isFinished()`` and ``name``.
            manager: object whose ``jobList.get(jobId)`` returns the job.

        Notifications for unfinished states are ignored. A job that cannot
        be found is logged and skipped.
        """
        if self.__jobReportFile is None or not state.isFinished():
            return

        # Look the job up before touching the report file, so a failed
        # lookup cannot leave a half-written (or empty) entry behind.
        job = manager.jobList.get(jobId)
        if job is None:
            logging.warning("job %s not found - skipping report entry", jobId)
            return

        # History entries are indexed as (state, value); the value is
        # printed first - presumably a timestamp, confirm with the job type.
        history = "\n\t".join(
            "%s: %s" % (str(en[1]), en[0].name) for en in job.history)
        runtime = "\n\t".join(
            "%s: %s" % (k, v) for k, v in job.runtime.items())

        with open(self.__jobReportFile, 'a') as f:
            f.write("%s (%s)\n\t%s\n\t%s\n" %
                    (jobId, state.name, history, runtime))

    def start(self):
        """Run the receiver and block until it has finished and been stopped."""
        self.__receiver.run()

        loop = asyncio.get_event_loop()
        try:
            loop.run_until_complete(
                asyncio.gather(self.__stopInterfaces(self.__receiver)))
        finally:
            # Close the loop even when waiting was interrupted or failed.
            loop.close()
    def test_ExecutorSimple(self):
        res = self.createLocalResources()
        manager = Manager(res)

        testSandbox = 'test-sandbox'
        if exists(testSandbox):
            shutil.rmtree(testSandbox)
        os.makedirs(testSandbox)

        for dir in [
                'hostname.sandbox', 'env.sandbox', 'sleep.sandbox',
                'script.sandbox'
        ]:
            dPath = join(testSandbox, dir)
            if exists(dPath):
                shutil.rmtree(dPath)

        scriptFile = abspath(join(testSandbox, 'script.sh'))
        if exists(scriptFile):
            os.remove(scriptFile)

        with open(scriptFile, 'w') as f:
            f.write('''
#!/bin/bash

echo "*** environment ***"
env

echo "*** info ***"
echo "host: `hostname --fqdn`"
echo "cwd: `pwd`"
echo "date: `date`"
echo "account: `id`"
echo "taskset: `taskset -p $$`"
			''')

        hostnameStdinFile = abspath(join(testSandbox, 'hostname.stdin'))
        if exists(hostnameStdinFile):
            os.remove(hostnameStdinFile)

        with open(hostnameStdinFile, 'w') as f:
            f.write('some host name')

        jobs = [
            Job(
                'job1',
                JobExecution(
                    '/usr/bin/wc',
                    args=['-m'],
                    wd=abspath(join(testSandbox, 'hostname.sandbox')),
                    stdin=hostnameStdinFile,
                    stdout='hostname.stdout',
                    stderr='hostname.stderr',
                ), JobResources(numCores=ResourceSize(2))),
            Job(
                'job2',
                JobExecution(
                    '/usr/bin/env',
                    wd=abspath(join(testSandbox, 'env.sandbox')),
                    stdout='env.stdout',
                    stderr='env.stderr',
                ), JobResources(numCores=ResourceSize(1))),
            Job(
                'sleep',
                JobExecution(
                    '/usr/bin/sleep',
                    args=['2s'],
                    wd=abspath(join(testSandbox, 'sleep.sandbox')),
                    stdout='sleep.stdout',
                    stderr='sleep.stderr',
                ), JobResources(numCores=ResourceSize(1))),
            Job(
                'script',
                JobExecution(
                    '/usr/bin/bash',
                    args=[scriptFile],
                    wd=abspath(join(testSandbox, 'script.sandbox')),
                    stdout='script.stdout',
                    stderr='script.stderr',
                ), JobResources(numCores=ResourceSize(1)))
        ]

        startTime = datetime.datetime.now()

        asyncio.get_event_loop().run_until_complete(
            asyncio.gather(self.__schedule(jobs, manager)))
        asyncio.get_event_loop().close()

        duration = datetime.datetime.now() - startTime
        self.assertTrue(duration.total_seconds() > 2
                        and duration.total_seconds() < 6)

        for job in jobs:
            self.assertTrue(os.path.exists(job.execution.wd))
            self.assertTrue(
                os.path.exists(
                    os.path.join(job.execution.wd, job.execution.stdout)))
            self.assertTrue(
                os.path.exists(
                    os.path.join(job.execution.wd, job.execution.stderr)))

            stderrStat = os.stat(
                os.path.join(job.execution.wd, job.execution.stderr))
            self.assertTrue(stderrStat.st_size == 0)