def test_get_parametric_dataset(self):
    """Verify extraction of the parametric dataset from a DIRAC API script."""
    # No setParametricInputData call at all -> extraction must fail.
    error_script1 = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
"""
    # Duplicated setParametricInputData call -> extraction must fail.
    error_script2 = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
"""
    # Exactly one setParametricInputData call -> extraction succeeds.
    script = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
j.somethingelse('other')
"""
    for bad_script in (error_script1, error_script2):
        self.assertRaises(BackendError,
                          get_parametric_datasets,
                          bad_script.splitlines())
    extracted = get_parametric_datasets(script.splitlines())
    self.assertEqual(extracted,
                     [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']],
                     "parametric dataset not correctly extracted")
    self.assertTrue(isinstance(get_parametric_datasets(script.splitlines()), list))
def test_get_parametric_dataset(self):
    """Check that get_parametric_datasets pulls the dataset list out of a DIRAC script."""
    # Script lacking any setParametricInputData line: should raise.
    error_script1 = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
"""
    # Script with two setParametricInputData lines: should also raise.
    error_script2 = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
"""
    # Valid script containing a single setParametricInputData line.
    script = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
j.somethingelse('other')
"""
    self.assertRaises(BackendError,
                      get_parametric_datasets,
                      error_script1.splitlines())
    self.assertRaises(BackendError,
                      get_parametric_datasets,
                      error_script2.splitlines())
    expected = [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
    self.assertEqual(get_parametric_datasets(script.splitlines()),
                     expected,
                     "parametric dataset not correctly extracted")
    self.assertTrue(isinstance(get_parametric_datasets(script.splitlines()), list))
def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
    """
    This is the old bulk submit method which is used to construct the
    subjobs for a parametric job.

    Args:
        dirac_ids (list): List of the Dirac ids which have been created
        dirac_script (str): Name of the dirac script which contains the job jdl

    Returns:
        bool: True once all subjobs have been attached to the master job

    Raises:
        BackendError: If the number of datasets in the script does not
            match the number of ids returned by DIRAC.
    """
    # 'with' guarantees the script file is closed even if parsing raises.
    with open(dirac_script, 'r') as f:
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
    if len(parametric_datasets) != len(dirac_ids):
        raise BackendError(
            'Dirac',
            'Missmatch between number of datasets defines in dirac API script and those returned by DIRAC'
        )
    master_job = self.getJobObject()
    # Rebuild the subjob list from scratch for this master job.
    master_job.subjobs = []
    for i, dirac_id in enumerate(dirac_ids):
        j = Job()
        j.copyFrom(master_job)
        j.splitter = None
        j.backend.id = dirac_id
        j.id = i
        # Each subjob gets the i-th parametric dataset as its input data.
        j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
        j.status = 'submitted'
        j.time.timenow('submitted')
        master_job.subjobs.append(j)
    return True
def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
    """
    This is the old bulk submit method which is used to construct the
    subjobs for a parametric job.

    Args:
        dirac_ids (list): List of the Dirac ids which have been created
        dirac_script (str): Name of the dirac script which contains the job jdl

    Returns:
        bool: True once all subjobs have been attached to the master job

    Raises:
        BackendError: If the number of datasets in the script does not
            match the number of ids returned by DIRAC.
    """
    # 'with' guarantees the script file is closed even if parsing raises.
    with open(dirac_script, 'r') as f:
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
    if len(parametric_datasets) != len(dirac_ids):
        raise BackendError(
            'Dirac',
            'Missmatch between number of datasets defines in dirac API script and those returned by DIRAC'
        )
    # Local import keeps the module import graph lazy, as in the original.
    from Ganga.GPIDev.Lib.Job.Job import Job
    master_job = self.getJobObject()
    # Rebuild the subjob list from scratch for this master job.
    master_job.subjobs = []
    for i, dirac_id in enumerate(dirac_ids):
        j = Job()
        j.copyFrom(master_job)
        j.splitter = None
        j.backend.id = dirac_id
        j.id = i
        # Each subjob gets the i-th parametric dataset as its input data.
        j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
        j.status = 'submitted'
        j.time.timenow('submitted')
        master_job.subjobs.append(j)
    return True
def test_get_parametric_dataset():
    """Verify extraction of the parametric dataset from a DIRAC API script."""
    # No setParametricInputData call at all -> extraction must fail.
    error_script1 = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
"""
    # Duplicated setParametricInputData call -> extraction must fail.
    error_script2 = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
"""
    # Exactly one setParametricInputData call -> extraction succeeds.
    script = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
j.somethingelse('other')
"""
    for bad_script in (error_script1, error_script2):
        with pytest.raises(BackendError):
            get_parametric_datasets(bad_script.splitlines())
    extracted = get_parametric_datasets(script.splitlines())
    assert extracted == [
        ['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']
    ], 'parametric dataset not correctly extracted'
    assert isinstance(get_parametric_datasets(script.splitlines()), list)
def test_get_parametric_dataset():
    """Check that get_parametric_datasets pulls the dataset list out of a DIRAC script."""
    # Script lacking any setParametricInputData line: should raise.
    error_script1 = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
"""
    # Script with two setParametricInputData lines: should also raise.
    error_script2 = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
"""
    # Valid script containing a single setParametricInputData line.
    script = """
from DIRAC import Job
j=Job()
j.outputsomething('output.root')
j.setParametricInputData([['a','b','c'],['d','e','f'],['g','h','i']])
j.somethingelse('other')
"""
    with pytest.raises(BackendError):
        get_parametric_datasets(error_script1.splitlines())
    with pytest.raises(BackendError):
        get_parametric_datasets(error_script2.splitlines())
    expected = [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
    assert get_parametric_datasets(script.splitlines()) == expected, \
        'parametric dataset not correctly extracted'
    assert isinstance(get_parametric_datasets(script.splitlines()), list)
def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
    """
    Construct subjobs for a parametric job from the datasets recorded in
    the submitted dirac script, then commit the master job.

    Args:
        dirac_ids (list): List of the Dirac ids which have been created
        dirac_script (str): Name of the dirac script which contains the job jdl

    Returns:
        bool: True once all subjobs have been attached and committed

    Raises:
        BackendError: If the number of datasets in the script does not
            match the number of ids returned by DIRAC.
    """
    # 'with' guarantees the script file is closed even if parsing raises.
    with open(dirac_script, 'r') as f:
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
    if len(parametric_datasets) != len(dirac_ids):
        raise BackendError('Dirac', 'Missmatch between number of datasets defines in dirac API script and those returned by DIRAC')
    # Local import keeps the module import graph lazy, as in the original.
    from Ganga.GPIDev.Lib.Job.Job import Job
    master_job = self.getJobObject()
    # NOTE(review): unlike the sibling variant, this version appends to any
    # existing master_job.subjobs rather than resetting the list first —
    # confirm whether that is intentional.
    for i, dirac_id in enumerate(dirac_ids):
        j = Job()
        j.copyFrom(master_job)
        j.splitter = None
        j.backend.id = dirac_id
        j.id = i
        # Each subjob gets the i-th parametric dataset as its input data.
        j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
        j.status = 'submitted'
        j.time.timenow('submitted')
        master_job.subjobs.append(j)
    # Persist the new subjob structure.
    master_job._commit()
    return True
def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
    """
    Construct subjobs for a parametric job from the datasets recorded in
    the submitted dirac script, then commit the master job.

    Args:
        dirac_ids (list): List of the Dirac ids which have been created
        dirac_script (str): Name of the dirac script which contains the job jdl

    Returns:
        bool: True once all subjobs have been attached and committed

    Raises:
        BackendError: If the number of datasets in the script does not
            match the number of ids returned by DIRAC.
    """
    # 'with' guarantees the script file is closed even if parsing raises.
    with open(dirac_script, 'r') as f:
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
    if len(parametric_datasets) != len(dirac_ids):
        raise BackendError('Dirac', 'Missmatch between number of datasets defines in dirac API script and those returned by DIRAC')
    # Local import keeps the module import graph lazy, as in the original.
    from Ganga.GPIDev.Lib.Job.Job import Job
    master_job = self.getJobObject()
    # Rebuild the subjob list from scratch for this master job.
    master_job.subjobs = []
    for i, dirac_id in enumerate(dirac_ids):
        j = Job()
        j.copyFrom(master_job)
        j.splitter = None
        j.backend.id = dirac_id
        j.id = i
        # Each subjob gets the i-th parametric dataset as its input data.
        j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
        j.status = 'submitted'
        j.time.timenow('submitted')
        master_job.subjobs.append(j)
    # Persist the new subjob structure.
    master_job._commit()
    return True
def _resubmit(self):
    """Resubmit a DIRAC job.

    Rebuilds the dirac API script from the one saved at original
    submission time, re-applying the current ``self.settings`` in the
    user-settings section, then submits the new script.

    Returns:
        The result of ``self._common_submit`` on the regenerated script.

    Raises:
        BackendError: If no saved dirac-script.py can be found, or if the
            parametric datasets in the script are inconsistent with the
            job's current attributes.
    """
    j = self.getJobObject()
    parametric = False
    script_path = os.path.join(j.getInputWorkspace().getPath(), 'dirac-script.py')
    # Check old script: a subjob falls back to its master's script.
    if j.master is None and not os.path.exists(script_path):
        raise BackendError('Dirac', 'No "dirac-script.py" found in j.inputdir')

    if j.master is not None and not os.path.exists(script_path):
        script_path = os.path.join(
            j.master.getInputWorkspace().getPath(), 'dirac-script.py')
        if not os.path.exists(script_path):
            raise BackendError('Dirac', 'No "dirac-script.py" found in j.inputdir or j.master.inputdir')
        # Master-level scripts were submitted parametrically.
        parametric = True

    # Read old script; 'with' ensures the handle is closed on error.
    with open(script_path, 'r') as f:
        script = f.read()

    # Create new script - ##note instead of using get_parametric_dataset
    # could just use j.inputdata.
    if parametric is True:
        parametric_datasets = get_parametric_datasets(script.split('\n'))
        if j.master:
            if len(parametric_datasets) != len(j.master.subjobs):
                raise BackendError('Dirac', 'number of parametric datasets defined in API script doesn\'t match number of master.subjobs')
        # Only non-DiracFile input files are cross-checked against the script.
        if j.inputdata and len(j.inputdata) > 0:
            _input_files = [f for f in j.inputdata if not isType(f, DiracFile)]
        else:
            _input_files = []
        if set(parametric_datasets[j.id]).symmetric_difference(set([f.namePattern for f in _input_files])):
            raise BackendError(
                'Dirac', 'Mismatch between dirac-script and job attributes.')
        # Narrow the parametric submission down to this one subjob's dataset.
        script = script.replace('.setParametricInputData(%s)' % str(parametric_datasets),
                                '.setInputData(%s)' % str(parametric_datasets[j.id]))
        script = script.replace('%n', str(j.id))  # name

    # Keep everything up to (and including) the user-settings marker,
    # then regenerate the settings section from self.settings.
    start_user_settings = '# <-- user settings\n'
    new_script = script[
        :script.find(start_user_settings) + len(start_user_settings)]

    job_ident = get_job_ident(script.split('\n'))
    # .items() instead of the Python-2-only .iteritems().
    for key, value in self.settings.items():
        # Accept both 'setXyz' and bare 'Xyz' keys; emit 'ident.setXyz(...)'.
        if str(key).startswith('set'):
            _key = key[3:]
        else:
            _key = key
        if type(value) is str:
            template = '%s.set%s("%s")\n'
        else:
            template = '%s.set%s(%s)\n'
        new_script += template % (job_ident, str(_key), str(value))
    new_script += script[script.find('# user settings -->'):]

    # Save new script over the old one in this job's input workspace.
    new_script_filename = os.path.join(j.getInputWorkspace().getPath(),
                                       'dirac-script.py')
    with open(new_script_filename, 'w') as f:
        f.write(new_script)
    return self._common_submit(new_script_filename)