def get_workflow():
    scope = 'data16_13TeV'
    name = 'data16_13TeV.00298862.physics_Main.daq.RAW'
    src_rse = 'NDGF-T1_DATATAPE'
    dest_rse = 'NDGF-T1_DATADISK'
    rule_id = get_rule_id(scope, name, src_rse, dest_rse)

    work = ATLASStageinWork(executable=None, arguments=None, parameters=None, setup=None,
                            exec_type='local', sandbox=None, work_id=None,
                            primary_input_collection={'scope': scope, 'name': name},
                            other_input_collections=None,
                            output_collections={'scope': scope, 'name': name + '.idds.stagein'},
                            log_collections=None, logger=None,
                            max_waiting_time=3600 * 7 * 24,
                            src_rse=src_rse, dest_rse=dest_rse, rule_id=rule_id)
    wf = Workflow()
    wf.add_work(work)
    # work.set_workflow(wf)
    return wf
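
# A minimal submission sketch for the stage-in workflow built above. It reuses
# the client-side helpers that appear later in this file (get_rest_host,
# ClientManager, wm.submit); the import path for ClientManager is assumed to be
# idds.client.clientmanager, as in typical iDDS client code.
def submit_stagein_workflow():
    from idds.client.clientmanager import ClientManager
    host = get_rest_host()
    wm = ClientManager(host=host)
    wf = get_workflow()
    # submit() returns the request_id assigned by the iDDS server
    request_id = wm.submit(wf)
    return request_id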
def test_workflow(self):
    """
    Workflow: Test workflow
    """
    # init_p = Parameter({'input_dataset': 'data17:data17.test.raw.1'})
    work1 = Work(executable='/bin/hostname', arguments=None, sandbox=None, work_id=1)
    work2 = Work(executable='echo',
                 arguments='--in=IN_DATASET --out=OUT_DATASET',
                 sandbox=None, work_id=2,
                 primary_input_collection={'scope': 'data17', 'name': 'data17.test.raw.1'},
                 output_collections=[{'scope': 'data17', 'name': 'data17.test.work2'}])
    work3 = Work(executable='echo',
                 arguments='--in=IN_DATASET --out=OUT_DATASET',
                 sandbox=None, work_id=3,
                 primary_input_collection={'scope': 'data17', 'name': 'data17.test.work2'},
                 output_collections=[{'scope': 'data17', 'name': 'data17.test.work3'}])

    workflow = Workflow()
    workflow.add_work(work1, initial=True)
    workflow.add_work(work2, initial=True)
    workflow.add_work(work3, initial=False)

    cond = Condition(cond=work2.is_finished, true_work=work3)
    # print(cond.all_works())
    workflow.add_condition(cond)

    # check
    # workflow_str = workflow.serialize()
    # workflow1 = Workflow.deserialize(workflow_str)
    # print(workflow_str)
    # print(workflow1)

    works = workflow.get_current_works()
    # print([str(work) for work in works])
    # print([w.work_id for w in works])
    assert(works == [])
def init(self):
    # init_p = Parameter({'input_dataset': 'data17:data17.test.raw.1'})
    work1 = Work(executable='/bin/hostname', arguments=None, sandbox=None, work_id=1)
    work2 = Work(executable='echo',
                 arguments='--in=IN_DATASET --out=OUT_DATASET',
                 sandbox=None, work_id=2,
                 primary_input_collection={'scope': 'data17', 'name': 'data17.test.raw.1'},
                 output_collections=[{'scope': 'data17', 'name': 'data17.test.work2'}])
    work3 = Work(executable='echo',
                 arguments='--in=IN_DATASET --out=OUT_DATASET',
                 sandbox=None, work_id=3,
                 primary_input_collection={'scope': 'data17', 'name': 'data17.test.work2'},
                 output_collections=[{'scope': 'data17', 'name': 'data17.test.work3'}])

    workflow = Workflow()
    workflow.add_work(work1, initial=True)
    workflow.add_work(work2, initial=True)
    workflow.add_work(work3, initial=False)

    cond = Condition(cond=work2.is_finished, true_work=work3)
    workflow.add_condition(cond)
    return workflow
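
# A round-trip sketch based on the serialize()/deserialize() calls commented
# out in test_workflow() above; it assumes those methods behave as the comments
# there suggest. check_roundtrip is a hypothetical helper, not part of the
# original test class.
def check_roundtrip(self):
    workflow = self.init()
    workflow_str = workflow.serialize()
    workflow1 = Workflow.deserialize(workflow_str)
    # the deserialized copy should expose the same current works
    ids = [w.work_id for w in workflow.get_current_works()]
    ids1 = [w.work_id for w in workflow1.get_current_works()]
    assert(ids == ids1)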
def get_workflow():
    from idds.workflowv2.workflow import Workflow
    from idds.atlas.workflowv2.atlashpowork import ATLASHPOWork

    # request_metadata for the predefined method 'nevergrad'
    request_metadata = {'workload_id': '20525135', 'sandbox': None, 'method': 'nevergrad',
                        'opt_space': {"A": {"type": "Choice", "params": {"choices": [1, 4]}},
                                      "B": {"type": "Scalar", "bounds": [0, 5]}},
                        'initial_points': [({'A': 1, 'B': 2}, 0.3), ({'A': 1, 'B': 3}, None)],
                        'max_points': 20, 'num_points_per_generation': 10}

    # request_metadata for the docker method (note: each assignment below
    # overrides the previous one; only the last request_metadata is used)
    request_metadata = {'workload_id': '20525134', 'sandbox': 'wguanicedew/idds_hpo_nevergrad',
                        'workdir': '/data', 'executable': 'docker',
                        'arguments': 'python /opt/hyperparameteropt_nevergrad.py --max_points=%MAX_POINTS --num_points=%NUM_POINTS --input=/data/%IN --output=/data/%OUT',  # noqa E501
                        'output_json': 'output.json',
                        'opt_space': {"A": {"type": "Choice", "params": {"choices": [1, 4]}},
                                      "B": {"type": "Scalar", "bounds": [0, 5]}},
                        'initial_points': [({'A': 1, 'B': 2}, 0.3), ({'A': 1, 'B': 3}, None)],
                        'max_points': 20, 'num_points_per_generation': 10}

    # request_metadata for the docker toymc method
    request_metadata = {'workload_id': '20525147', 'sandbox': 'wguanicedew/idds_hpo_toymc',
                        'workdir': '/data', 'executable': 'docker',
                        'arguments': 'python /opt/hyperparameteropt_toymc.py --max_points=%MAX_POINTS --num_points=%NUM_POINTS --input=/data/%IN --output=/data/%OUT',  # noqa E501
                        'output_json': 'output.json',
                        'opt_space': {"A": {}}, 'initial_points': [],
                        'max_points': 20, 'num_points_per_generation': 10}

    # note: the metadata above carries 'num_points_per_generation', so the
    # 'num_points_per_iteration' lookup below falls back to its default of 10
    work = ATLASHPOWork(executable=request_metadata.get('executable', None),
                        arguments=request_metadata.get('arguments', None),
                        parameters=request_metadata.get('parameters', None),
                        setup=None, exec_type='local',
                        sandbox=request_metadata.get('sandbox', None),
                        method=request_metadata.get('method', None),
                        container_workdir=request_metadata.get('workdir', None),
                        output_json=request_metadata.get('output_json', None),
                        opt_space=request_metadata.get('opt_space', None),
                        initial_points=request_metadata.get('initial_points', None),
                        max_points=request_metadata.get('max_points', None),
                        num_points_per_iteration=request_metadata.get('num_points_per_iteration', 10))
    wf = Workflow()
    wf.set_workload_id(request_metadata.get('workload_id', None))
    wf.add_work(work)
    return wf
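
# A hypothetical translation of the 'nevergrad' opt_space above into the
# nevergrad library's own parametrization, to show what the method is asked to
# optimize. The real mapping lives inside ATLASHPOWork / the
# hyperparameteropt_nevergrad.py container script, not in this file.
def sketch_nevergrad_space():
    import nevergrad as ng
    param = ng.p.Instrumentation(
        A=ng.p.Choice([1, 4]),            # {"type": "Choice", "params": {"choices": [1, 4]}}
        B=ng.p.Scalar(lower=0, upper=5))  # {"type": "Scalar", "bounds": [0, 5]}
    optimizer = ng.optimizers.NGOpt(parametrization=param, budget=20)  # budget ~ max_points
    candidate = optimizer.ask()           # one suggested point, e.g. {'A': 1, 'B': 2.7}
    optimizer.tell(candidate, 0.3)        # report the measured loss back
    return candidate.kwargs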
def setup_workflow():
    taskN1 = PanDATask()
    taskN1.step = "step1"
    taskN1.name = taskN1.step + "_" + randStr()
    taskN1.dependencies = [{"name": "00000" + str(k),
                            "dependencies": [],
                            "submitted": False} for k in range(6)]

    taskN2 = PanDATask()
    taskN2.step = "step2"
    taskN2.name = taskN2.step + "_" + randStr()
    taskN2.dependencies = [{"name": "000010",
                            "dependencies": [{"task": taskN1.name, "inputname": "000001", "available": False},
                                             {"task": taskN1.name, "inputname": "000002", "available": False}],
                            "submitted": False},
                           {"name": "000011",
                            "dependencies": [{"task": taskN1.name, "inputname": "000001", "available": False},
                                             {"task": taskN1.name, "inputname": "000002", "available": False}],
                            "submitted": False},
                           {"name": "000012",
                            "dependencies": [{"task": taskN1.name, "inputname": "000001", "available": False},
                                             {"task": taskN1.name, "inputname": "000002", "available": False}],
                            "submitted": False}]

    taskN3 = PanDATask()
    taskN3.step = "step3"
    taskN3.name = taskN3.step + "_" + randStr()
    taskN3.dependencies = [{"name": "000020",
                            "dependencies": [],
                            "submitted": False},
                           {"name": "000021",
                            "dependencies": [{"task": taskN2.name, "inputname": "000010", "available": False},
                                             {"task": taskN2.name, "inputname": "000011", "available": False}],
                            "submitted": False},
                           {"name": "000022",
                            "dependencies": [{"task": taskN2.name, "inputname": "000011", "available": False},
                                             {"task": taskN2.name, "inputname": "000012", "available": False}],
                            "submitted": False},
                           {"name": "000023",
                            "dependencies": [],
                            "submitted": False},
                           {"name": "000024",
                            "dependencies": [{"task": taskN3.name, "inputname": "000021", "available": False},
                                             {"task": taskN3.name, "inputname": "000023", "available": False}],
                            "submitted": False}]

    work1 = DomaPanDAWork(executable='echo',
                          primary_input_collection={'scope': 'pseudo_dataset', 'name': 'pseudo_input_collection#1'},
                          output_collections=[{'scope': 'pseudo_dataset', 'name': 'pseudo_output_collection#1'}],
                          log_collections=[], dependency_map=taskN1.dependencies,
                          task_name=taskN1.name, task_queue=task_queue,
                          encode_command_line=True,
                          task_log={"dataset": "PandaJob_#{pandaid}/", "destination": "local",
                                    "param_type": "log", "token": "local",
                                    "type": "template", "value": "log.tgz"},
                          task_cloud='LSST')
    work2 = DomaPanDAWork(executable='echo',
                          primary_input_collection={'scope': 'pseudo_dataset', 'name': 'pseudo_input_collection#2'},
                          output_collections=[{'scope': 'pseudo_dataset', 'name': 'pseudo_output_collection#2'}],
                          log_collections=[], dependency_map=taskN2.dependencies,
                          task_name=taskN2.name, task_queue=task_queue,
                          encode_command_line=True,
                          task_log={"dataset": "PandaJob_#{pandaid}/", "destination": "local",
                                    "param_type": "log", "token": "local",
                                    "type": "template", "value": "log.tgz"},
                          task_cloud='LSST')
    work3 = DomaPanDAWork(executable='echo',
                          primary_input_collection={'scope': 'pseudo_dataset', 'name': 'pseudo_input_collection#3'},
                          output_collections=[{'scope': 'pseudo_dataset', 'name': 'pseudo_output_collection#3'}],
                          log_collections=[], dependency_map=taskN3.dependencies,
                          task_name=taskN3.name, task_queue=task_queue,
                          encode_command_line=True,
                          task_log={"dataset": "PandaJob_#{pandaid}/", "destination": "local",
                                    "param_type": "log", "token": "local",
                                    "type": "template", "value": "log.tgz"},
                          task_cloud='LSST')

    pending_time = 12
    # pending_time = None
    workflow = Workflow(pending_time=pending_time)
    workflow.add_work(work1)
    workflow.add_work(work2)
    workflow.add_work(work3)
    workflow.name = 'test_workflow.idds.%s.test' % time.time()
    return workflow
def get_workflow():
    taskN1 = PanDATask()
    taskN1.step = "step1"
    taskN1.name = taskN1.step + "_" + randStr()
    taskN1.dependencies = [{"name": "00000" + str(k),
                            "dependencies": [],
                            "submitted": False} for k in range(6)]

    taskN2 = PanDATask()
    taskN2.step = "step2"
    taskN2.name = taskN2.step + "_" + randStr()
    taskN2.dependencies = [{"name": "000010",
                            "dependencies": [{"task": taskN1.name, "inputname": "000001", "available": False},
                                             {"task": taskN1.name, "inputname": "000002", "available": False}],
                            "submitted": False},
                           {"name": "000011",
                            "dependencies": [{"task": taskN1.name, "inputname": "000001", "available": False},
                                             {"task": taskN1.name, "inputname": "000002", "available": False}],
                            "submitted": False},
                           {"name": "000012",
                            "dependencies": [{"task": taskN1.name, "inputname": "000001", "available": False},
                                             {"task": taskN1.name, "inputname": "000002", "available": False}],
                            "submitted": False}]

    taskN3 = PanDATask()
    taskN3.step = "step3"
    taskN3.name = taskN3.step + "_" + randStr()
    taskN3.dependencies = [{"name": "000020",
                            "dependencies": [],
                            "submitted": False},
                           {"name": "000021",
                            "dependencies": [{"task": taskN2.name, "inputname": "000010", "available": False},
                                             {"task": taskN2.name, "inputname": "000011", "available": False}],
                            "submitted": False},
                           {"name": "000022",
                            "dependencies": [{"task": taskN2.name, "inputname": "000011", "available": False},
                                             {"task": taskN2.name, "inputname": "000012", "available": False}],
                            "submitted": False},
                           {"name": "000023",
                            "dependencies": [],
                            "submitted": False},
                           {"name": "000024",
                            "dependencies": [{"task": taskN3.name, "inputname": "000021", "available": False},
                                             {"task": taskN3.name, "inputname": "000023", "available": False}],
                            "submitted": False}]

    work1 = DomaPanDAWork(executable='echo',
                          primary_input_collection={'scope': 'pseudo_dataset', 'name': 'pseudo_input_collection#1'},
                          output_collections=[{'scope': 'pseudo_dataset', 'name': 'pseudo_output_collection#1'}],
                          log_collections=[], dependency_map=taskN1.dependencies,
                          task_name=taskN1.name, task_queue=task_queue)
    work2 = DomaPanDAWork(executable='echo',
                          primary_input_collection={'scope': 'pseudo_dataset', 'name': 'pseudo_input_collection#2'},
                          output_collections=[{'scope': 'pseudo_dataset', 'name': 'pseudo_output_collection#2'}],
                          log_collections=[], dependency_map=taskN2.dependencies,
                          task_name=taskN2.name, task_queue=task_queue)
    work3 = DomaPanDAWork(executable='echo',
                          primary_input_collection={'scope': 'pseudo_dataset', 'name': 'pseudo_input_collection#3'},
                          output_collections=[{'scope': 'pseudo_dataset', 'name': 'pseudo_output_collection#3'}],
                          log_collections=[], dependency_map=taskN3.dependencies,
                          task_name=taskN3.name, task_queue=task_queue)

    workflow = Workflow()
    workflow.add_work(work1)
    workflow.add_work(work2)
    workflow.add_work(work3)
    return workflow
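
# A hypothetical helper (not part of DomaPanDAWork) that shows how the
# dependency_map structure used above can be read: a job is ready to submit
# once every entry in its "dependencies" list is flagged available.
def ready_jobs(dependency_map):
    return [job["name"] for job in dependency_map
            if not job["submitted"]
            and all(dep["available"] for dep in job["dependencies"])]

# For taskN1.dependencies every job is ready immediately (no dependencies);
# for taskN2.dependencies nothing is ready until taskN1's outputs "000001"
# and "000002" are marked available.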
def get_workflow():
    from idds.workflowv2.workflow import Workflow, Condition
    from idds.atlas.workflowv2.atlaspandawork import ATLASPandaWork

    task_parameters1 = {
        "architecture": "",
        "cliParams": "prun --exec 'echo %RNDM:10 > seed.txt' --outputs seed.txt --nJobs 2 --outDS user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top",
        "excludedSite": [],
        "includedSite": None,
        "jobParameters": [
            {"type": "constant", "value": "-j \"\" --sourceURL ${SURL}"},
            {"type": "constant", "value": "-r ."},
            {"padding": False, "type": "constant", "value": "-p \""},
            {"padding": False, "type": "constant", "value": "echo%20%25RNDM%3A10%20%3E%20seed.txt"},
            {"type": "constant", "value": "\""},
            {"type": "constant", "value": "-a jobO.185663cd-6df9-4ac8-adf9-0d9bf9d5892e.tar.gz"},
            {"container": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top_seed.txt/",
             "dataset": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top_seed.txt/",
             "hidden": True,
             "param_type": "output",
             "type": "template",
             "value": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top.$JEDITASKID._${SN/P}.seed.txt"},
            {"type": "constant",
             "value": "-o \"{'seed.txt': 'user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top.$JEDITASKID._${SN/P}.seed.txt'}\""}
        ],
        "log": {"container": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top/",
                "dataset": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top/",
                "param_type": "log",
                "type": "template",
                "value": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top.log.$JEDITASKID.${SN}.log.tgz"},
        "nEvents": 2,
        "nEventsPerJob": 1,
        "nMaxFilesPerJob": 200,
        "noInput": True,
        "osInfo": "Linux-3.10.0-1160.36.2.el7.x86_64-x86_64-with-centos-7.9.2009-Core",
        "processingType": "panda-client-1.4.80-jedi-run",
        "prodSourceLabel": "user",
        "respectSplitRule": True,
        "site": None,
        "sourceURL": "https://aipanda048.cern.ch:25443",
        "taskName": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top",
        "transHome": None,
        "transUses": "",
        "uniqueTaskName": True,
        "userName": "******",
        "vo": "atlas"
    }

    task_parameters2 = {
        "architecture": "",
        "cliParams": "prun --exec 'echo %IN > results.root' --outputs results.root --forceStaged --inDS user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top_seed.txt/ --outDS user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_001_bottom",
        "excludedSite": [],
        "includedSite": None,
        "jobParameters": [
            {"type": "constant", "value": "-j \"\" --sourceURL ${SURL}"},
            {"type": "constant", "value": "-r ."},
            {"padding": False, "type": "constant", "value": "-p \""},
            {"padding": False, "type": "constant", "value": "echo%20%25IN%20%3E%20results.root"},
            {"type": "constant", "value": "\""},
            {"type": "constant", "value": "-a jobO.185663cd-6df9-4ac8-adf9-0d9bf9d5892e.tar.gz"},
            {"dataset": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top_seed.txt/",
             "exclude": "\\.log\\.tgz(\\.\\d+)*$",
             "expand": True,
             "param_type": "input",
             "type": "template",
             "value": "-i \"${IN/T}\""},
            {"container": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_001_bottom_results.root/",
             "dataset": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_001_bottom_results.root/",
             "hidden": True,
             "param_type": "output",
             "type": "template",
             "value": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_001_bottom.$JEDITASKID._${SN/P}.results.root"},
            {"type": "constant",
             "value": "-o \"{'results.root': 'user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_001_bottom.$JEDITASKID._${SN/P}.results.root'}\""}
        ],
        "log": {"container": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_001_bottom/",
                "dataset": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_001_bottom/",
                "param_type": "log",
                "type": "template",
                "value": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_001_bottom.log.$JEDITASKID.${SN}.log.tgz"},
        "nMaxFilesPerJob": 200,
        "noWaitParent": True,
        "osInfo": "Linux-3.10.0-1160.36.2.el7.x86_64-x86_64-with-centos-7.9.2009-Core",
        "parentTaskName": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_000_top",
        "processingType": "panda-client-1.4.80-jedi-run",
        "prodSourceLabel": "user",
        "respectSplitRule": True,
        "site": None,
        "sourceURL": "https://aipanda048.cern.ch:25443",
        "taskName": "user.tmaeno.389eb4c5-5db6-4b80-82aa-9edfae6dfb38_001_bottom",
        "transHome": None,
        "transUses": "",
        "uniqueTaskName": True,
        "userName": "******",
        "vo": "atlas"
    }

    work1 = ATLASPandaWork(task_parameters=task_parameters1)
    work2 = ATLASPandaWork(task_parameters=task_parameters2)
    wf = Workflow()
    wf.set_workload_id(234567)
    wf.add_work(work1)
    wf.add_work(work2)
    # cond = Condition(cond=work1.is_finished, true_work=work2)
    cond = Condition(cond=work1.is_started, true_work=work2)
    wf.add_condition(cond)
    return wf
def convert_req2reqv2(req):
    # v1 example: {'created_at': datetime.datetime(2020, 11, 3, 10, 9, 32), 'substatus': None, 'priority': 0, 'transform_tag': '2', 'requester': 'panda', 'request_metadata': {'workload_id': 23083304, 'rule_id': 'bef3da17f17c49ac97863bb9e96af672'}, 'name': 'valid1.361027.Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ7W.simul.HITS.e5112_s3227_tid12560193_00', 'request_id': 3775, 'accessed_at': datetime.datetime(2020, 11, 3, 10, 9, 32), 'updated_at': datetime.datetime(2020, 11, 3, 10, 9, 32), 'locking': <RequestLocking.Idle: 0>, 'status': <RequestStatus.Cancelled: 9>, 'workload_id': 23083304, 'request_type': <RequestType.StageIn: 2>, 'errors': None, 'processing_metadata': None, 'scope': 'valid1', 'expired_at': datetime.datetime(2020, 12, 3, 10, 9, 32), 'next_poll_at': datetime.datetime(2020, 11, 3, 10, 9, 32)}  # noqa E501
    request_metadata = req['request_metadata']
    if req['request_type'] == RequestType.StageIn:
        work = ATLASStageinWork(executable=None, arguments=None, parameters=None, setup=None,
                                exec_type='local', sandbox=None,
                                primary_input_collection={'scope': req['scope'], 'name': req['name']},
                                other_input_collections=None,
                                output_collections={'scope': req['scope'], 'name': req['name'] + '.idds.stagein'},
                                log_collections=None, logger=None,
                                max_waiting_time=request_metadata.get('max_waiting_time', 3600 * 7 * 24),
                                src_rse=request_metadata.get('src_rse', None),
                                dest_rse=request_metadata.get('dest_rse', None),
                                rule_id=request_metadata.get('rule_id', None))
    elif req['request_type'] == RequestType.Workflow:
        ori_workflow = request_metadata['workflow']
        ori_work = ori_workflow.works[ori_workflow.primary_initial_work]
        input_coll = ori_work.collections[ori_work.primary_input_collection]
        work = ATLASStageinWork(executable=None, arguments=None, parameters=None, setup=None,
                                exec_type='local', sandbox=None,
                                primary_input_collection={'scope': input_coll['scope'], 'name': input_coll['name']},
                                other_input_collections=None,
                                output_collections={'scope': input_coll['scope'], 'name': input_coll['name'] + '.idds.stagein'},
                                log_collections=None, logger=None,
                                max_waiting_time=ori_work.max_waiting_time,
                                src_rse=ori_work.src_rse,
                                dest_rse=ori_work.dest_rse,
                                rule_id=ori_work.rule_id)

    workload_id = req['workload_id']
    if not workload_id and 'workload_id' in request_metadata:
        workload_id = request_metadata['workload_id']

    wf = Workflow()
    wf.set_workload_id(workload_id)
    wf.add_work(work)

    host = get_rest_host()
    wm = ClientManager(host=host)
    request_id = wm.submit(wf)
    # print(request_id)
    return request_id
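
# Example invocation sketched from the v1 record quoted in the comment inside
# convert_req2reqv2(); the field values are copied from that comment, not taken
# from a live database.
def example_convert_stagein():
    req = {'request_type': RequestType.StageIn,
           'scope': 'valid1',
           'name': 'valid1.361027.Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ7W.simul.HITS.e5112_s3227_tid12560193_00',
           'workload_id': 23083304,
           'request_metadata': {'workload_id': 23083304, 'rule_id': 'bef3da17f17c49ac97863bb9e96af672'}}
    return convert_req2reqv2(req)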
def get_workflow():
    task_param_map = define_panda_task_paramsmap()
    work = ATLASPandaWork(panda_task_paramsmap=task_param_map)
    # initialize_work() parses the PanDA task parameter information (for
    # example, the output dataset name) that the next task needs. If the
    # information is not needed here, there is no need to call it manually;
    # iDDS will call it internally to parse the information.
    work.initialize_work()

    work_output_coll = work.get_output_collections()[0]

    input_coll = {'scope': work_output_coll['scope'],
                  'name': work_output_coll['name'],
                  'coll_metadata': {'force_close': True}}
    output_coll = {'scope': work_output_coll['scope'],
                   'name': work_output_coll['name'] + "." + str(int(time.time()))}

    # actuator = ATLASActuatorWork(executable='python', arguments='merge.py {output_json} {events} {dataset}/{filename}',
    actuator = ATLASActuatorWork(executable='python', arguments='merge.py {output_json} {events} {dataset}',
                                 parameters={'output_json': 'merge.json',
                                             'events': 200,
                                             'dataset': '{scope}:{name}'.format(**input_coll),
                                             'filename': 'output*.json'},
                                 sandbox=work.sandbox, primary_input_collection=input_coll,
                                 output_collections=output_coll, output_json='merge.json')
    wf = Workflow()
    # The two works form a loop, so it matters which one starts first.
    wf.add_work(work)
    wf.add_work(actuator)
    cond = Condition(work.is_finished, current_work=work, true_work=actuator, false_work=None)
    wf.add_condition(cond)
    cond1 = Condition(actuator.generate_new_task, current_work=actuator, true_work=work, false_work=None)
    wf.add_condition(cond1)
    # Because the two works are in a loop, they are not independent. This call
    # tells iDDS which one to start; otherwise iDDS would start with the first
    # work added.
    wf.add_initial_works(work)
    # work.set_workflow(wf)
    return wf
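
# A plain-Python illustration (not the iDDS engine) of the control flow the two
# conditions above create: the PanDA work runs, the actuator merges its output,
# and generate_new_task() decides whether the loop goes around again. The
# callables and the max_rounds safety cap here are hypothetical.
def toy_actuator_loop(run_work, run_actuator, wants_new_task, max_rounds=10):
    for _ in range(max_rounds):
        run_work()        # cond: work.is_finished -> actuator
        run_actuator()    # cond1: actuator.generate_new_task -> work again
        if not wants_new_task():
            break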