def test_resolved_tool_contract_is_distributed(self):
    """
    Check ``is_distributed`` in every task's resolved tool contract.

    A resolved tool contract is expected to be distributed only when the
    workflow option ``pbsmrtpipe.options.distributed_mode`` is truthy AND the
    task's own tool contract is flagged as distributed; in every other case
    it must not be distributed.
    """
    workflow_options_json = os.path.join(self.job_dir, "workflow",
                                         "options-workflow.json")
    # Use a context manager so the file handle is closed as soon as the
    # options have been parsed (a bare json.load(open(...)) leaks it).
    with open(workflow_options_json) as json_in:
        workflow_options = json.load(json_in)
    distributed_mode = workflow_options[
        "pbsmrtpipe.options.distributed_mode"]
    tasks_dir = os.path.join(self.job_dir, "tasks")
    for task_dir in os.listdir(tasks_dir):
        # Entries whose name starts with "." are skipped.
        if task_dir.startswith("."):
            continue
        tc_file = os.path.join(tasks_dir, task_dir, "tool-contract.json")
        tc = load_tool_contract_from(tc_file)
        rtc_file = os.path.join(tasks_dir, task_dir,
                                "resolved-tool-contract.json")
        rtc = load_resolved_tool_contract_from(rtc_file)
        if distributed_mode and tc.task.is_distributed:
            self.assertTrue(
                rtc.task.is_distributed,
                "Resolved tool contract {f} has unexpected is_distributed=False"
                .format(f=rtc_file))
        else:
            self.assertFalse(
                rtc.task.is_distributed,
                "Resolved tool contract {f} has unexpected is_distributed=True"
                .format(f=rtc_file))
def pacbio_args_or_contract_runner(argv, parser, args_runner_func,
                                   contract_tool_runner_func, alog,
                                   setup_log_func):
    """
    Entry point for tools that understand resolved tool contracts but cannot
    emit tool contracts themselves (e.g. the contract was written by hand).

    When any element of ``argv`` starts with the resolved-tool-contract
    option, the contract is loaded and handed to
    ``contract_tool_runner_func``; otherwise the call is forwarded to the
    regular argparse-based runner.

    :param argv: raw command line arguments
    :param parser: argparse Parser
    :type parser: ArgumentParser

    :param args_runner_func: func(args) => int signature
    :param contract_tool_runner_func: func(tool_contract_instance) should be
        the signature
    :param alog: a python log instance (may be None, in which case the
        informational messages are not emitted)
    :param setup_log_func: func(log_instance) => void signature

    :return: int return code
    :rtype: int
    """
    def _info(message):
        # alog is optional; silently skip the message when it is absent
        if alog is None:
            return
        alog.info(message)

    # Inspect the raw argv instead of running the caller's parser.
    uses_contract = any(
        arg.startswith(RESOLVED_TOOL_CONTRACT_OPTION) for arg in argv)
    if not uses_contract:
        # tool was called with the standard commandline invocation
        return pacbio_args_runner(argv, parser, args_runner_func, alog,
                                  setup_log_func)

    # Build a throw-away parser that only knows the base options and
    # --resolved-tool-contract, so verbosity flags are still respected.
    contract_parser = get_default_argparser(version=parser.version,
                                            description=parser.description)
    add_resolved_tool_contract_option(
        add_base_options(contract_parser, default_level="NOTSET"))
    parsed = contract_parser.parse_args(argv)

    rtc = load_resolved_tool_contract_from(parsed.resolved_tool_contract)
    _info("Successfully loaded resolved tool contract from {a}".format(
        a=argv))

    # XXX a logging flag given on the command line takes precedence;
    # otherwise fall back to the level stored in the resolved tool contract.
    # This relies on argparse accepting NOTSET as the default level even
    # though it is not one of the choices.
    level = get_parsed_args_log_level(parsed, default_level=logging.NOTSET)
    if level == logging.NOTSET:
        level = rtc.task.log_level

    with TemporaryResourcesManager(rtc):
        exit_code = _pacbio_main_runner(alog, setup_log_func,
                                        contract_tool_runner_func, rtc,
                                        level=level)
        _info("Completed running resolved contract. \n{c}".format(c=rtc))
        return exit_code
def test_01(self):
    """
    Load the dev-app resolved tool contract from its JSON data file, check
    its type, and write it out as Avro into a temporary file.
    """
    json_path = get_data_file("resolved_tool_contract_dev_app.json")
    rtc = load_resolved_tool_contract_from(json_path)
    self.assertIsInstance(rtc, ResolvedToolContract)

    output_dir = get_temp_dir("rtc-app")
    avro_path = get_temp_file("-resolved-tool-contract.avro", output_dir)
    write_resolved_tool_contract_avro(rtc, avro_path)
def test_01(self):
    """
    Load the dev-app resolved tool contract (located via
    ``get_resolved_tool_contract``), check its type, and write it out as
    Avro into a temporary file.
    """
    json_path = get_resolved_tool_contract(
        "resolved_tool_contract_dev_app.json")
    rtc = load_resolved_tool_contract_from(json_path)
    self.assertIsInstance(rtc, ResolvedToolContract)

    output_dir = get_temp_dir("rtc-app")
    avro_path = get_temp_file("-resolved-tool-contract.avro", output_dir)
    write_resolved_tool_contract_avro(rtc, avro_path)
def pacbio_args_or_contract_runner(argv, parser, args_runner_func,
                                   contract_tool_runner_func, alog,
                                   setup_log_func):
    """
    Run a tool either from a resolved tool contract or from plain command
    line arguments.

    Intended for tools that understand resolved_tool_contracts, but can't
    emit tool contracts (they may have been written by hand).

    :param argv: raw command line arguments
    :param parser: argparse Parser
    :type parser: ArgumentParser

    :param args_runner_func: func(args) => int signature
    :param contract_tool_runner_func: func(tool_contract_instance) should be
        the signature
    :param alog: a python log instance; when None the informational messages
        are skipped
    :param setup_log_func: func(log_instance) => void signature

    :return: int return code
    :rtype: int
    """
    def _emit(text):
        # Logging is best-effort: alog is allowed to be None.
        if alog is None:
            return
        alog.info(text)

    contract_requested = any(
        token.startswith(RESOLVED_TOOL_CONTRACT_OPTION) for token in argv)

    if not contract_requested:
        # tool was called with the standard commandline invocation
        return pacbio_args_runner(argv, parser, args_runner_func, alog,
                                  setup_log_func)

    # The caller's parser is bypassed: a temporary parser with only the base
    # options plus --resolved-tool-contract handles this special case while
    # still honouring the verbosity flags.
    limited_parser = get_default_argparser(version=parser.version,
                                           description=parser.description)
    add_resolved_tool_contract_option(
        add_base_options(limited_parser, default_level="NOTSET"))
    limited_args = limited_parser.parse_args(argv)

    contract = load_resolved_tool_contract_from(
        limited_args.resolved_tool_contract)
    _emit("Successfully loaded resolved tool contract from {a}".format(
        a=argv))

    # XXX an explicit logging flag wins; NOTSET means "nothing was given", in
    # which case the level recorded in the resolved tool contract is used.
    # argparse lets NOTSET be the default even though it is not a choice.
    effective_level = get_parsed_args_log_level(
        limited_args, default_level=logging.NOTSET)
    if effective_level == logging.NOTSET:
        effective_level = contract.task.log_level

    with TemporaryResourcesManager(contract):
        return_code = _pacbio_main_runner(alog, setup_log_func,
                                          contract_tool_runner_func,
                                          contract, level=effective_level)
        _emit("Completed running resolved contract. \n{c}".format(
            c=contract))
        return return_code
def _get_rtc_dataset_uuids(report_file, use_outputs=False):
    """
    Collect the UUIDs of the XML datasets referenced by the resolved tool
    contract that sits in the same directory as ``report_file``.

    :param report_file: path of a file located next to
        ``resolved-tool-contract.json``
    :param use_outputs: when true, inspect the task's output files instead
        of its input files
    :return: set of dataset UUIDs for every listed file ending in ``.xml``
    :rtype: set
    """
    task_dir = op.dirname(report_file)
    rtc = load_resolved_tool_contract_from(
        op.join(task_dir, "resolved-tool-contract.json"))

    candidates = rtc.task.input_files
    if use_outputs:
        candidates = rtc.task.output_files

    uuids = set()
    for path in candidates:
        # only XML files are opened as datasets
        if not path.endswith(".xml"):
            continue
        with openDataSet(path) as dataset:
            uuids.add(dataset.uuid)
    return uuids
def loadRtcs(cls):
    """
    Load the resolved tool contracts found under the job's ``tasks``
    directory and store them on the class.

    Sets ``cls.tasks_dir`` to ``<cls.job_dir>/tasks`` and
    ``cls.resolved_tool_contracts`` to the list of contracts loaded from the
    ``resolved-tool-contract.json`` of each task sub-directory.  Entries that
    are not directories are ignored; directories lacking the JSON file are
    reported with a warning and skipped.
    """
    cls.tasks_dir = os.path.join(cls.job_dir, "tasks")
    task_contents = os.listdir(cls.tasks_dir)
    cls.resolved_tool_contracts = []
    for task_name in task_contents:
        task_dir = os.path.join(cls.tasks_dir, task_name)
        if not os.path.isdir(task_dir):
            continue
        # NOTE: the directory name is deliberately not parsed here.  The
        # previous "<task_id>-<job_id>" split produced values that were never
        # used and raised ValueError for any name without exactly one hyphen.
        rtc_json = os.path.join(task_dir, "resolved-tool-contract.json")
        if not os.path.isfile(rtc_json):
            log.warning("Can't find %s", rtc_json)
            continue
        rtc = load_resolved_tool_contract_from(rtc_json)
        cls.resolved_tool_contracts.append(rtc)
def test_resolved_tool_contract_is_distributed(self):
    """
    Verify the ``is_distributed`` flag of each task's resolved tool contract.

    The flag must be set when both the workflow option
    ``pbsmrtpipe.options.distributed_mode`` is truthy and the task's tool
    contract is distributed, and must be unset otherwise.
    """
    workflow_options_json = os.path.join(self.job_dir, "workflow",
                                         "options-workflow.json")
    # Read through a context manager so the file is closed right after
    # parsing instead of being left for the garbage collector.
    with open(workflow_options_json) as options_in:
        workflow_options = json.load(options_in)
    distributed_mode = workflow_options["pbsmrtpipe.options.distributed_mode"]
    tasks_dir = os.path.join(self.job_dir, "tasks")
    for task_dir in os.listdir(tasks_dir):
        # Entries whose name starts with "." are skipped.
        if task_dir.startswith("."):
            continue
        tc_file = os.path.join(tasks_dir, task_dir, "tool-contract.json")
        tc = load_tool_contract_from(tc_file)
        rtc_file = os.path.join(tasks_dir, task_dir,
                                "resolved-tool-contract.json")
        rtc = load_resolved_tool_contract_from(rtc_file)
        if distributed_mode and tc.task.is_distributed:
            self.assertTrue(
                rtc.task.is_distributed,
                "Resolved tool contract {f} has unexpected is_distributed=False".format(f=rtc_file))
        else:
            self.assertFalse(
                rtc.task.is_distributed,
                "Resolved tool contract {f} has unexpected is_distributed=True".format(f=rtc_file))
def pacbio_args_or_contract_runner(argv, parser, args_runner_func,
                                   contract_tool_runner_func, alog,
                                   setup_log_func):
    """
    For tools that understand resolved_tool_contracts, but can't emit
    tool contracts (they may have been written by hand)

    :param argv: raw command line arguments
    :param parser: argparse Parser
    :type parser: ArgumentParser

    :param args_runner_func: func(args) => int signature
    :param contract_tool_runner_func: func(tool_contract_instance) should be
        the signature
    :param alog: a python log instance (may be None)
    :param setup_log_func: func(log_instance) => void signature

    :return: int return code
    :rtype: int
    """
    # circumvent the argparse parsing by inspecting the raw argv, then
    # manually parse out the resolved_tool_contract path. Not awesome, but
    # the only way to skip the parser.parse_args(args) machinery
    if any(a.startswith(RESOLVED_TOOL_CONTRACT_OPTION) for a in argv):
        # Single-argument parenthesized form: identical output under the
        # Python 2 print statement and the Python 3 print function.
        print("Attempting to Load resolved tool contract from {a}".format(
            a=argv))
        # FIXME need to catch the exception if raised here before the
        # _pacbio_main_runner is called
        resolved_tool_contract_path = _get_resolved_tool_contract_from_argv(
            argv)
        resolved_tool_contract = load_resolved_tool_contract_from(
            resolved_tool_contract_path)
        r = _pacbio_main_runner(alog, setup_log_func,
                                contract_tool_runner_func,
                                resolved_tool_contract)
        # alog is optional, so only report completion when a logger exists.
        if alog is not None:
            alog.info("Completed running resolved contract. {c}".format(
                c=resolved_tool_contract))
        return r
    else:
        # tool was called with the standard commandline invocation
        return pacbio_args_runner(argv, parser, args_runner_func, alog,
                                  setup_log_func)
def test_rtc_output_files_in_datastore(self):
    """
    Confirm that all output files listed in resolved tool contracts are
    represented in the datastore.

    The datastore is read from ``<job_dir>/workflow/datastore.json``; every
    output file of every task's resolved tool contract must match the
    ``path`` of one of its ``files`` entries.
    """
    datastore_json = os.path.join(self.job_dir, "workflow", "datastore.json")
    with open(datastore_json, 'r') as json_in:
        datastore = json.load(json_in)
    datastore_output_files = {f["path"] for f in datastore["files"]}
    tasks_dir = os.path.join(self.job_dir, "tasks")
    for task_dir in os.listdir(tasks_dir):
        # Entries whose name starts with "." are skipped.
        if task_dir.startswith("."):
            continue
        rtc_file = os.path.join(tasks_dir, task_dir,
                                "resolved-tool-contract.json")
        rtc = load_resolved_tool_contract_from(rtc_file)
        for ofn in rtc.task.output_files:
            # assertIn states the membership check directly and reports the
            # container on failure.
            self.assertIn(ofn, datastore_output_files,
                          "{o} not found in datastore".format(o=ofn))
def pacbio_args_or_contract_runner(argv, parser, args_runner_func,
                                   contract_tool_runner_func, alog,
                                   setup_log_func):
    """
    For tools that understand resolved_tool_contracts, but can't emit
    tool contracts (they may have been written by hand)

    :param argv: raw command line arguments
    :param parser: argparse Parser
    :type parser: ArgumentParser

    :param args_runner_func: func(args) => int signature
    :param contract_tool_runner_func: func(tool_contract_instance) should be
        the signature
    :param alog: a python log instance (may be None, in which case the
        completion message is not logged)
    :param setup_log_func: func(log_instance) => void signature

    :return: int return code
    :rtype: int
    """
    # circumvent the argparse parsing by inspecting the raw argv, then
    # manually parse out the resolved_tool_contract path. Not awesome, but
    # the only way to skip the parser.parse_args(args) machinery
    if any(a.startswith(RESOLVED_TOOL_CONTRACT_OPTION) for a in argv):
        # Single-argument parenthesized form: identical output under the
        # Python 2 print statement and the Python 3 print function.
        print("Attempting to Load resolved tool contract from {a}".format(
            a=argv))
        # FIXME need to catch the exception if raised here before the
        # _pacbio_main_runner is called
        resolved_tool_contract_path = _get_resolved_tool_contract_from_argv(
            argv)
        resolved_tool_contract = load_resolved_tool_contract_from(
            resolved_tool_contract_path)
        r = _pacbio_main_runner(alog, setup_log_func,
                                contract_tool_runner_func,
                                resolved_tool_contract)
        if alog is not None:
            alog.info("Completed running resolved contract. {c}".format(
                c=resolved_tool_contract))
        return r
    else:
        # tool was called with the standard commandline invocation
        return pacbio_args_runner(argv, parser, args_runner_func, alog,
                                  setup_log_func)
def test_01(self):
    """
    Load the example resolved tool contract from its JSON data file, log
    it, and check that the loader returned something.
    """
    rtc = load_resolved_tool_contract_from(
        get_data_file("dev_example_resolved_tool_contract.json"))
    log.info(rtc)
    self.assertIsNotNone(rtc)