def test_resolved_tool_contract_is_distributed(self):
    """
    Check the ``is_distributed`` flag of every task's resolved tool contract.

    The resolved tool contract of a task is expected to be distributed only
    when the workflow option ``pbsmrtpipe.options.distributed_mode`` is
    truthy AND the task's own tool contract is marked as distributed; in
    every other case it must not be distributed.
    """
    workflow_options_json = os.path.join(self.job_dir, "workflow",
                                         "options-workflow.json")
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(workflow_options_json) as options_file:
        workflow_options = json.load(options_file)
    distributed_mode = workflow_options[
        "pbsmrtpipe.options.distributed_mode"]
    tasks_dir = os.path.join(self.job_dir, "tasks")
    for task_dir in os.listdir(tasks_dir):
        # Entries whose name starts with a dot are skipped
        if task_dir.startswith("."):
            continue
        tc_file = os.path.join(tasks_dir, task_dir, "tool-contract.json")
        tc = load_tool_contract_from(tc_file)
        rtc_file = os.path.join(tasks_dir, task_dir,
                                "resolved-tool-contract.json")
        rtc = load_resolved_tool_contract_from(rtc_file)
        if distributed_mode and tc.task.is_distributed:
            self.assertTrue(
                rtc.task.is_distributed,
                "Resolved tool contract {f} has unexpected is_distributed=False"
                .format(f=rtc_file))
        else:
            self.assertFalse(
                rtc.task.is_distributed,
                "Resolved tool contract {f} has unexpected is_distributed=True"
                .format(f=rtc_file))
Beispiel #2
0
def pacbio_args_or_contract_runner(argv, parser, args_runner_func,
                                   contract_tool_runner_func, alog,
                                   setup_log_func):
    """
    Entry point for tools that can consume a resolved tool contract but do
    not emit tool contracts themselves (e.g. hand-written contracts).

    When any element of ``argv`` starts with the resolved-tool-contract
    option, the contract is loaded and run via ``contract_tool_runner_func``;
    otherwise the call is forwarded to the regular argument-based runner.

    :param argv: raw command-line arguments
    :param parser: argparse Parser
    :type parser: ArgumentParser

    :param args_runner_func: func(args) => int signature

    :param contract_tool_runner_func: func(tool_contract_instance) should be
    the signature

    :param alog: a python log instance (may be None, in which case nothing
    is logged from this function)
    :param setup_log_func: func(log_instance) => void signature

    :return: int return code
    :rtype: int
    """
    def _info(message):
        # Logging is optional: stay silent when no logger was supplied
        if alog is not None:
            alog.info(message)

    # Look at the raw argv first so the tool's full parser is bypassed when
    # a resolved tool contract was requested.
    uses_contract = any(
        arg.startswith(RESOLVED_TOOL_CONTRACT_OPTION) for arg in argv)
    if not uses_contract:
        # tool was called with the standard commandline invocation
        return pacbio_args_runner(argv, parser, args_runner_func, alog,
                                  setup_log_func)

    # Build a throw-away parser that only knows the base options and the
    # --resolved-tool-contract option (verbosity flags are still respected).
    tmp_parser = get_default_argparser(version=parser.version,
                                       description=parser.description)
    add_resolved_tool_contract_option(
        add_base_options(tmp_parser, default_level="NOTSET"))
    tmp_args = tmp_parser.parse_args(argv)
    rtc = load_resolved_tool_contract_from(tmp_args.resolved_tool_contract)
    _info("Successfully loaded resolved tool contract from {a}".format(
        a=argv))

    # An explicit logging flag on the command line wins; only when none was
    # given (level parsed as NOTSET, the default set above) does the level
    # stored in the resolved tool contract apply.
    level = get_parsed_args_log_level(tmp_args,
                                      default_level=logging.NOTSET)
    if level == logging.NOTSET:
        level = rtc.task.log_level

    with TemporaryResourcesManager(rtc):
        exit_code = _pacbio_main_runner(alog,
                                        setup_log_func,
                                        contract_tool_runner_func,
                                        rtc,
                                        level=level)
        _info("Completed running resolved contract. {c}".format(c=rtc))
        return exit_code
    def test_01(self):
        # Load the JSON fixture and check it yields a ResolvedToolContract
        rtc_path = get_data_file("resolved_tool_contract_dev_app.json")
        contract = load_resolved_tool_contract_from(rtc_path)
        self.assertIsInstance(contract, ResolvedToolContract)

        # Write the loaded contract out as Avro to a temp file; the test
        # passes as long as the write does not raise.
        out_dir = get_temp_dir("rtc-app")
        avro_path = get_temp_file("-resolved-tool-contract.avro", out_dir)
        write_resolved_tool_contract_avro(contract, avro_path)
Beispiel #4
0
    def test_01(self):
        # Resolve the fixture path, load it, and verify the resulting type
        fixture = get_resolved_tool_contract(
            "resolved_tool_contract_dev_app.json")
        loaded = load_resolved_tool_contract_from(fixture)
        self.assertIsInstance(loaded, ResolvedToolContract)

        # Serialize the contract to an Avro temp file; no assertion is made
        # on the output, only that writing completes without an exception.
        tmp_dir = get_temp_dir("rtc-app")
        avro_file = get_temp_file("-resolved-tool-contract.avro", tmp_dir)
        write_resolved_tool_contract_avro(loaded, avro_file)
Beispiel #5
0
def pacbio_args_or_contract_runner(argv,
                                   parser,
                                   args_runner_func,
                                   contract_tool_runner_func,
                                   alog, setup_log_func):
    """
    Dispatch between running from a resolved tool contract and running from
    ordinary command-line arguments.

    Intended for tools that understand resolved_tool_contracts but can't
    emit tool contracts (they may have been written by hand).

    :param argv: raw command-line arguments
    :param parser: argparse Parser
    :type parser: ArgumentParser

    :param args_runner_func: func(args) => int signature

    :param contract_tool_runner_func: func(tool_contract_instance) should be
    the signature

    :param alog: a python log instance; None disables the info messages
    emitted here
    :param setup_log_func: func(log_instance) => void signature
    :return: int return code
    :rtype: int
    """
    def _emit(text):
        # No logger supplied => nothing to report
        if alog is None:
            return
        alog.info(text)

    def _run_from_contract():
        # A reduced parser (base options + --resolved-tool-contract only) is
        # used so the tool's own parser never sees argv in this mode, while
        # verbosity flags keep working.
        limited_parser = get_default_argparser(
            version=parser.version, description=parser.description)
        with_base_opts = add_base_options(limited_parser,
                                          default_level="NOTSET")
        add_resolved_tool_contract_option(with_base_opts)
        parsed = limited_parser.parse_args(argv)

        contract = load_resolved_tool_contract_from(
            parsed.resolved_tool_contract)
        _emit("Successfully loaded resolved tool contract from {a}".format(
            a=argv))

        # Command-line logging flags take precedence. NOTSET is the default
        # handed to the parser above, so seeing it here means no flag was
        # given and the contract's own log level is used instead.
        chosen_level = get_parsed_args_log_level(
            parsed, default_level=logging.NOTSET)
        if chosen_level == logging.NOTSET:
            chosen_level = contract.task.log_level

        with TemporaryResourcesManager(contract):
            return_code = _pacbio_main_runner(
                alog, setup_log_func, contract_tool_runner_func, contract,
                level=chosen_level)
            _emit("Completed running resolved contract. {c}".format(
                c=contract))
            return return_code

    # Decide from the raw argv, before any argparse machinery runs
    if any(a.startswith(RESOLVED_TOOL_CONTRACT_OPTION) for a in argv):
        return _run_from_contract()

    # tool was called with the standard commandline invocation
    return pacbio_args_runner(argv, parser, args_runner_func, alog,
                              setup_log_func)
def _get_rtc_dataset_uuids(report_file, use_outputs=False):
    """
    Gather dataset UUIDs from the resolved tool contract located in the same
    directory as ``report_file``.

    :param report_file: path whose directory holds
    ``resolved-tool-contract.json``
    :param use_outputs: when true, inspect the task's output files instead
    of its input files
    :return: set of ``uuid`` values of every ``.xml`` file opened with
    ``openDataSet``
    :rtype: set
    """
    task_dir = op.dirname(report_file)
    rtc = load_resolved_tool_contract_from(
        op.join(task_dir, "resolved-tool-contract.json"))
    candidates = rtc.task.output_files if use_outputs else rtc.task.input_files
    uuids = set()
    for path in candidates:
        # Only files ending in .xml are opened as datasets
        if not path.endswith(".xml"):
            continue
        with openDataSet(path) as dataset:
            uuids.add(dataset.uuid)
    return uuids
Beispiel #7
0
def _get_rtc_dataset_uuids(report_file, use_outputs=False):
    """
    Return the set of UUIDs of the XML datasets listed in the resolved tool
    contract that sits beside ``report_file``.

    :param report_file: file located in the directory that contains
    ``resolved-tool-contract.json``
    :param use_outputs: select the contract's output files (True) or its
    input files (False, the default)
    :return: UUIDs read from each ``.xml`` file via ``openDataSet``
    :rtype: set
    """
    contract_path = op.join(op.dirname(report_file),
                            "resolved-tool-contract.json")
    task = load_resolved_tool_contract_from(contract_path).task
    if use_outputs:
        files = task.output_files
    else:
        files = task.input_files
    found = set()
    for name in files:
        if name.endswith(".xml"):
            # The dataset is opened only long enough to read its uuid
            with openDataSet(name) as opened:
                found.add(opened.uuid)
    return found
Beispiel #8
0
 def loadRtcs(cls):
     """
     Load the resolved tool contract of every task directory of the job.

     Sets ``cls.tasks_dir`` to ``<job_dir>/tasks`` and fills
     ``cls.resolved_tool_contracts`` with one loaded contract per
     sub-directory that contains a ``resolved-tool-contract.json`` file.
     Non-directory entries are ignored; directories lacking the file are
     reported with a warning and skipped.
     """
     cls.tasks_dir = os.path.join(cls.job_dir, "tasks")
     task_contents = os.listdir(cls.tasks_dir)
     cls.resolved_tool_contracts = []
     for task_name in task_contents:
         task_dir = os.path.join(cls.tasks_dir, task_name)
         if not os.path.isdir(task_dir):
             continue
         # The directory name is deliberately not parsed: the former
         # ``task_id, job_id = task_name.split("-")`` results were never
         # used, and the unpacking raised ValueError for any directory whose
         # name did not contain exactly one dash.
         rtc_json = os.path.join(task_dir, "resolved-tool-contract.json")
         if not os.path.isfile(rtc_json):
             # ``warning`` replaces the deprecated ``warn`` alias
             log.warning("Can't find %s", rtc_json)
             continue
         rtc = load_resolved_tool_contract_from(rtc_json)
         cls.resolved_tool_contracts.append(rtc)
Beispiel #9
0
 def loadRtcs(cls):
     """
     Populate ``cls.resolved_tool_contracts`` from the job's task folders.

     ``cls.tasks_dir`` is set to the ``tasks`` directory under
     ``cls.job_dir``. Each sub-directory holding a
     ``resolved-tool-contract.json`` contributes one loaded contract; plain
     files are skipped silently and directories without the JSON file are
     skipped with a warning.
     """
     cls.tasks_dir = os.path.join(cls.job_dir, "tasks")
     task_contents = os.listdir(cls.tasks_dir)
     cls.resolved_tool_contracts = []
     for task_name in task_contents:
         task_dir = os.path.join(cls.tasks_dir, task_name)
         if not os.path.isdir(task_dir):
             continue
         # No splitting of the directory name on "-": the two pieces were
         # unused, and names without exactly one dash made the unpacking
         # fail with ValueError before any contract was examined.
         rtc_json = os.path.join(task_dir, "resolved-tool-contract.json")
         if os.path.isfile(rtc_json):
             cls.resolved_tool_contracts.append(
                 load_resolved_tool_contract_from(rtc_json))
         else:
             # Logger.warn is a deprecated alias of Logger.warning
             log.warning("Can't find %s", rtc_json)
 def test_resolved_tool_contract_is_distributed(self):
     """
     Verify ``is_distributed`` on each task's resolved tool contract.

     A resolved contract must be distributed exactly when the workflow
     option ``pbsmrtpipe.options.distributed_mode`` is truthy and the
     corresponding tool contract is itself distributed.
     """
     workflow_options_json = os.path.join(self.job_dir, "workflow",
         "options-workflow.json")
     # Context manager guarantees the options file is closed after parsing
     # (the bare ``json.load(open(...))`` form leaked the handle).
     with open(workflow_options_json) as options_file:
         workflow_options = json.load(options_file)
     distributed_mode = workflow_options["pbsmrtpipe.options.distributed_mode"]
     tasks_dir = os.path.join(self.job_dir, "tasks")
     for task_dir in os.listdir(tasks_dir):
         # Dot-prefixed entries are not checked
         if task_dir.startswith("."):
             continue
         tc_file = os.path.join(tasks_dir, task_dir, "tool-contract.json")
         tc = load_tool_contract_from(tc_file)
         rtc_file = os.path.join(tasks_dir, task_dir,
             "resolved-tool-contract.json")
         rtc = load_resolved_tool_contract_from(rtc_file)
         if distributed_mode and tc.task.is_distributed:
             self.assertTrue(rtc.task.is_distributed,
                 "Resolved tool contract {f} has unexpected is_distributed=False".format(f=rtc_file))
         else:
             self.assertFalse(rtc.task.is_distributed,
                 "Resolved tool contract {f} has unexpected is_distributed=True".format(f=rtc_file))
Beispiel #11
0
def pacbio_args_or_contract_runner(argv, parser, args_runner_func,
                                   contract_tool_runner_func, alog,
                                   setup_log_func):
    """
    For tools that understand resolved_tool_contracts, but can't emit
    tool contracts (they may have been written by hand)

    If any element of ``argv`` starts with the resolved-tool-contract
    option, the contract path is extracted from ``argv``, the contract is
    loaded and run through ``contract_tool_runner_func``. Otherwise the call
    is delegated to ``pacbio_args_runner``.

    :param argv: raw command-line arguments
    :param parser: argparse Parser
    :type parser: ArgumentParser

    :param args_runner_func: func(args) => int signature

    :param contract_tool_runner_func: func(tool_contract_instance) should be
    the signature

    :param alog: a python log instance
    :param setup_log_func: func(log_instance) => void signature
    :return: int return code
    :rtype: int
    """

    # circumvent the argparse parsing by inspecting the raw argv, then manually
    # parse out the resolved_tool_contract path. Not awesome, but the only way
    # to skip the parser.parse_args(args) machinery
    if any(a.startswith(RESOLVED_TOOL_CONTRACT_OPTION) for a in argv):
        # Parenthesized single-argument form: prints the same text under the
        # Python 2 print statement and the Python 3 print function (the bare
        # statement form is a SyntaxError on Python 3).
        print("Attempting to Load resolved tool contract from {a}".format(
            a=argv))
        # FIXME need to catch the exception if raised here before the _pacbio_main_runner is called
        resolved_tool_contract_path = _get_resolved_tool_contract_from_argv(
            argv)
        resolved_tool_contract = load_resolved_tool_contract_from(
            resolved_tool_contract_path)
        r = _pacbio_main_runner(alog, setup_log_func,
                                contract_tool_runner_func,
                                resolved_tool_contract)
        # TODO(review): completion logging is disabled here; if re-enabled,
        # confirm whether alog can be None and guard accordingly.
        # alog.info("Completed running resolved contract. {c}".format(c=resolved_tool_contract))
        return r
    else:
        # tool was called with the standard commandline invocation
        return pacbio_args_runner(argv, parser, args_runner_func, alog,
                                  setup_log_func)
 def test_rtc_output_files_in_datastore(self):
     """
     Every output file named in a task's resolved tool contract must appear
     as a ``path`` entry of the job's ``workflow/datastore.json``.
     """
     datastore_json = os.path.join(self.job_dir, "workflow", "datastore.json")
     with open(datastore_json, 'r') as handle:
         datastore = json.load(handle)
     tasks_dir = os.path.join(self.job_dir, "tasks")
     entries = datastore["files"]
     datastore_output_files = set(entry["path"] for entry in entries)
     # Not consulted below; retained so each entry's "uniqueId" key is still
     # read exactly as before.
     datastore_uuids = set(entry["uniqueId"] for entry in entries)
     for task_name in os.listdir(tasks_dir):
         # Dot-prefixed entries are not task directories to check
         if not task_name.startswith("."):
             rtc_path = os.path.join(tasks_dir, task_name,
                                     "resolved-tool-contract.json")
             contract = load_resolved_tool_contract_from(rtc_path)
             for out_file in contract.task.output_files:
                 self.assertTrue(out_file in datastore_output_files,
                                 "{o} not found in datastore".format(
                                     o=out_file))
 def test_rtc_output_files_in_datastore(self):
     """
     Check that the datastore lists the path of each output file declared
     by the resolved tool contracts under the job's ``tasks`` directory.
     """
     with open(os.path.join(self.job_dir, "workflow", "datastore.json"),
               'r') as stream:
         datastore = json.loads(stream.read())
     known_paths = set()
     for record in datastore["files"]:
         known_paths.add(record["path"])
     # Unused afterwards; the comprehension is kept because it reads the
     # "uniqueId" key of every datastore record.
     datastore_uuids = {record["uniqueId"] for record in datastore["files"]}
     tasks_root = os.path.join(self.job_dir, "tasks")
     for entry in os.listdir(tasks_root):
         if entry.startswith("."):
             # skip dot-prefixed entries
             continue
         resolved = load_resolved_tool_contract_from(
             os.path.join(tasks_root, entry, "resolved-tool-contract.json"))
         for output_path in resolved.task.output_files:
             message = "{o} not found in datastore".format(o=output_path)
             self.assertTrue(output_path in known_paths, message)
Beispiel #14
0
def pacbio_args_or_contract_runner(argv,
                                   parser,
                                   args_runner_func,
                                   contract_tool_runner_func,
                                   alog, setup_log_func):
    """
    For tools that understand resolved_tool_contracts, but can't emit
    tool contracts (they may have been written by hand)

    When an element of ``argv`` starts with the resolved-tool-contract
    option, the contract path is pulled out of ``argv``, the contract is
    loaded and executed with ``contract_tool_runner_func``. In all other
    cases ``pacbio_args_runner`` handles the invocation.

    :param argv: raw command-line arguments
    :param parser: argparse Parser
    :type parser: ArgumentParser

    :param args_runner_func: func(args) => int signature

    :param contract_tool_runner_func: func(tool_contract_instance) should be
    the signature

    :param alog: a python log instance; the completion message is skipped
    when this is None
    :param setup_log_func: func(log_instance) => void signature
    :return: int return code
    :rtype: int
    """

    # circumvent the argparse parsing by inspecting the raw argv, then manually
    # parse out the resolved_tool_contract path. Not awesome, but the only way
    # to skip the parser.parse_args(args) machinery
    if any(a.startswith(RESOLVED_TOOL_CONTRACT_OPTION) for a in argv):
        # Single parenthesized argument: identical output with the Python 2
        # print statement and the Python 3 print function, whereas the bare
        # statement form does not parse on Python 3.
        print("Attempting to Load resolved tool contract from {a}".format(a=argv))
        # FIXME need to catch the exception if raised here before the _pacbio_main_runner is called
        resolved_tool_contract_path = _get_resolved_tool_contract_from_argv(argv)
        resolved_tool_contract = load_resolved_tool_contract_from(resolved_tool_contract_path)
        r = _pacbio_main_runner(alog, setup_log_func, contract_tool_runner_func, resolved_tool_contract)
        if alog is not None:
            alog.info("Completed running resolved contract. {c}".format(c=resolved_tool_contract))
        return r
    else:
        # tool was called with the standard commandline invocation
        return pacbio_args_runner(argv, parser, args_runner_func, alog,
                                  setup_log_func)
 def test_01(self):
     # Smoke test: the example fixture loads to a non-None object, which is
     # also written to the log.
     loaded = load_resolved_tool_contract_from(
         get_data_file("dev_example_resolved_tool_contract.json"))
     log.info(loaded)
     self.assertIsNotNone(loaded)