Beispiel #1
0
    def fix_terminal_output_types(self, cwl):
        """
        Fix terminal output types on connection mismatch.

        Steps whose ``run`` is itself a workflow are processed recursively
        first. Then, for every workflow output, the incoming type is
        compared with the declared type and the declaration is replaced
        whenever ``fix_terminal_output_type`` returns a truthy value. Each
        replacement is both printed and logged.

        Anything that is not a dict of class ``Workflow`` is returned
        unchanged.

        :param cwl: dict
        :return: dict
        """
        # Same guard as fix_connection_matching: a non-dict document is
        # passed through instead of failing on the subscript below.
        if not isinstance(cwl, dict) or cwl['class'] != 'Workflow':
            return cwl

        steps = cwl_ensure_dict(cwl['steps'], 'id')
        for step in steps.values():
            if step['run']['class'] == 'Workflow':
                step['run'] = self.fix_terminal_output_types(step['run'])
        # Write back the mapping that was actually updated, mirroring how
        # ``outputs`` is written back at the end of this method.
        cwl['steps'] = cwl_ensure_array(steps, 'id')

        outputs = cwl_ensure_dict(cwl['outputs'], 'id')
        for output_id, output in outputs.items():
            source_type = self.extract_incoming_type(
                as_list(output.get('outputSource', [])),
                cwl,
                output.get('linkMerge',
                           'merge_nested') == 'merge_flattened'
            )
            fixed_type = self.fix_terminal_output_type(source_type,
                                                       output['type'])
            if fixed_type:
                output['type'] = fixed_type
                msg = ('Converted terminal output type for {} output '
                       'to {}'.format(output_id, fixed_type))
                print('[INFO] - ' + msg)
                logger.info(msg)
        cwl['outputs'] = cwl_ensure_array(outputs, 'id')
        return cwl
 def test_list_to_dict(self):
     """
     A list fixture keyed by "id" must convert into the dict fixture
     """
     converted = cwl_ensure_dict(self.in_list, id_key="id")

     # Check the container type first, then the full content.
     self.assertIsInstance(converted, dict)
     self.assertEqual(converted, self.in_dict)
Beispiel #3
0
 def extract_source_type(self, source, cwl):
     """
     Look up the type produced by a connection source.

     A source containing '/' is read as ``<step id>/<output id>`` and is
     resolved against the outputs of that step's ``run``; the result goes
     through ``scattered_type`` when the step has a "scatter" key. Any
     other source is looked up among the workflow inputs.
     :param source: string
     :param cwl: dict
     :return: string or dict
     """
     if '/' not in source:
         # Plain id: the source is a workflow-level input.
         return cwl_ensure_dict(cwl['inputs'], 'id')[source]['type']

     # Only the first two path components are used.
     step_id, port_id = source.split('/')[:2]
     producer = cwl_ensure_dict(cwl['steps'], 'id')[step_id]
     run_outputs = cwl_ensure_dict(producer['run']['outputs'], 'id')
     port_type = run_outputs[port_id]['type']
     if "scatter" in producer:
         return self.scattered_type(port_type)
     return port_type
Beispiel #4
0
 def fix_connection_matching(self, cwl):
     """
     Fix step connections throughout a workflow.

     Every step is passed through ``fix_incoming_connections``; steps
     whose ``run`` is a workflow are handled recursively. Input that is
     not a dict of class "Workflow" is returned as received.
     :param cwl: dict
     :return: dict
     """
     if not (isinstance(cwl, dict) and cwl['class'] == 'Workflow'):
         return cwl

     # Store the mapping on the document before the loop so that
     # fix_incoming_connections sees the dict form of the steps.
     steps_by_id = cwl_ensure_dict(cwl['steps'], 'id')
     cwl['steps'] = steps_by_id
     for step_id in steps_by_id:
         fixed = self.fix_incoming_connections(step_id, cwl)
         run = fixed['run']
         if run['class'] == 'Workflow':
             fixed['run'] = self.fix_connection_matching(run)
         steps_by_id[step_id] = fixed
     cwl['steps'] = cwl_ensure_array(steps_by_id, 'id')
     return cwl
Beispiel #5
0
 def fix_incoming_connections(self, step_id, cwl):
     """
     Fix the incoming connections of a single workflow step.

     For each step input without a "valueFrom", the sink type (taken from
     the step's ``run`` inputs) is compared with the incoming source type.
     When both are known, the input has fewer than two sources and
     ``transform_type`` returns a truthy value, that value is stored as
     the input's "valueFrom". Each addition is printed and logged.
     :param step_id: string
     :param cwl: dict
     :return: dict
     """
     step = cwl_ensure_dict(cwl['steps'], 'id')[step_id]
     step['in'] = cwl_ensure_dict(step['in'], 'id')
     for port_id, port in step['in'].items():
         # Inputs that already carry an expression are left alone.
         if "valueFrom" in port:
             continue

         run_inputs = cwl_ensure_dict(step['run']['inputs'], 'id')
         sink_type = run_inputs.get(port_id, {}).get('type')
         if port_id in as_list(step.get('scatter')):
             sink_type = self.scattered_type(sink_type)

         source_type = self.extract_incoming_type(
             as_list(port.get('source', [])),
             cwl,
             port.get('linkMerge', 'merge_nested') == 'merge_flattened'
         )

         if not (source_type and sink_type):
             continue
         # Inputs fed by two or more sources are skipped.
         if len(as_list(port.get('source', []))) >= 2:
             continue

         value_from = self.transform_type(source_type, sink_type)
         if not value_from:
             continue

         port['valueFrom'] = value_from
         msg = ('Added step input valueFrom expression "{}" for '
                'step {}, for input {}').format(value_from,
                                                step_id,
                                                port_id)
         print('[INFO] - ' + msg)
         logger.info(msg)
     step['in'] = cwl_ensure_array(step['in'], 'id')
     return step
def breakdown_wf_local(
        wf_path: str,
        installed_apps: dict = None,
        nested_wf_json: dict = None,  # use if is_main is false
        steps_dir: str = None,  # use if is_main is false
        is_main: bool = True):
    """
    Recursively walk through all the steps (tools and nested wfs)
     and install them in steps folder.
    Reference them in the main workflow.

    Steps of class CommandLineTool or ExpressionTool are dumped to
    ``<steps_dir>/<step id>.cwl``; steps of class Workflow are decomposed
    recursively. Each step's ``run`` is replaced by a path relative to the
    directory of ``wf_path``. Apps are tracked by the hash of their
    content so identical apps are installed only once. The main workflow
    is written next to the original as ``<name>_decomposed.cwl``.

    :param wf_path: Path where to dump the tool/workflow
    :param installed_apps: Dict containing already installed apps.
    :param nested_wf_json: None in main call, dict in recursive calls
    :param steps_dir: None in main call, path in recursive calls
    :param is_main: True in main call, False in recursive calls
    :return: (Workflow path, Installed apps dictionary)
    """
    wf_path = os.path.abspath(wf_path)

    msg = ("Decomposing workflow '{}' and"
           " installing parts in 'steps' folder. "
           "This may take a minute or two.\n"
           "Set log level to INFO"
           " to track decomposing progress.").format(wf_path)
    logger.info(msg)
    print(colored(msg, 'green'))

    installed_apps = installed_apps or dict()
    base_dir = os.path.dirname(wf_path)
    # splitext only strips an extension from the last path component, so
    # dots in directory names or extension-less files cannot mangle the
    # output path.
    updated_wf_path = os.path.splitext(wf_path)[0]
    updated_wf_path += '_decomposed.cwl' if is_main else '.cwl'

    # Resolve main workflow or use provided json for nested wf
    if is_main and not nested_wf_json:
        wf_json = resolve_cwl(wf_path)
    else:
        wf_json = nested_wf_json

    # Make steps dir; exist_ok avoids the check-then-create race and
    # missing parent directories are created as well.
    steps_dir = steps_dir or os.path.join(base_dir, 'steps')
    os.makedirs(steps_dir, exist_ok=True)

    wf_json['steps'] = cwl_ensure_dict(wf_json['steps'], 'id')
    for step_id, step in wf_json['steps'].items():
        run = step['run']
        app_hash = calc_json_hash(run)
        if app_hash in installed_apps:
            # Identical app already installed: just reference it.
            step['run'] = os.path.relpath(installed_apps[app_hash], base_dir)
            continue

        tool_path = os.path.join(steps_dir, step_id + '.cwl')
        if run['class'] in ['CommandLineTool', 'ExpressionTool']:
            # Dump run contents to file
            tool_path = safe_dump_yaml(tool_path, run)
            # Add to installed apps to avoid duplicates
            installed_apps[app_hash] = os.path.abspath(tool_path)
            # Add a relative path to wf_json
            step['run'] = os.path.relpath(tool_path, base_dir)
        elif run['class'] == 'Workflow':
            nested_wf, installed_apps = breakdown_wf_local(
                tool_path,
                installed_apps=installed_apps,
                nested_wf_json=run,
                is_main=False,
                steps_dir=steps_dir)
            step['run'] = os.path.relpath(nested_wf, base_dir)

    if is_main:
        # Run walk_tree before opening the output so that a failure there
        # does not leave a truncated file behind.
        walk_tree(wf_json)
        with open(updated_wf_path, 'w') as f:
            YAML().dump(wf_json, f)
        msg = ("Rewiring done. "
               "New tools are now connected"
               " in the workflow {}.").format(os.path.abspath(updated_wf_path))
        logger.info(msg)
        print(colored(msg, 'green'))
    else:
        wf_hash = calc_json_hash(wf_json)
        if wf_hash in installed_apps:
            # An identical nested workflow was already written elsewhere.
            return installed_apps[wf_hash], installed_apps
        safe_dump_yaml(updated_wf_path, wf_json)
        installed_apps[wf_hash] = os.path.abspath(updated_wf_path)

    return os.path.abspath(updated_wf_path), installed_apps
 def test_dict_to_dict(self):
     """
     A dict passed to cwl_ensure_dict must come back equal to itself
     """
     result = cwl_ensure_dict(self.in_dict, 'id')
     self.assertEqual(self.in_dict, result)