コード例 #1
0
    def _prepare_specs(self):
        """Load the YAML specification, validate that it defines exactly one TrainMLModel
        instruction, remember that instruction's name, and normalize the result paths."""
        with open(self.yaml_path, "r") as yaml_file:
            specs = yaml.safe_load(yaml_file)

        # top level: both required keys must be present and nothing unexpected may appear
        ParameterValidator.assert_keys_present(specs.keys(), ["definitions", "instructions"], GalaxyTrainMLModel.__name__,
                                               "YAML specification")
        ParameterValidator.assert_all_in_valid_list(specs.keys(), ["definitions", "instructions", "output"],
                                                    GalaxyTrainMLModel.__name__, "YAML specification")

        ParameterValidator.assert_type_and_value(specs["instructions"], dict, GalaxyTrainMLModel.__name__, "instructions")

        assert len(list(specs["instructions"].keys())) == 1, f"{GalaxyTrainMLModel.__name__}: one instruction has to be specified under " \
                                                             f"`instructions`, got the following instead: {list(specs['instructions'].keys())}."

        self.instruction_name = list(specs["instructions"].keys())[0]
        instruction = specs['instructions'][self.instruction_name]

        ParameterValidator.assert_type_and_value(instruction, dict, GalaxyTrainMLModel.__name__, self.instruction_name)
        ParameterValidator.assert_keys_present(instruction.keys(), ['type'], GalaxyTrainMLModel.__name__, self.instruction_name)

        # TrainMLModelInstruction.__name__[:-11] strips the "Instruction" suffix to get the `type` value
        assert instruction['type'] == TrainMLModelInstruction.__name__[:-11], \
            f"{GalaxyTrainMLModel.__name__}: instruction `type` under {self.instruction_name} has to be {TrainMLModelInstruction.__name__[:-11]} " \
            f"for this tool."

        Util.check_paths(specs, GalaxyTrainMLModel.__name__)
        Util.update_result_paths(specs, self.result_path, self.yaml_path)
コード例 #2
0
ファイル: ImportHelper.py プロジェクト: rofrank/immuneML
    def import_repertoire_dataset(import_class, params: DatasetImportParams, dataset_name: str) -> RepertoireDataset:
        """
        Function to create a dataset from the metadata and a list of repertoire files and exports dataset pickle file

        Arguments:
            import_class: class to use for import
            params: instance of DatasetImportParams class which includes information on path, columns, result path etc.
            dataset_name: user-defined name of the dataset

        Returns:
            RepertoireDataset object that was created
        """
        # NOTE: sep must be passed by keyword — positional use of read_csv arguments other than
        # the file path is deprecated in pandas 1.4 and removed in pandas 2.0 (raises TypeError)
        metadata = pd.read_csv(params.metadata_file, sep=",")

        ParameterValidator.assert_keys_present(metadata.columns.tolist(), ["filename"], ImportHelper.__name__,
                                               f'{dataset_name}: params: metadata_file')

        PathBuilder.build(params.result_path + "repertoires/")

        # import repertoires in parallel, one task per metadata row
        arguments = [(import_class, row, params) for index, row in metadata.iterrows()]
        with Pool(params.number_of_processes) as pool:
            repertoires = pool.starmap(ImportHelper.load_repertoire_as_object, arguments)

        new_metadata_file = ImportHelper.make_new_metadata_file(repertoires, metadata, params.result_path, dataset_name)

        # every metadata column other than `filename` becomes a candidate label with its unique values
        potential_labels = list(set(metadata.columns.tolist()) - {"filename"})
        dataset = RepertoireDataset(params={key: list(set(metadata[key].values.tolist())) for key in potential_labels},
                                    repertoires=repertoires, metadata_file=new_metadata_file, name=dataset_name)

        PickleExporter.export(dataset, params.result_path)

        return dataset
コード例 #3
0
    def _check_dataset(self, specs):
        """Check that exactly one dataset and exactly one instruction are defined in the specs."""
        ParameterValidator.assert_keys_present(specs["definitions"].keys(), ['datasets'], DatasetGenerationTool.__name__, 'definitions')

        dataset_keys = specs['definitions']['datasets'].keys()
        assert len(dataset_keys) == 1, \
            f"{DatasetGenerationTool.__name__}: only one dataset can be defined with this Galaxy tool, got these " \
            f"instead: {list(specs['definitions']['datasets'].keys())}."

        instruction_keys = specs['instructions'].keys()
        assert len(instruction_keys) == 1, \
            f"{DatasetGenerationTool.__name__}: only one instruction of type DatasetExport can be defined with this Galaxy tool, got these " \
            f"instructions instead: {list(specs['instructions'].keys())}."
コード例 #4
0
    def _check_instruction(self, specs):
        """Check that the single instruction is a DatasetExport one and that its
        `datasets` and `export_formats` entries are both single-item lists."""
        # DatasetExportInstruction.__name__[:-11] strips the "Instruction" suffix
        instruction_name = Util.check_instruction_type(specs, DatasetGenerationTool.__name__, DatasetExportInstruction.__name__[:-11])
        instruction = specs['instructions'][instruction_name]

        for key in ['datasets', 'export_formats']:
            ParameterValidator.assert_keys_present(list(instruction.keys()), [key], DatasetGenerationTool.__name__,
                                                   instruction_name)
            ParameterValidator.assert_type_and_value(instruction[key], list, DatasetGenerationTool.__name__,
                                                     f"{instruction_name}/{key}")

            assert len(instruction[key]) == 1, \
                f"{DatasetGenerationTool.__name__}: this tool accepts only one item under {key}, got {specs['instructions'][instruction_name][key]} " \
                f"instead."
コード例 #5
0
    def update_specs(self):
        """Load the YAML specification, validate it for this Galaxy tool, and rewrite result paths."""
        with open(self.yaml_path, 'r') as yaml_file:
            specs = yaml.safe_load(yaml_file)

        tool_name = DatasetGenerationTool.__name__

        # required top-level keys, plus a whitelist of everything that may appear
        ParameterValidator.assert_keys_present(specs.keys(), ["definitions", "instructions"], tool_name, "YAML specification")
        ParameterValidator.assert_all_in_valid_list(specs.keys(), ["definitions", "instructions", "output"], tool_name, "YAML specification")

        self._check_dataset(specs)
        self._check_instruction(specs)

        Util.check_paths(specs, tool_name)
        Util.update_result_paths(specs, self.result_path, self.yaml_path)
コード例 #6
0
    def check_instruction_type(specs: dict, tool_name,
                               expected_instruction) -> str:
        """Check that the specs contain exactly one instruction of the expected type
        and return that instruction's name."""
        ParameterValidator.assert_keys_present(list(specs.keys()), ['definitions', 'instructions'], tool_name,
                                               "YAML specification")

        instruction_names = list(specs['instructions'].keys())
        assert len(instruction_names) == 1, f"{tool_name}: multiple instructions were given " \
                                            f"({str(list(specs['instructions'].keys()))[1:-1]}), but only one instruction of type " \
                                            f"{expected_instruction} should be specified."

        instruction_name = instruction_names[0]
        instruction_type = specs['instructions'][instruction_name]['type']
        assert instruction_type == expected_instruction, \
            f"{tool_name}: instruction type has to be '{expected_instruction}', got {instruction_type} instead."

        return instruction_name
コード例 #7
0
    def parse_signals(signals: dict, symbol_table: SymbolTable):
        """Build a Signal object for each signal specification and register it in the
        symbol table; return the updated symbol table and the original specs."""
        for signal_id, signal_spec in signals.items():
            ParameterValidator.assert_keys_present(signal_spec.keys(), SignalParser.VALID_KEYS, "SignalParser", signal_id)

            implanting_strategy = SignalParser._get_implanting_strategy(signal_id, signal_spec)

            # referenced motifs must already be registered (exhaustiveness not required: last arg False)
            ParameterValidator.assert_keys(signal_spec["motifs"], symbol_table.get_keys_by_type(SymbolType.MOTIF), "SignalParser",
                                           f"motifs in signal {signal_id}", False)

            motifs = [symbol_table.get(motif_id) for motif_id in signal_spec["motifs"]]
            symbol_table.add(signal_id, SymbolType.SIGNAL, Signal(signal_id, motifs, implanting_strategy))

        return symbol_table, signals
コード例 #8
0
    def check_export_format(specs: dict, tool_name: str,
                            instruction_name: str):
        """Check that the instruction specifies exactly one export format and return it."""
        instruction = specs['instructions'][instruction_name]

        ParameterValidator.assert_keys_present(list(instruction.keys()), ["export_formats"], tool_name,
                                               f"{instruction_name}/export_formats")
        ParameterValidator.assert_type_and_value(instruction["export_formats"], list, tool_name,
                                                 f"{instruction_name}/export_formats")

        assert len(instruction["export_formats"]) == 1, \
            f"{tool_name}: only one format can be specified under export_formats parameter under " \
            f"{instruction_name}/export_formats, got {specs['instructions'][instruction_name]['export_formats']} instead."

        return instruction["export_formats"][0]
コード例 #9
0
    def _check_dataset_specs(self, workflow_specification, location):
        """Validate that `definitions/datasets` is a dict containing more than one dataset.

        Arguments:
            workflow_specification: parsed YAML specification as a dict
            location: name of the calling tool, used in error messages
        """
        ParameterValidator.assert_type_and_value(workflow_specification['definitions'], dict, location, 'definitions')
        ParameterValidator.assert_keys_present(workflow_specification['definitions'].keys(), ['datasets'], location, 'definitions')
        ParameterValidator.assert_type_and_value(workflow_specification['definitions']['datasets'], dict, location, 'datasets')

        dataset_names = list(workflow_specification['definitions']['datasets'].keys())

        # use `location` in the message for consistency with the validator calls above
        # (it was previously hard-coded to "MultiDatasetBenchmarkTool"); guard against an
        # empty dataset dict so message formatting cannot raise IndexError on dataset_names[0]
        assert len(dataset_names) > 1, \
            f"{location}: there is only one dataset specified ({dataset_names[0] if dataset_names else 'none'}), " \
            f"while this tool operates on multiple datasets. " \
            f"If only one dataset is needed, consider using the training instruction directly."
コード例 #10
0
ファイル: InstructionParser.py プロジェクト: rofrank/immuneML
    def parse_instruction(key: str, instruction: dict,
                          symbol_table: SymbolTable, path) -> tuple:
        """Dispatch an instruction specification to its dedicated parser, register the
        parsed instruction object in the symbol table, and return (instruction, symbol_table)."""
        ParameterValidator.assert_keys_present(list(instruction.keys()), ["type"],
                                               InstructionParser.__name__, key)

        # discover available parser classes and strip the "Parser" suffix (6 chars)
        # to obtain the set of valid instruction type names
        valid_instructions = [cls[:-6] for cls in
                              ReflectionHandler.discover_classes_by_partial_name("Parser", "dsl/instruction_parsers/")]
        ParameterValidator.assert_in_valid_list(instruction["type"], valid_instructions,
                                                "InstructionParser", "type")

        parser_class = ReflectionHandler.get_class_by_name("{}Parser".format(instruction["type"]), "instruction_parsers/")
        instruction_object = parser_class().parse(key, instruction, symbol_table, path)

        symbol_table.add(key, SymbolType.INSTRUCTION, instruction_object)
        return instruction, symbol_table
コード例 #11
0
    def build_object(cls, **kwargs):
        """Validate the report parameters and the reference file, then build the report object."""
        ParameterValidator.assert_keys(kwargs.keys(),
                                       ['reference_path', 'comparison_attributes', 'name', 'label'],
                                       ReferenceSequenceOverlap.__name__,
                                       f"reports: {kwargs.get('name', '')}")

        assert os.path.isfile(kwargs['reference_path']), f"{ReferenceSequenceOverlap.__name__}: 'reference_path' for report {kwargs['name']} is not " \
                                                         f"a valid file path."

        reference_columns = pd.read_csv(kwargs['reference_path']).columns.tolist()

        # every comparison attribute must exist as a column in the reference file
        ParameterValidator.assert_keys_present(expected_values=kwargs['comparison_attributes'],
                                               values=reference_columns,
                                               location=ReferenceSequenceOverlap.__name__,
                                               parameter_name='columns in file under reference_path')

        return ReferenceSequenceOverlap(**kwargs)
コード例 #12
0
    def _get_implanting_strategy(key: str, signal: dict) -> SignalImplantingStrategy:
        """Resolve and instantiate the implanting strategy configured for a signal."""
        # discover strategy classes; their names end in "Implanting" (10 chars stripped)
        valid_strategies = [cls[:-10] for cls in
                            ReflectionHandler.discover_classes_by_partial_name("Implanting", "simulation/signal_implanting_strategy/")]
        ParameterValidator.assert_in_valid_list(signal["implanting"], valid_strategies, "SignalParser", key)

        # fill in any unspecified parameters from the strategy's defaults (explicit values win)
        defaults = DefaultParamsLoader.load("signal_implanting_strategy/", f"{signal['implanting']}Implanting")
        signal = {**defaults, **signal}

        ParameterValidator.assert_keys_present(list(signal.keys()), ["motifs", "implanting", "sequence_position_weights"], SignalParser.__name__, key)

        implanting_comp = None
        if 'implanting_computation' in signal:
            computation_name = signal['implanting_computation'].lower()
            ParameterValidator.assert_in_valid_list(computation_name, [el.name.lower() for el in ImplantingComputation], SignalParser.__name__,
                                                    'implanting_computation')
            implanting_comp = ImplantingComputation[computation_name.upper()]

        strategy_class = ReflectionHandler.get_class_by_name(f"{signal['implanting']}Implanting")
        return strategy_class(GappedMotifImplanting(), signal["sequence_position_weights"], implanting_comp)
コード例 #13
0
    def _check_instruction_specs(self, workflow_specification, location):
        """Validate that exactly one TrainMLModel instruction with multiple datasets is specified.

        Arguments:
            workflow_specification: parsed YAML specification as a dict
            location: name of the calling tool, used in error messages
        """
        ParameterValidator.assert_type_and_value(workflow_specification['instructions'], dict, location, 'instructions')

        instruction_names = list(workflow_specification['instructions'].keys())
        # use `location` in messages for consistency — two of the three messages below
        # previously hard-coded "MultiDatasetBenchmarkTool" while the third used {location}
        assert len(instruction_names) == 1, f"{location}: there can be only one instruction specified for this tool. " \
                                            f"Currently the following instructions are specified: {instruction_names}."

        instruction = workflow_specification['instructions'][instruction_names[0]]
        ParameterValidator.assert_keys_present(instruction.keys(), ['type', 'datasets'], location,
                                               instruction_names[0])

        instruction_type = instruction['type']
        assert instruction_type == 'TrainMLModel', \
            f"{location}: this tool works only with instruction of type 'TrainMLModel', got {instruction_type} instead."

        datasets_in_instruction = instruction['datasets']
        # fixed grammar in the message: "takes a multiple dataset names" -> "takes multiple dataset names"
        assert len(datasets_in_instruction) > 1, \
            f'{location}: this tool takes multiple dataset names as input, but only {len(datasets_in_instruction)} were provided: ' \
            f'{datasets_in_instruction}.'