Example #1
0
    def _parse_dataset(key: str, dataset_specs: dict,
                       symbol_table: SymbolTable,
                       result_path: Path) -> SymbolTable:
        location = "ImportParser"

        ParameterValidator.assert_keys(list(dataset_specs.keys()),
                                       ImportParser.valid_keys, location,
                                       f"datasets:{key}", False)

        valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(
            DataImport, "Import", "IO/dataset_import/")
        ParameterValidator.assert_in_valid_list(dataset_specs["format"],
                                                valid_formats, location,
                                                "format")

        import_cls = ReflectionHandler.get_class_by_name("{}Import".format(
            dataset_specs["format"]))
        params = ImportParser._prepare_params(dataset_specs, result_path, key)

        if "is_repertoire" in params:
            ParameterValidator.assert_type_and_value(params["is_repertoire"],
                                                     bool, location,
                                                     "is_repertoire")

            if params["is_repertoire"] == True:
                if import_cls != IReceptorImport:
                    assert "metadata_file" in params, f"{location}: Missing parameter: metadata_file under {key}/params/"
                    ParameterValidator.assert_type_and_value(
                        params["metadata_file"], Path, location,
                        "metadata_file")

            if params["is_repertoire"] == False:
                assert "paired" in params, f"{location}: Missing parameter: paired under {key}/params/"
                ParameterValidator.assert_type_and_value(
                    params["paired"], bool, location, "paired")

                if params["paired"] == True:
                    assert "receptor_chains" in params, f"{location}: Missing parameter: receptor_chains under {key}/params/"
                    ParameterValidator.assert_in_valid_list(
                        params["receptor_chains"],
                        ["_".join(cp.value) for cp in ChainPair], location,
                        "receptor_chains")

        try:
            dataset = import_cls.import_dataset(params, key)
            dataset.name = key
            symbol_table.add(key, SymbolType.DATASET, dataset)
        except KeyError as key_error:
            raise KeyError(
                f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
                f"The keyword {key_error.args[0]} was missing. This either means this argument was "
                f"not defined under definitions/datasets/{key}/params, or this column was missing from "
                f"an input data file. ")
        except Exception as ex:
            raise Exception(
                f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details."
            )

        return symbol_table
Example #2
0
    def _parse_report(key: str, params: dict, symbol_table: SymbolTable):
        valid_values = ReflectionHandler.all_nonabstract_subclass_basic_names(Report, "", "reports/")
        report_object, params = ObjectParser.parse_object(params, valid_values, "", "reports/", "ReportParser", key, builder=True,
                                                          return_params_dict=True)

        symbol_table.add(key, SymbolType.REPORT, report_object)

        return symbol_table, params
    def get_documentation():
        doc = str(DatasetExportInstruction.__doc__)

        valid_strategy_values = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, "Exporter", "dataset_export/")
        valid_strategy_values = str(valid_strategy_values)[1:-1].replace("'", "`")
        mapping = {
            "Valid formats are class names of any non-abstract class inheriting "
            ":py:obj:`~immuneML.IO.dataset_export.DataExporter.DataExporter`.": f"Valid values are: {valid_strategy_values}."
        }
        doc = update_docs_per_mapping(doc, mapping)
        return doc
Example #4
0
    def get_documentation():
        initial_doc = str(Signal.__doc__)

        valid_implanting_values = str(
            ReflectionHandler.all_nonabstract_subclass_basic_names(
                SignalImplantingStrategy, 'Implanting',
                'signal_implanting_strategy/'))[1:-1].replace("'", "`")

        docs_mapping = {
            "Valid values for this argument are class names of different signal implanting strategies.":
            f"Valid values are: {valid_implanting_values}"
        }

        doc = update_docs_per_mapping(initial_doc, docs_mapping)
        return doc
Example #5
0
    def get_documentation():
        doc = str(Motif.__doc__)

        valid_strategy_values = ReflectionHandler.all_nonabstract_subclass_basic_names(
            MotifInstantiationStrategy, "Instantiation",
            "motif_instantiation_strategy/")
        valid_strategy_values = str(valid_strategy_values)[1:-1].replace(
            "'", "`")
        chain_values = str([name for name in Chain])[1:-1].replace("'", "`")
        mapping = {
            "It should be one of the classes inheriting MotifInstantiationStrategy.":
            f"Valid values are: {valid_strategy_values}.",
            "The value should be an instance of :py:obj:`~immuneML.data_model.receptor.receptor_sequence.Chain.Chain`.":
            f"Valid values are: {chain_values}."
        }
        doc = update_docs_per_mapping(doc, mapping)
        return doc
Example #6
0
    def _parse_ml_method(ml_method_id: str, ml_specification) -> tuple:

        valid_class_values = ReflectionHandler.all_nonabstract_subclass_basic_names(
            MLMethod, "", "ml_methods/")

        if type(ml_specification) is str:
            ml_specification = {ml_specification: {}}

        ml_specification = {
            **DefaultParamsLoader.load("ml_methods/", "MLMethod"),
            **ml_specification
        }
        ml_specification_keys = list(ml_specification.keys())

        ParameterValidator.assert_all_in_valid_list(
            list(ml_specification_keys),
            ["model_selection_cv", "model_selection_n_folds"] +
            valid_class_values, "MLParser", ml_method_id)

        non_default_keys = [
            key for key in ml_specification.keys()
            if key not in ["model_selection_cv", "model_selection_n_folds"]
        ]

        assert len(ml_specification_keys) == 3, f"MLParser: ML method {ml_method_id} was not correctly specified. Expected at least 1 key " \
                                                f"(ML method name), got {len(ml_specification_keys) - 2} instead: " \
                                                f"{str([key for key in non_default_keys])[1:-1]}."

        ml_method_class_name = non_default_keys[0]
        ml_method_class = ReflectionHandler.get_class_by_name(
            ml_method_class_name, "ml_methods/")

        ml_specification[ml_method_class_name] = {
            **DefaultParamsLoader.load("ml_methods/",
                                       ml_method_class_name,
                                       log_if_missing=False),
            **ml_specification[ml_method_class_name]
        }

        method, params = MLParser.create_method_instance(
            ml_specification, ml_method_class, ml_method_id)
        ml_specification[ml_method_class_name] = params
        method.name = ml_method_id

        return method, ml_specification
Example #7
0
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: Path = None) -> SubsamplingInstruction:
        valid_keys = [
            "type", "dataset", "subsampled_dataset_sizes",
            "dataset_export_formats"
        ]
        ParameterValidator.assert_keys(instruction.keys(), valid_keys,
                                       SubsamplingParser.__name__, key)

        dataset_keys = symbol_table.get_keys_by_type(SymbolType.DATASET)
        ParameterValidator.assert_in_valid_list(instruction['dataset'],
                                                dataset_keys,
                                                SubsamplingParser.__name__,
                                                f'{key}/dataset')

        dataset = symbol_table.get(instruction['dataset'])
        ParameterValidator.assert_type_and_value(
            instruction['subsampled_dataset_sizes'], list,
            SubsamplingParser.__name__, f'{key}/subsampled_dataset_sizes')
        ParameterValidator.assert_all_type_and_value(
            instruction['subsampled_dataset_sizes'], int,
            SubsamplingParser.__name__, f'{key}/subsampled_dataset_sizes', 1,
            dataset.get_example_count())

        valid_export_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(
            DataExporter, 'Exporter', "dataset_export/")
        ParameterValidator.assert_type_and_value(
            instruction['dataset_export_formats'], list,
            SubsamplingParser.__name__, f"{key}/dataset_export_formats")
        ParameterValidator.assert_all_in_valid_list(
            instruction['dataset_export_formats'], valid_export_formats,
            SubsamplingParser.__name__, f"{key}/dataset_export_formats")

        return SubsamplingInstruction(
            dataset=dataset,
            subsampled_dataset_sizes=instruction['subsampled_dataset_sizes'],
            dataset_export_formats=[
                ReflectionHandler.get_class_by_name(export_format + "Exporter",
                                                    "dataset_export/")
                for export_format in instruction['dataset_export_formats']
            ],
            name=key)
Example #8
0
    def parse_exporters(self, instruction):
        if instruction["export_formats"] is not None:
            class_path = "dataset_export/"
            ParameterValidator.assert_all_in_valid_list(
                instruction["export_formats"],
                ReflectionHandler.all_nonabstract_subclass_basic_names(
                    DataExporter, 'Exporter', class_path),
                location="SimulationParser",
                parameter_name="export_formats")
            exporters = [
                ReflectionHandler.get_class_by_name(f"{item}Exporter",
                                                    class_path)
                for item in instruction["export_formats"]
            ]
        else:
            exporters = None

        return exporters
Example #9
0
    def _parse_sequence(key: str, preproc_sequence: list,
                        symbol_table: SymbolTable) -> SymbolTable:

        sequence = []

        valid_preprocessing_classes = ReflectionHandler.all_nonabstract_subclass_basic_names(
            Preprocessor, "", "preprocessing/")

        for item in preproc_sequence:
            for step_key, step in item.items():
                obj, params = ObjectParser.parse_object(
                    step, valid_preprocessing_classes, "", "preprocessing/",
                    "PreprocessingParser", step_key, True, True)
                step = params
                sequence.append(obj)

        symbol_table.add(key, SymbolType.PREPROCESSING, sequence)
        return symbol_table
Example #10
0
    def parse_encoder(key: str, specs: dict):
        class_path = "encodings"
        valid_encoders = ReflectionHandler.all_nonabstract_subclass_basic_names(
            DatasetEncoder, "Encoder", class_path)
        encoder = ObjectParser.get_class(specs, valid_encoders, "Encoder",
                                         class_path, "EncodingParser", key)
        params = ObjectParser.get_all_params(specs, class_path,
                                             encoder.__name__[:-7], key)

        required_params = [
            p for p in list(
                inspect.signature(encoder.__init__).parameters.keys())
            if p != "self"
        ]
        ParameterValidator.assert_all_in_valid_list(
            params.keys(), required_params, "EncoderParser",
            f"{key}/{encoder.__name__.replace('Encoder', '')}")

        return encoder, params
Example #11
0
    def _parse_motif(key: str, motif_item: dict) -> Motif:

        motif_dict = copy.deepcopy(motif_item)

        valid_values = ReflectionHandler.all_nonabstract_subclass_basic_names(MotifInstantiationStrategy, "Instantiation", "motif_instantiation_strategy/")
        instantiation_object = ObjectParser.parse_object(motif_item["instantiation"], valid_values, "Instantiation",
                                                         "motif_instantiation_strategy", "MotifParser", key)
        motif_dict["instantiation"] = instantiation_object
        motif_dict["identifier"] = key

        if "name_chain1" in motif_item:
            motif_dict["name_chain1"] = Chain[motif_item["name_chain1"].upper()]
        if "name_chain2" in motif_item:
            motif_dict["name_chain2"] = Chain[motif_item["name_chain2"].upper()]

        assert "seed" in motif_dict or all(el in motif_dict for el in ["name_chain1", "name_chain2", "seed_chain1", "seed_chain2"]), \
            "MotifParser: please check the documentation for motif definition. Either parameter `seed` has to be set (for simulation in single " \
            "chain data) or all of the parameters `name_chain1`, `name_chain2`, `seed_chain1`, `seed_chain2` (for simulation for paired chain data)."

        motif = Motif(**motif_dict)

        return motif
 def get_documentation():
     doc = str(TrainMLModelInstruction.__doc__)
     valid_values = str([metric.name.lower() for metric in Metric])[1:-1].replace("'", "`")
     valid_strategies = str(ReflectionHandler.all_nonabstract_subclass_basic_names(HPOptimizationStrategy, "",
                                                                                   "hyperparameter_optimization/strategy/"))[1:-1]\
         .replace("'", "`")
     mapping = {
         "dataset (Dataset)": "dataset",
         "hp_strategy (HPOptimizationStrategy)": "strategy",
         "hp_settings": "settings",
         "assessment (SplitConfig)": "assessment",
         "selection (SplitConfig)": "selection",
         "optimization_metric (Metric)": "optimization_metric",
         "label_configuration (LabelConfiguration)": "labels (list)",
         "data_reports": "reports",
         "a list of metrics": f"a list of metrics ({valid_values})",
         "a metric to use for optimization": f"a metric to use for optimization (one of {valid_values})",
         "Valid values are objects of any class inheriting :py:obj:`~immuneML.hyperparameter_optimization.strategy."
         "HPOptimizationStrategy.HPOptimizationStrategy`.": f"Valid values are: {valid_strategies}.",
         "the reports to be specified here have to be :py:obj:`~immuneML.reports.train_ml_model_reports.TrainMLModelReport.TrainMLModelReport` reports.": f"the reports that can be provided here are :ref:`{TrainMLModelReport.get_title()}`."
     }
     doc = update_docs_per_mapping(doc, mapping)
     return doc
Example #13
0
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: Path = None) -> DatasetExportInstruction:
        location = "DatasetExportParser"
        ParameterValidator.assert_keys(
            list(instruction.keys()), DatasetExportParser.REQUIRED_KEYS +
            DatasetExportParser.OPTIONAL_KEYS, location, key, False)
        ParameterValidator.assert_keys_present(
            list(instruction.keys()), DatasetExportParser.REQUIRED_KEYS,
            location, key)

        valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(
            DataExporter, "Exporter", 'dataset_export/')
        ParameterValidator.assert_all_in_valid_list(
            instruction["export_formats"], valid_formats, location,
            "export_formats")
        ParameterValidator.assert_all_in_valid_list(
            instruction["datasets"],
            symbol_table.get_keys_by_type(SymbolType.DATASET), location,
            "datasets")

        return DatasetExportInstruction(
            datasets=[
                symbol_table.get(dataset_key)
                for dataset_key in instruction["datasets"]
            ],
            exporters=[
                ReflectionHandler.get_class_by_name(f"{key}Exporter",
                                                    "dataset_export/")
                for key in instruction["export_formats"]
            ],
            preprocessing_sequence=symbol_table.get(
                instruction["preprocessing_sequence"])
            if "preprocessing_sequence" in instruction else None,
            name=key)