コード例 #1
0
ファイル: ImportParser.py プロジェクト: dn070017/immuneML
    def _parse_dataset(key: str, dataset_specs: dict,
                       symbol_table: SymbolTable,
                       result_path: Path) -> SymbolTable:
        """Validate one dataset specification, import the dataset and register it.

        Args:
            key: identifier of the dataset; it is used in error messages, set
                as the name of the imported dataset and used as the key under
                which the dataset is stored in the symbol table.
            dataset_specs: specification of the dataset; "format" is read from
                it to select the import class.
            symbol_table: table the imported dataset is added to.
            result_path: forwarded to ``ImportParser._prepare_params``.

        Returns:
            The symbol table that was passed in, with the dataset added.

        Raises:
            AssertionError: if "metadata_file", "paired" or "receptor_chains"
                is required by the other parameters but missing.
            KeyError: if importing the dataset raised a KeyError (re-raised
                with an explanatory message, chained to the original error).
            Exception: if importing the dataset failed in any other way
                (re-raised with the dataset key, chained to the original error).
        """
        location = "ImportParser"

        ParameterValidator.assert_keys(list(dataset_specs.keys()),
                                       ImportParser.valid_keys, location,
                                       f"datasets:{key}", False)

        valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(
            DataImport, "Import", "IO/dataset_import/")
        dataset_format = dataset_specs["format"]
        ParameterValidator.assert_in_valid_list(dataset_format, valid_formats,
                                                location, "format")

        import_cls = ReflectionHandler.get_class_by_name(
            f"{dataset_format}Import")
        params = ImportParser._prepare_params(dataset_specs, result_path, key)

        if "is_repertoire" in params:
            is_repertoire = params["is_repertoire"]
            # NOTE(review): the truthiness test below relies on this call
            # rejecting non-bool values - confirm against ParameterValidator.
            ParameterValidator.assert_type_and_value(is_repertoire, bool,
                                                     location, "is_repertoire")

            if is_repertoire:
                # IReceptorImport is the only import class exempt from the
                # metadata file requirement.
                if import_cls != IReceptorImport:
                    assert "metadata_file" in params, f"{location}: Missing parameter: metadata_file under {key}/params/"
                    ParameterValidator.assert_type_and_value(
                        params["metadata_file"], Path, location,
                        "metadata_file")
            else:
                assert "paired" in params, f"{location}: Missing parameter: paired under {key}/params/"
                ParameterValidator.assert_type_and_value(
                    params["paired"], bool, location, "paired")

                if params["paired"]:
                    assert "receptor_chains" in params, f"{location}: Missing parameter: receptor_chains under {key}/params/"
                    ParameterValidator.assert_in_valid_list(
                        params["receptor_chains"],
                        ["_".join(cp.value) for cp in ChainPair], location,
                        "receptor_chains")

        try:
            dataset = import_cls.import_dataset(params, key)
            dataset.name = key
            symbol_table.add(key, SymbolType.DATASET, dataset)
        except KeyError as key_error:
            # A KeyError raised without arguments must not turn into an
            # IndexError while the message is being built.
            missing_keyword = key_error.args[0] if key_error.args else key_error
            raise KeyError(
                f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
                f"The keyword {missing_keyword} was missing. This either means this argument was "
                f"not defined under definitions/datasets/{key}/params, or this column was missing from "
                f"an input data file. ") from key_error
        except Exception as ex:
            # Chain the original exception so its traceback is preserved.
            raise Exception(
                f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details."
            ) from ex

        return symbol_table
コード例 #2
0
ファイル: EncodingParser.py プロジェクト: uio-bmi/immuneML
    def parse(encodings: dict, symbol_table: SymbolTable):
        """Parse every encoding specification and register the encoders.

        Each entry of ``encodings`` is handed to ``EncodingParser.parse_encoder``;
        the resulting encoder is stored in the symbol table under the entry's
        key, with the parsed parameters attached as ``"encoder_params"``.

        Returns:
            A tuple of the symbol table and the ``encodings`` dict passed in.
        """
        for encoding_id, encoding_spec in encodings.items():
            encoder, encoder_params = EncodingParser.parse_encoder(encoding_id, encoding_spec)
            config = {"encoder_params": encoder_params}
            symbol_table.add(encoding_id, SymbolType.ENCODING, encoder, config)

        return symbol_table, encodings
コード例 #3
0
    def _parse_report(key: str, params: dict, symbol_table: SymbolTable):
        """Build one report object from its specification and register it.

        Args:
            key: identifier under which the report is stored in the symbol table.
            params: specification of the report, passed to ``ObjectParser.parse_object``.
            symbol_table: table the report object is added to.

        Returns:
            A tuple of the symbol table and the params returned by
            ``ObjectParser.parse_object``.
        """
        report_class_names = ReflectionHandler.all_nonabstract_subclass_basic_names(Report, "", "reports/")

        report, parsed_params = ObjectParser.parse_object(
            params, report_class_names, "", "reports/", "ReportParser", key,
            builder=True, return_params_dict=True)

        symbol_table.add(key, SymbolType.REPORT, report)
        return symbol_table, parsed_params
コード例 #4
0
ファイル: OutputParser.py プロジェクト: uio-bmi/immuneML
    def parse(specs: dict, symbol_table: SymbolTable) -> dict:
        """Validate the "output" section of the specs, or fill in the default.

        When ``specs`` has no "output" entry, ``{"format": "HTML"}`` is stored
        in ``specs`` under that key. Otherwise the entry is checked by
        ``ParameterValidator``, with "HTML" as the only listed valid format.
        The output specification is added to the symbol table under "output".

        Returns:
            The output specification stored under ``specs["output"]``.
        """
        if "output" not in specs:
            specs["output"] = {"format": "HTML"}
        else:
            given_output = specs["output"]
            ParameterValidator.assert_keys(given_output, ["format"], "OutputParser", "output")
            ParameterValidator.assert_in_valid_list(given_output["format"], ["HTML"], "OutputParser", "format")

        output_specs = specs["output"]
        symbol_table.add("output", SymbolType.OUTPUT, output_specs)
        return output_specs
コード例 #5
0
ファイル: MLParser.py プロジェクト: dn070017/immuneML
    def parse(specification: dict, symbol_table: SymbolTable):
        """Parse all ML method specifications and register the methods.

        Each value in ``specification`` is replaced in place by the config
        returned from ``MLParser._parse_ml_method``, and the ML method is
        added to the symbol table together with that config.

        Returns:
            A tuple of the symbol table and the updated ``specification``.
        """
        # Only values of existing keys are reassigned, so the dict keeps its
        # size while it is being iterated.
        for method_id in specification:
            method_spec = specification[method_id]
            ml_method, config = MLParser._parse_ml_method(method_id, method_spec)
            specification[method_id] = config
            symbol_table.add(method_id, SymbolType.ML_METHOD, ml_method, config)

        return symbol_table, specification
コード例 #6
0
ファイル: MotifParser.py プロジェクト: uio-bmi/immuneML
    def parse_motifs(motifs: dict, symbol_table: SymbolTable):
        """Check the keys of every motif specification and register the motifs.

        Each motif is built by ``MotifParser._parse_motif`` and stored in the
        symbol table under its key.

        Returns:
            A tuple of the symbol table and the ``motifs`` dict passed in.
        """
        valid_motif_keys = [
            "seed",
            "instantiation",
            "seed_chain1",
            "seed_chain2",
            "name_chain1",
            "name_chain2",
        ]

        for motif_id, motif_spec in motifs.items():
            ParameterValidator.assert_keys(motif_spec.keys(), valid_motif_keys, "MotifParser", motif_id, exclusive=False)
            parsed_motif = MotifParser._parse_motif(motif_id, motif_spec)
            symbol_table.add(motif_id, SymbolType.MOTIF, parsed_motif)

        return symbol_table, motifs
コード例 #7
0
ファイル: InstructionParser.py プロジェクト: uio-bmi/immuneML
    def parse_instruction(key: str, instruction: dict, symbol_table: SymbolTable, path) -> tuple:
        """Parse one instruction with the parser matching its "type".

        Args:
            key: identifier under which the instruction object is registered.
            instruction: specification of the instruction; must contain "type".
            symbol_table: table passed to the parser and updated with the result.
            path: forwarded unchanged to the instruction parser.

        Returns:
            A tuple of the instruction specification merged over its default
            parameters, and the symbol table.
        """
        ParameterValidator.assert_keys_present(list(instruction.keys()), ["type"], InstructionParser.__name__, key)

        parser_class_names = ReflectionHandler.discover_classes_by_partial_name("Parser", "dsl/instruction_parsers/")
        # Drops the last six characters of each class name - presumably the
        # "Parser" suffix, leaving the instruction type name.
        valid_instructions = [class_name[:-6] for class_name in parser_class_names]
        instruction_type = instruction["type"]
        ParameterValidator.assert_in_valid_list(instruction_type, valid_instructions, "InstructionParser", "type")

        # Values given in the specification take precedence over the defaults.
        defaults = DefaultParamsLoader.load("instructions/", instruction_type)
        merged_instruction = {**defaults, **instruction}

        parser_cls = ReflectionHandler.get_class_by_name(f"{instruction_type}Parser", "instruction_parsers/")
        instruction_object = parser_cls().parse(key, merged_instruction, symbol_table, path)
        symbol_table.add(key, SymbolType.INSTRUCTION, instruction_object)

        return merged_instruction, symbol_table
コード例 #8
0
    def test_parse(self):
        """Parsing a DatasetExport spec yields an instruction with 2 exporters and 1 dataset."""
        symbol_table = SymbolTable()
        symbol_table.add("d1", SymbolType.DATASET, RepertoireDataset())

        specs = {
            "type": "DatasetExport",
            "export_formats": ["Pickle", "AIRR"],
            "datasets": ["d1"],
        }

        instruction = DatasetExportParser().parse("instr1", specs, symbol_table)

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(instruction, DatasetExportInstruction)
        self.assertEqual(2, len(instruction.exporters))
        self.assertEqual(1, len(instruction.datasets))
コード例 #9
0
    def test_parse(self):
        """A valid Subsampling spec parses; invalid specs raise AssertionError."""

        path = PathBuilder.build(
            f'{EnvironmentSettings.tmp_test_path}subsampling_parser/')
        # Registered right after the directory is built so that it is removed
        # even when a later step or assertion fails.
        self.addCleanup(shutil.rmtree, path)

        dataset = RandomDatasetGenerator.generate_receptor_dataset(
            30, {3: 1}, {2: 1}, {}, path)

        symbol_table = SymbolTable()
        symbol_table.add("d1", SymbolType.DATASET, dataset)

        def build_specs(dataset_key, sizes, export_formats):
            # The four specifications differ only in these three values.
            return {
                'dataset': dataset_key,
                'type': 'Subsampling',
                'subsampled_dataset_sizes': sizes,
                'dataset_export_formats': export_formats
            }

        SubsamplingParser().parse(
            'inst1', build_specs('d1', [10, 20], ['Pickle']), symbol_table)

        invalid_specs = [
            # presumably 50 exceeds the size of the generated dataset (30)
            build_specs('d1', [10, 50], ['Pickle']),
            # 'd2' was never added to the symbol table
            build_specs('d2', [10, 20], ['Pickle']),
            # NOTE(review): this case also uses the unknown dataset 'd2', so it
            # does not isolate the 'Random' export format - confirm whether
            # 'd1' was intended here.
            build_specs('d2', [10, 20], ['Random']),
        ]

        for specs in invalid_specs:
            with self.subTest(specs=specs), self.assertRaises(AssertionError):
                SubsamplingParser().parse('inst1', specs, symbol_table)
コード例 #10
0
    def _parse_sequence(key: str, preproc_sequence: list,
                        symbol_table: SymbolTable) -> SymbolTable:
        """Build the preprocessing objects of one sequence and register them.

        Args:
            key: identifier under which the sequence is stored in the symbol table.
            preproc_sequence: list of dicts, each mapping a step key to the
                specification of that preprocessing step.
            symbol_table: table the list of preprocessing objects is added to.

        Returns:
            The symbol table that was passed in, with the sequence added.
        """
        valid_preprocessing_classes = ReflectionHandler.all_nonabstract_subclass_basic_names(
            Preprocessor, "", "preprocessing/")

        sequence = []

        for item in preproc_sequence:
            for step_key, step in item.items():
                # NOTE(review): the original code rebound the loop variable to
                # the returned params (`step = params`), which had no effect.
                # If the parsed params were meant to be written back into the
                # specification, that needs `item[step_key] = params` - confirm.
                obj, _ = ObjectParser.parse_object(
                    step, valid_preprocessing_classes, "", "preprocessing/",
                    "PreprocessingParser", step_key, True, True)
                sequence.append(obj)

        symbol_table.add(key, SymbolType.PREPROCESSING, sequence)
        return symbol_table
コード例 #11
0
    def test_parse_simulation(self):
        """Parsing a simulation with one implanting registers it as "sim1"."""

        symbol_table = SymbolTable()

        motif = Motif("motif1", GappedKmerInstantiation(position_weights={0: 1}), seed="CAS")
        symbol_table.add("motif1", SymbolType.MOTIF, motif)

        signal_motifs = [symbol_table.get("motif1")]
        implanting_strategy = HealthySequenceImplanting(GappedMotifImplanting(),
                                                        implanting_computation=ImplantingComputation.ROUND)
        symbol_table.add("signal1", SymbolType.SIGNAL, Signal("signal1", signal_motifs, implanting_strategy))

        implanting_spec = {
            "signals": ["signal1"],
            "dataset_implanting_rate": 0.5,
            "repertoire_implanting_rate": 0.1
        }
        simulation = {"sim1": {"var1": implanting_spec}}

        # the returned specs are not inspected by this test
        symbol_table, _ = SimulationParser.parse_simulations(simulation, symbol_table)

        self.assertTrue(symbol_table.contains("sim1"))
        parsed_simulation = symbol_table.get("sim1")
        self.assertEqual(1, len(parsed_simulation.implantings))
コード例 #12
0
    def _parse_simulation(key: str, simulation: dict, symbol_table: SymbolTable) -> SymbolTable:
        """Build a Simulation from its implanting specifications and register it.

        Args:
            key: identifier under which the simulation is stored in the symbol table.
            simulation: dict mapping an implanting name to its settings.
            symbol_table: table used to resolve signal ids; the simulation is added to it.

        Returns:
            The symbol table that was passed in, with the simulation added.

        Raises:
            AssertionError: if the dataset implanting rates sum to more than 1.
        """
        location = "SimulationParser"
        valid_implanting_keys = ["dataset_implanting_rate", "repertoire_implanting_rate", "signals", "is_noise"]
        implantings = []

        for name, settings in simulation.items():
            ParameterValidator.assert_keys(settings.keys(), valid_implanting_keys, location, name, exclusive=False)
            known_signals = symbol_table.get_keys_by_type(SymbolType.SIGNAL)
            ParameterValidator.assert_keys(settings["signals"], known_signals, location, name, False)

            # Work on a copy so the specification itself keeps the signal ids;
            # the ids are swapped for the objects stored in the symbol table.
            kwargs = {
                **copy.deepcopy(settings),
                "signals": [symbol_table.get(signal_id) for signal_id in settings["signals"]],
                "name": name,
            }
            implantings.append(Implanting(**kwargs))

        total_dataset_rate = sum(settings["dataset_implanting_rate"] for settings in simulation.values())
        assert total_dataset_rate <= 1, \
            "The total dataset implanting rate can not exceed 1."

        symbol_table.add(key, SymbolType.SIMULATION, Simulation(implantings))

        return symbol_table
コード例 #13
0
ファイル: SignalParser.py プロジェクト: uio-bmi/immuneML
    def parse_signals(signals: dict, symbol_table: SymbolTable):
        """Build a Signal for every specification and register it.

        For each entry, the required keys are checked, the implanting strategy
        is obtained from ``SignalParser._get_implanting_strategy`` and the
        listed motif ids are checked against the motifs in the symbol table
        before being resolved to the stored motif objects.

        Returns:
            A tuple of the symbol table and the ``signals`` dict passed in.
        """
        for signal_id, spec in signals.items():
            ParameterValidator.assert_keys_present(spec.keys(), SignalParser.VALID_KEYS, "SignalParser", signal_id)

            strategy = SignalParser._get_implanting_strategy(signal_id, spec)

            known_motif_ids = symbol_table.get_keys_by_type(SymbolType.MOTIF)
            ParameterValidator.assert_keys(spec["motifs"], known_motif_ids, "SignalParser",
                                           f"motifs in signal {signal_id}", False)

            motifs = list(map(symbol_table.get, spec["motifs"]))
            symbol_table.add(signal_id, SymbolType.SIGNAL, Signal(signal_id, motifs, strategy))

        return symbol_table, signals
コード例 #14
0
 def test_add(self):
     """Adding a second item under an existing key emits a warning."""
     table = SymbolTable()
     duplicate_key = "svm1"

     table.add(duplicate_key, SymbolType.ML_METHOD, {})

     with self.assertWarns(Warning):
         table.add(duplicate_key, SymbolType.ML_METHOD, {})
コード例 #15
0
    def test_parse(self):
        """Parsing an ExploratoryAnalysis instruction yields three analysis units.

        Unit "1" uses a report with a preprocessing sequence; units "2" and "3"
        use the Matches report with the MatchedSequencesEncoder, without and
        with labels respectively.
        """

        path = EnvironmentSettings.tmp_test_path / "explanalysisparser/"
        PathBuilder.build(path)
        # Registered right after the directory is built so that it is removed
        # even when a later step or assertion fails.
        self.addCleanup(shutil.rmtree, path)

        dataset = self.prepare_dataset(path)
        report1 = SequenceLengthDistribution()

        file_content = """complex.id	Gene	CDR3	V	J	Species	MHC A	MHC B	MHC class	Epitope	Epitope gene	Epitope species	Reference	Method	Meta	CDR3fix	Score
        100a	TRA	AAAC	TRAV12	TRAJ1	HomoSapiens	HLA-A*11:01	B2M	MHCI	AVFDRKSDAK	EBNA4	EBV
        """

        refs_path = path / "refs.tsv"
        with open(refs_path, "w") as file:
            file.write(file_content)

        refs = {
            "params": {
                "path": refs_path,
                "region_type": "FULL_SEQUENCE"
            },
            "format": "VDJdb"
        }

        report2 = Matches.build_object()
        encoding = MatchedSequencesEncoder
        p1 = [SubjectRepertoireCollector()]

        instruction = {
            "type": "ExploratoryAnalysis",
            "number_of_processes": 32,
            "analyses": {
                "1": {
                    "dataset": "d1",
                    "report": "r1",
                    "preprocessing_sequence": "p1"
                },
                "2": {
                    "dataset": "d1",
                    "report": "r2",
                    "encoding": "e1",
                },
                "3": {
                    "dataset": "d1",
                    "report": "r2",
                    "encoding": "e1",
                    "labels": ["l1"]
                }
            }
        }

        symbol_table = SymbolTable()
        symbol_table.add("d1", SymbolType.DATASET, dataset)
        symbol_table.add("r1", SymbolType.REPORT, report1)
        symbol_table.add("r2", SymbolType.REPORT, report2)
        symbol_table.add(
            "e1", SymbolType.ENCODING, encoding,
            {"encoder_params": {
                "max_edit_distance": 1,
                "reference": refs
            }})
        symbol_table.add("p1", SymbolType.PREPROCESSING, p1)

        process = ExploratoryAnalysisParser().parse("a", instruction,
                                                    symbol_table)

        # Units in the order they are stored; indices 0..2 are expected to
        # match analyses "1".."3" above.
        units = list(process.state.exploratory_analysis_units.values())

        self.assertEqual(3, len(units))
        self.assertIsInstance(units[0].report, SequenceLengthDistribution)

        # testing matches with and without labels
        self.assertIsInstance(units[1].report, Matches)
        self.assertIsInstance(units[1].encoder, MatchedSequencesEncoder)
        self.assertEqual(1, len(units[1].encoder.reference_sequences))

        self.assertIsInstance(units[2].report, Matches)
        self.assertIsInstance(units[2].encoder, MatchedSequencesEncoder)
        self.assertEqual(1, len(units[2].encoder.reference_sequences))
        self.assertEqual("l1",
                         units[2].label_config.get_labels_by_name()[0])

        self.assertEqual(
            32,
            process.state.exploratory_analysis_units["2"].number_of_processes)