コード例 #1
0
    def _prepare_optional_params(self, analysis: dict,
                                 symbol_table: SymbolTable) -> dict:
        """Collect the optional parts of an exploratory analysis spec.

        Builds the encoder and label configuration when both 'encoding' and
        'labels' are present, and copies over the optional preprocessing
        sequence and process count. Raises KeyError when only one of
        'encoding'/'labels' is given.
        """
        params = {}
        dataset = symbol_table.get(analysis["dataset"])

        has_encoding = "encoding" in analysis
        has_labels = "labels" in analysis

        if has_encoding and has_labels:
            encoder_params = symbol_table.get_config(analysis["encoding"])["encoder_params"]
            params["encoder"] = symbol_table.get(analysis["encoding"]).build_object(dataset, **encoder_params)
            label_config = LabelConfiguration()
            for label in analysis["labels"]:
                label_config.add_label(label, self._get_label_values(label, dataset))
            params["label_config"] = label_config
        elif has_encoding or has_labels:
            raise KeyError(
                "ExploratoryAnalysisParser: keys for analyses are not properly defined. "
                "If encoding is defined, labels have to be defined as well and vice versa."
            )

        if "preprocessing_sequence" in analysis:
            params["preprocessing_sequence"] = symbol_table.get(analysis["preprocessing_sequence"])

        if "number_of_processes" in analysis:
            params["number_of_processes"] = analysis["number_of_processes"]

        return params
コード例 #2
0
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: str = None) -> DatasetExportInstruction:
        """Validate a DatasetExport instruction spec and build the instruction object."""
        location = "DatasetExportParser"
        ParameterValidator.assert_keys(list(instruction.keys()),
                                       DatasetExportParser.VALID_KEYS,
                                       location, key)

        valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(
            DataExporter, "Exporter", 'dataset_export/')
        ParameterValidator.assert_all_in_valid_list(
            instruction["export_formats"], valid_formats, location, "export_formats")
        ParameterValidator.assert_all_in_valid_list(
            instruction["datasets"],
            symbol_table.get_keys_by_type(SymbolType.DATASET), location, "datasets")

        datasets = [symbol_table.get(dataset_key) for dataset_key in instruction["datasets"]]
        # comprehension variable renamed so it no longer shadows the `key` parameter
        exporters = [ReflectionHandler.get_class_by_name(f"{fmt}Exporter", "dataset_export/")
                     for fmt in instruction["export_formats"]]

        return DatasetExportInstruction(datasets=datasets, exporters=exporters, name=key)
コード例 #3
0
    def parse(specification: dict, symbol_table: SymbolTable):
        """Parse every ML method definition, register it in the symbol table,
        and write the fully-resolved config back into the specification."""
        for method_id, method_spec in specification.items():
            ml_method, config = MLParser._parse_ml_method(method_id, method_spec)
            specification[method_id] = config
            symbol_table.add(method_id, SymbolType.ML_METHOD, ml_method, config)

        return symbol_table, specification
コード例 #4
0
File: EncodingParser.py  Project: rofrank/immuneML
    def parse(encodings: dict, symbol_table: SymbolTable):
        """Register each encoding definition in the symbol table together with its resolved params."""
        for encoding_key, encoding_spec in encodings.items():
            encoder, params = EncodingParser.parse_encoder(encoding_key, encoding_spec)
            symbol_table.add(encoding_key, SymbolType.ENCODING, encoder, {"encoder_params": params})

        return symbol_table, encodings
コード例 #5
0
File: OutputParser.py  Project: rofrank/immuneML
    def parse(specs: dict, symbol_table: SymbolTable) -> dict:
        """Validate the 'output' section of the spec (defaulting to HTML when absent),
        register it in the symbol table, and return the output sub-spec."""
        if "output" not in specs:
            specs["output"] = {"format": "HTML"}
        else:
            ParameterValidator.assert_keys(specs["output"], ["format"],
                                           "OutputParser", "output")
            ParameterValidator.assert_in_valid_list(specs["output"]["format"],
                                                    ["HTML"], "OutputParser", "format")

        symbol_table.add("output", SymbolType.OUTPUT, specs["output"])
        return specs["output"]
コード例 #6
0
    def parse_signals(signals: dict, symbol_table: SymbolTable):
        """Build a Signal object for every signal spec and register it in the symbol table."""
        for signal_id, spec in signals.items():
            ParameterValidator.assert_keys_present(spec.keys(), SignalParser.VALID_KEYS,
                                                   "SignalParser", signal_id)

            strategy = SignalParser._get_implanting_strategy(signal_id, spec)

            ParameterValidator.assert_keys(spec["motifs"],
                                           symbol_table.get_keys_by_type(SymbolType.MOTIF),
                                           "SignalParser", f"motifs in signal {signal_id}", False)

            motifs = [symbol_table.get(motif_id) for motif_id in spec["motifs"]]
            symbol_table.add(signal_id, SymbolType.SIGNAL, Signal(signal_id, motifs, strategy))

        return symbol_table, signals
コード例 #7
0
File: ReportParser.py  Project: rofrank/immuneML
    def _parse_report(key: str, params: dict, symbol_table: SymbolTable):
        """Parse a single report definition and register the resulting report object."""
        valid_values = ReflectionHandler.all_nonabstract_subclass_basic_names(Report, "", "reports/")

        report_object, params = ObjectParser.parse_object(params, valid_values, "", "reports/",
                                                          "ReportParser", key,
                                                          builder=True, return_params_dict=True)

        symbol_table.add(key, SymbolType.REPORT, report_object)
        return symbol_table, params
コード例 #8
0
    def test_parse_receptor_dataset(self):
        """Parse a paired VDJdb TSV file into a ReceptorDataset via ImportParser.

        Writes two paired TRA/TRB chains (complex.id 3050 and 15760) to a temp
        file, so the parsed dataset should contain exactly 2 receptors.
        """
        # NOTE: fields below are tab-separated VDJdb export columns; do not reformat.
        file_content = """complex.id	Gene	CDR3	V	J	Species	MHC A	MHC B	MHC class	Epitope	Epitope gene	Epitope species	Reference	Method	Meta	CDR3fix	Score
3050	TRB	CASSPPRVYSNGAGLAGVGWRNEQFF	TRBV5-4*01	TRBJ2-1*01	HomoSapiens	HLA-A*11:01	B2M	MHCI	AVFDRKSDAK	EBNA4	EBV	https://www.10xgenomics.com/resources/application-notes/a-new-way-of-exploring-immunity-linking-highly-multiplexed-antigen-recognition-to-immune-repertoire-and-phenotype/#	{"frequency": "1/11684", "identification": "dextramer-sort", "sequencing": "rna-seq", "singlecell": "yes", "verification": ""}	{"cell.subset": "", "clone.id": "", "donor.MHC": "", "donor.MHC.method": "", "epitope.id": "", "replica.id": "", "samples.found": 1, "structure.id": "", "studies.found": 1, "study.id": "", "subject.cohort": "", "subject.id": "1", "tissue": ""}	{"cdr3": "CASSPPRVYSNGAGLAGVGWRNEQFF", "cdr3_old": "CASSPPRVYSNGAGLAGVGWRNEQFF", "fixNeeded": false, "good": true, "jCanonical": true, "jFixType": "NoFixNeeded", "jId": "TRBJ2-1*01", "jStart": 21, "vCanonical": true, "vEnd": 4, "vFixType": "NoFixNeeded", "vId": "TRBV5-4*01"}	0
15760	TRB	CASSWTWDAATLWGQGALGGANVLTF	TRBV5-5*01	TRBJ2-6*01	HomoSapiens	HLA-A*03:01	B2M	MHCI	KLGGALQAK	IE1	CMV	https://www.10xgenomics.com/resources/application-notes/a-new-way-of-exploring-immunity-linking-highly-multiplexed-antigen-recognition-to-immune-repertoire-and-phenotype/#	{"frequency": "1/25584", "identification": "dextramer-sort", "sequencing": "rna-seq", "singlecell": "yes", "verification": ""}	{"cell.subset": "", "clone.id": "", "donor.MHC": "", "donor.MHC.method": "", "epitope.id": "", "replica.id": "", "samples.found": 1, "structure.id": "", "studies.found": 1, "study.id": "", "subject.cohort": "", "subject.id": "3", "tissue": ""}	{"cdr3": "CASSWTWDAATLWGQGALGGANVLTF", "cdr3_old": "CASSWTWDAATLWGQGALGGANVLTF", "fixNeeded": false, "good": true, "jCanonical": true, "jFixType": "NoFixNeeded", "jId": "TRBJ2-6*01", "jStart": 19, "vCanonical": true, "vEnd": 4, "vFixType": "NoFixNeeded", "vId": "TRBV5-5*01"}	0
3050	TRA	CAAIYESRGSTLGRLYF	TRAV13-1*01	TRAJ18*01	HomoSapiens	HLA-A*11:01	B2M	MHCI	AVFDRKSDAK	EBNA4	EBV	https://www.10xgenomics.com/resources/application-notes/a-new-way-of-exploring-immunity-linking-highly-multiplexed-antigen-recognition-to-immune-repertoire-and-phenotype/#	{"frequency": "1/11684", "identification": "dextramer-sort", "sequencing": "rna-seq", "singlecell": "yes", "verification": ""}	{"cell.subset": "", "clone.id": "", "donor.MHC": "", "donor.MHC.method": "", "epitope.id": "", "replica.id": "", "samples.found": 1, "structure.id": "", "studies.found": 1, "study.id": "", "subject.cohort": "", "subject.id": "1", "tissue": ""}	{"cdr3": "CAAIYESRGSTLGRLYF", "cdr3_old": "CAAIYESRGSTLGRLYF", "fixNeeded": false, "good": true, "jCanonical": true, "jFixType": "NoFixNeeded", "jId": "TRAJ18*01", "jStart": 7, "oldVEnd": -1, "oldVFixType": "FailedBadSegment", "oldVId": null, "vCanonical": true, "vEnd": 3, "vFixType": "ChangeSegment", "vId": "TRAV13-1*01"}	0
15760	TRA	CALRLNNQGGKLIF	TRAV9-2*01	TRAJ23*01	HomoSapiens	HLA-A*03:01	B2M	MHCI	KLGGALQAK	IE1	CMV	https://www.10xgenomics.com/resources/application-notes/a-new-way-of-exploring-immunity-linking-highly-multiplexed-antigen-recognition-to-immune-repertoire-and-phenotype/#	{"frequency": "1/25584", "identification": "dextramer-sort", "sequencing": "rna-seq", "singlecell": "yes", "verification": ""}	{"cell.subset": "", "clone.id": "", "donor.MHC": "", "donor.MHC.method": "", "epitope.id": "", "replica.id": "", "samples.found": 1, "structure.id": "", "studies.found": 1, "study.id": "", "subject.cohort": "", "subject.id": "3", "tissue": ""}	{"cdr3": "CALRLNNQGGKLIF", "cdr3_old": "CALRLNNQGGKLIF", "fixNeeded": false, "good": true, "jCanonical": true, "jFixType": "NoFixNeeded", "jId": "TRAJ23*01", "jStart": 6, "vCanonical": true, "vEnd": 3, "vFixType": "NoFixNeeded", "vId": "TRAV9-2*01"}	0
                """
        path = EnvironmentSettings.root_path + "test/tmp/dslimportparservdj/"
        data_path = EnvironmentSettings.root_path + "test/tmp/dslimportparservdj/receptor_data/"
        PathBuilder.build(data_path)

        with open(data_path + "receptors.tsv", "w") as file:
            file.writelines(file_content)

        # paired=True + receptor_chains makes VDJdbImport pair TRA/TRB rows by complex.id
        st, desc = ImportParser.parse(
            {
                "datasets": {
                    "d1": {
                        "format": "VDJdb",
                        "params": {
                            "is_repertoire": False,
                            "paired": True,
                            "receptor_chains": "TRA_TRB",
                            "path": data_path
                        }
                    }
                }
            }, SymbolTable(), path)

        dataset = st.get("d1")
        self.assertTrue(isinstance(dataset, ReceptorDataset))
        self.assertEqual(2, dataset.get_example_count())

        shutil.rmtree(path)
コード例 #9
0
    def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: str) -> MLApplicationInstruction:
        """Validate an MLApplication instruction spec and construct the instruction object."""
        location = MLApplicationParser.__name__

        ParameterValidator.assert_keys(instruction.keys(), ['type', 'dataset', 'label', 'pool_size', 'config_path', 'store_encoded_data'], location, key)
        ParameterValidator.assert_in_valid_list(instruction['dataset'], symbol_table.get_keys_by_type(SymbolType.DATASET), location, f"{key}: dataset")
        ParameterValidator.assert_type_and_value(instruction['pool_size'], int, location, f"{key}: pool_size", min_inclusive=1)
        # both of these parameters must be plain strings
        for param in ('label', 'config_path'):
            ParameterValidator.assert_type_and_value(instruction[param], str, location, f'{key}: {param}')
        ParameterValidator.assert_type_and_value(instruction['store_encoded_data'], bool, location, f'{key}: store_encoded_data')

        hp_setting, label = self._parse_hp_setting(instruction, path, key)

        return MLApplicationInstruction(
            dataset=symbol_table.get(instruction['dataset']),
            name=key,
            pool_size=instruction['pool_size'],
            label_configuration=LabelConfiguration([label]),
            hp_setting=hp_setting,
            store_encoded_data=instruction['store_encoded_data'])
コード例 #10
0
    def test_parse(self):
        """PreprocessingParser should register each sequence under its key, unchanged."""
        workflow_specs = {
            "seq1": [{"filter_chain_B": {"ChainRepertoireFilter": {"keep_chain": "A"}}}],
            "seq2": [{"filter_chain_A": {"ChainRepertoireFilter": {"keep_chain": "B"}}}],
        }

        table, specs = PreprocessingParser.parse(workflow_specs, SymbolTable())

        for seq_key in ("seq1", "seq2"):
            self.assertTrue(table.contains(seq_key))
        seq1 = table.get("seq1")
        self.assertTrue(isinstance(seq1, list) and len(seq1) == 1)
        self.assertEqual(list(workflow_specs.keys()), list(specs.keys()))
コード例 #11
0
    def test_parse(self):
        """A valid DatasetExport spec yields an instruction with all exporters and datasets."""
        specs = {"type": "DatasetExport",
                 "export_formats": ["Pickle", "AIRR"],
                 "datasets": ["d1"]}

        symbol_table = SymbolTable()
        symbol_table.add("d1", SymbolType.DATASET, RepertoireDataset())

        instruction = DatasetExportParser().parse("instr1", specs, symbol_table)

        self.assertTrue(isinstance(instruction, DatasetExportInstruction))
        self.assertEqual(2, len(instruction.exporters))
        self.assertEqual(1, len(instruction.datasets))
コード例 #12
0
    def test_parse(self):
        """SubsamplingParser accepts a valid spec and rejects bad sizes, datasets and formats."""
        path = PathBuilder.build(
            f'{EnvironmentSettings.tmp_test_path}subsampling_parser/')
        dataset = RandomDatasetGenerator.generate_receptor_dataset(
            30, {3: 1}, {2: 1}, {}, path)

        symbol_table = SymbolTable()
        symbol_table.add("d1", SymbolType.DATASET, dataset)

        def make_spec(dataset_key='d1', sizes=(10, 20), formats=('Pickle',)):
            # helper: build one Subsampling instruction spec
            return {'dataset': dataset_key,
                    'type': 'Subsampling',
                    'subsampled_dataset_sizes': list(sizes),
                    'dataset_export_formats': list(formats)}

        # a fully valid spec parses without raising
        SubsamplingParser().parse('inst1', make_spec(), symbol_table)

        invalid_specs = [
            make_spec(sizes=(10, 50)),                          # size exceeds example count (30)
            make_spec(dataset_key='d2'),                        # unknown dataset key
            make_spec(dataset_key='d2', formats=('Random',)),   # unknown dataset + export format
        ]
        for spec in invalid_specs:
            with self.assertRaises(AssertionError):
                SubsamplingParser().parse('inst1', spec, symbol_table)

        shutil.rmtree(path)
コード例 #13
0
File: MotifParser.py  Project: rofrank/immuneML
    def parse_motifs(motifs: dict, symbol_table: SymbolTable):
        """Validate and register every motif definition in the symbol table."""
        valid_motif_keys = ["seed", "instantiation", "seed_chain1", "seed_chain2",
                            "name_chain1", "name_chain2"]

        for motif_id, motif_spec in motifs.items():
            # exclusive=False: extra keys beyond the valid ones are tolerated
            ParameterValidator.assert_keys(motif_spec.keys(), valid_motif_keys,
                                           "MotifParser", motif_id, exclusive=False)
            symbol_table.add(motif_id, SymbolType.MOTIF,
                             MotifParser._parse_motif(motif_id, motif_spec))

        return symbol_table, motifs
コード例 #14
0
    def _parse_sequence(key: str, preproc_sequence: list,
                        symbol_table: SymbolTable) -> SymbolTable:
        """Parse one preprocessing sequence and register it under `key`.

        Each entry of `preproc_sequence` is a single-key dict mapping a step
        name to its parameters; the parsed step objects are collected into a
        list and stored in the symbol table.
        """
        sequence = []

        valid_preprocessing_classes = ReflectionHandler.all_nonabstract_subclass_basic_names(
            Preprocessor, "", "preprocessing/")

        for item in preproc_sequence:
            for step_key, step in item.items():
                obj, params = ObjectParser.parse_object(
                    step, valid_preprocessing_classes, "", "preprocessing/",
                    "PreprocessingParser", step_key, True, True)
                # FIX: removed the original `step = params` — rebinding the loop
                # variable had no effect. If the intent was to write the resolved
                # params back into the spec, that would need `item[step_key] = params`
                # (left unchanged here to preserve behavior).
                sequence.append(obj)

        symbol_table.add(key, SymbolType.PREPROCESSING, sequence)
        return symbol_table
コード例 #15
0
    def _parse_settings(self, instruction: dict,
                        symbol_table: SymbolTable) -> list:
        """Build an HPSetting for every entry under instruction["settings"].

        Returns:
            list of HPSetting objects, one per settings entry.

        Raises:
            KeyError: if a required settings key is missing, or if a named
                preprocessing is not defined in the specification.
        """
        try:
            settings = []
            for index, setting in enumerate(instruction["settings"]):
                if "preprocessing" in setting:
                    ParameterValidator.assert_type_and_value(
                        setting["preprocessing"], str,
                        TrainMLModelParser.__name__, f'settings: {index+1}. '
                        f'element: preprocessing')
                    if symbol_table.contains(setting["preprocessing"]):
                        preprocessing_sequence = symbol_table.get(
                            setting["preprocessing"])
                        preproc_name = setting["preprocessing"]
                    else:
                        raise KeyError(
                            f"{TrainMLModelParser.__name__}: preprocessing was set in the TrainMLModel instruction to value "
                            f"{setting['preprocessing']}, but no such preprocessing was defined in the specification under "
                            f"definitions: {PreprocessingParser.keyword}.")
                else:
                    setting["preprocessing"] = None
                    preprocessing_sequence = []
                    preproc_name = None

                ParameterValidator.assert_keys(
                    setting.keys(), ["preprocessing", "ml_method", "encoding"],
                    TrainMLModelParser.__name__,
                    f"settings, {index + 1}. entry")

                encoder = symbol_table.get(setting["encoding"]).build_object(symbol_table.get(instruction["dataset"]),
                                                                             **symbol_table.get_config(setting["encoding"])["encoder_params"])\
                    .set_context({"dataset": symbol_table.get(instruction['dataset'])})

                s = HPSetting(encoder=encoder,
                              encoder_name=setting["encoding"],
                              encoder_params=symbol_table.get_config(
                                  setting["encoding"])["encoder_params"],
                              ml_method=symbol_table.get(setting["ml_method"]),
                              ml_method_name=setting["ml_method"],
                              ml_params=symbol_table.get_config(
                                  setting["ml_method"]),
                              preproc_sequence=preprocessing_sequence,
                              preproc_sequence_name=preproc_name)
                settings.append(s)
            return settings
        except KeyError as key_error:
            # FIX: the deliberately-raised missing-preprocessing KeyError above
            # used to be caught here and wrapped, mangling its message into
            # "parameter <full message> was not defined...". Re-raise it as-is.
            if key_error.args and str(key_error.args[0]).startswith(TrainMLModelParser.__name__):
                raise
            raise KeyError(
                f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction."
            ) from key_error
コード例 #16
0
    def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: str = None) -> SubsamplingInstruction:
        """Validate a Subsampling instruction spec and build the instruction object."""
        location = SubsamplingParser.__name__

        valid_keys = ["type", "dataset", "subsampled_dataset_sizes", "dataset_export_formats"]
        ParameterValidator.assert_keys(instruction.keys(), valid_keys, location, key)

        dataset_keys = symbol_table.get_keys_by_type(SymbolType.DATASET)
        ParameterValidator.assert_in_valid_list(instruction['dataset'], dataset_keys, location, f'{key}/dataset')

        dataset = symbol_table.get(instruction['dataset'])
        sizes = instruction['subsampled_dataset_sizes']
        ParameterValidator.assert_type_and_value(sizes, list, location, f'{key}/subsampled_dataset_sizes')
        # each requested size must fit within the dataset
        ParameterValidator.assert_all_type_and_value(sizes, int, location,
                                                     f'{key}/subsampled_dataset_sizes', 1, dataset.get_example_count())

        valid_export_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, 'Exporter', "dataset_export/")
        export_formats = instruction['dataset_export_formats']
        ParameterValidator.assert_type_and_value(export_formats, list, location, f"{key}/dataset_export_formats")
        ParameterValidator.assert_all_in_valid_list(export_formats, valid_export_formats, location, f"{key}/dataset_export_formats")

        exporters = [ReflectionHandler.get_class_by_name(f"{fmt}Exporter", "dataset_export/")
                     for fmt in export_formats]
        return SubsamplingInstruction(dataset=dataset, subsampled_dataset_sizes=sizes,
                                      dataset_export_formats=exporters, name=key)
コード例 #17
0
File: InstructionParser.py  Project: rofrank/immuneML
    def parse_instruction(key: str, instruction: dict,
                          symbol_table: SymbolTable, path) -> tuple:
        """Dispatch one instruction spec to its dedicated parser and register the result."""
        ParameterValidator.assert_keys_present(list(instruction.keys()), ["type"],
                                               InstructionParser.__name__, key)

        discovered = ReflectionHandler.discover_classes_by_partial_name(
            "Parser", "dsl/instruction_parsers/")
        valid_instructions = [cls[:-6] for cls in discovered]  # strip the "Parser" suffix
        ParameterValidator.assert_in_valid_list(instruction["type"],
                                                valid_instructions,
                                                "InstructionParser", "type")

        parser_cls = ReflectionHandler.get_class_by_name(
            "{}Parser".format(instruction["type"]), "instruction_parsers/")
        instruction_object = parser_cls().parse(key, instruction, symbol_table, path)

        symbol_table.add(key, SymbolType.INSTRUCTION, instruction_object)
        return instruction, symbol_table
コード例 #18
0
    def _prepare_params(self, analysis: dict,
                        symbol_table: SymbolTable) -> dict:
        """Assemble the full parameter dict for one exploratory analysis entry."""
        valid_keys = ["dataset", "report", "preprocessing_sequence", "labels",
                      "encoding", "number_of_processes"]
        ParameterValidator.assert_keys(list(analysis.keys()), valid_keys,
                                       "ExploratoryAnalysisParser", "analysis", False)

        # deep-copy the report so each analysis works on its own instance
        params = {"dataset": symbol_table.get(analysis["dataset"]),
                  "report": copy.deepcopy(symbol_table.get(analysis["report"]))}
        params.update(self._prepare_optional_params(analysis, symbol_table))

        return params
コード例 #19
0
File: ImportParser.py  Project: rofrank/immuneML
    def _parse_dataset(key: str, dataset_specs: dict, symbol_table: SymbolTable, result_path: str) -> SymbolTable:
        """Import a single dataset described by `dataset_specs` and register it.

        Validates the import format and the repertoire/receptor-specific
        parameters, then delegates the actual import to the matching *Import
        class discovered via reflection.

        Raises:
            KeyError: when a required keyword is missing during import.
            Exception: for any other import failure, with added context.
        """
        location = "ImportParser"

        ParameterValidator.assert_keys(list(dataset_specs.keys()), ImportParser.valid_keys, location, f"datasets:{key}", False)

        valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataImport, "Import", "IO/dataset_import/")
        ParameterValidator.assert_in_valid_list(dataset_specs["format"], valid_formats, location, "format")

        import_cls = ReflectionHandler.get_class_by_name("{}Import".format(dataset_specs["format"]))
        params = ImportParser._prepare_params(dataset_specs, result_path, key)

        if "is_repertoire" in params:
            ParameterValidator.assert_type_and_value(params["is_repertoire"], bool, location, "is_repertoire")

            # FIX: use truthiness instead of `== True` / `== False` (PEP 8);
            # the bool type was just asserted, so if/else is equivalent.
            if params["is_repertoire"]:
                if import_cls != IReceptorImport:
                    assert "metadata_file" in params, f"{location}: Missing parameter: metadata_file under {key}/params/"
                    ParameterValidator.assert_type_and_value(params["metadata_file"], str, location, "metadata_file")
            else:
                assert "paired" in params, f"{location}: Missing parameter: paired under {key}/params/"
                ParameterValidator.assert_type_and_value(params["paired"], bool, location, "paired")

                if params["paired"]:
                    assert "receptor_chains" in params, f"{location}: Missing parameter: receptor_chains under {key}/params/"
                    ParameterValidator.assert_in_valid_list(params["receptor_chains"], ["_".join(cp.value) for cp in ChainPair], location, "receptor_chains")

        try:
            dataset = import_cls.import_dataset(params, key)
            dataset.name = key
            symbol_table.add(key, SymbolType.DATASET, dataset)
        except KeyError as key_error:
            # FIX: chain the cause so the original traceback is preserved
            raise KeyError(f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
                           f"The keyword {key_error.args[0]} was missing. This either means this argument was "
                           f"not defined under definitions/datasets/{key}/params, or this column was missing from "
                           f"an input data file. ") from key_error
        except Exception as ex:
            raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.") from ex

        return symbol_table
コード例 #20
0
 def _prepare_reports(self, reports: list,
                      symbol_table: SymbolTable) -> dict:
     """Resolve report ids into objects, validating they are TrainMLModelReport instances."""
     if reports is None:
         return {}

     report_objects = {report_id: symbol_table.get(report_id) for report_id in reports}
     ParameterValidator.assert_all_type_and_value(report_objects.values(), TrainMLModelReport,
                                                  TrainMLModelParser.__name__, 'reports')
     return report_objects
コード例 #21
0
File: SimulationParser.py  Project: rofrank/immuneML
    def _parse_simulation(key: str, simulation: dict,
                          symbol_table: SymbolTable) -> SymbolTable:
        """Build Implanting objects from a simulation spec and register the Simulation."""
        location = "SimulationParser"
        valid_implanting_keys = ["dataset_implanting_rate", "repertoire_implanting_rate", "signals",
                                 "is_noise"]

        implantings = []
        for impl_key, implanting in simulation.items():
            ParameterValidator.assert_keys(implanting.keys(), valid_implanting_keys, location,
                                           impl_key, exclusive=False)
            ParameterValidator.assert_keys(implanting["signals"],
                                           symbol_table.get_keys_by_type(SymbolType.SIGNAL), location,
                                           impl_key, False)

            # deep-copy so the spec dict is not mutated when signals are resolved
            implanting_params = copy.deepcopy(implanting)
            implanting_params["signals"] = [symbol_table.get(signal_id) for signal_id in implanting["signals"]]
            implanting_params["name"] = impl_key
            implantings.append(Implanting(**implanting_params))

        total_rate = sum(settings["dataset_implanting_rate"] for settings in simulation.values())
        assert total_rate <= 1, \
            "The total dataset implanting rate can not exceed 1."

        symbol_table.add(key, SymbolType.SIMULATION, Simulation(implantings))

        return symbol_table
コード例 #22
0
    def test_parse_simulation(self):
        """A single-implanting simulation spec is parsed into one Simulation object."""
        symbol_table = SymbolTable()
        motif = Motif("motif1", GappedKmerInstantiation(position_weights={0: 1}), seed="CAS")
        symbol_table.add("motif1", SymbolType.MOTIF, motif)
        implanting_strategy = HealthySequenceImplanting(GappedMotifImplanting(),
                                                        implanting_computation=ImplantingComputation.ROUND)
        symbol_table.add("signal1", SymbolType.SIGNAL,
                         Signal("signal1", [symbol_table.get("motif1")], implanting_strategy))

        simulation = {"sim1": {"var1": {"signals": ["signal1"],
                                        "dataset_implanting_rate": 0.5,
                                        "repertoire_implanting_rate": 0.1}}}

        symbol_table, specs = SimulationParser.parse_simulations(simulation, symbol_table)

        self.assertTrue(symbol_table.contains("sim1"))
        self.assertEqual(1, len(symbol_table.get("sim1").implantings))
コード例 #23
0
File: ImmuneMLParser.py  Project: rofrank/immuneML
    def parse(workflow_specification: dict, file_path, result_path):
        """Parse the full workflow specification: definitions, then instructions, then output.

        Returns the populated symbol table and the path of the written-out specs.
        """
        symbol_table = SymbolTable()

        def_parser_output, specs_defs = DefinitionParser.parse(workflow_specification, symbol_table, result_path)
        symbol_table, specs_instructions = InstructionParser.parse(def_parser_output, result_path)
        app_output = OutputParser.parse(workflow_specification, symbol_table)

        # write the fully-resolved specification back out next to the results
        path = ImmuneMLParser._output_specs(file_path=file_path, result_path=result_path,
                                            definitions=specs_defs,
                                            instructions=specs_instructions,
                                            output=app_output)

        return symbol_table, path
コード例 #24
0
    def _extract_reports(self):
        """Load benchmark report definitions from the YAML spec and instantiate them on self.reports."""
        with open(self.specification_path, "r") as file:
            workflow_specification = yaml.safe_load(file)

        # the report keys come from the first (and only expected) instruction
        first_instruction = list(workflow_specification['instructions'].values())[0]
        report_keys = first_instruction['benchmark_reports']

        defined_reports = workflow_specification['definitions']['reports']
        ParameterValidator.assert_all_in_valid_list(report_keys, list(defined_reports.keys()),
                                                    MultiDatasetBenchmarkTool.__name__, "benchmark_reports")

        selected = {report_key: value for report_key, value in defined_reports.items()
                    if report_key in report_keys}
        symbol_table, _ = ReportParser.parse_reports(selected, SymbolTable())
        self.reports = [entry.item for entry in symbol_table.get_by_type(SymbolType.REPORT)]
コード例 #25
0
    def test_parse(self):
        """End-to-end check of MiXCR repertoire import: two clone files plus a
        metadata CSV should be parsed into a RepertoireDataset with 2 repertoires."""
        path = EnvironmentSettings.root_path + "test/tmp/parser/"

        PathBuilder.build(path + "tmp_input/")

        # MiXCR export column layout shared by both fixture files
        columns = ["patient", "dilution", "cloneCount", "allVHitsWithScore",
                   "allJHitsWithScore", "nSeqCDR1", "nSeqCDR2", "nSeqCDR3",
                   "minQualCDR3", "aaSeqCDR1", "aaSeqCDR2", "aaSeqCDR3",
                   "sampleID"]

        def make_clone(clone_count, v_hit, j_hit, nt_seq, aa_seq, sample_id):
            # one MiXCR clone row; in this fixture CDR1/2/3 share the same sequence
            return {"patient": "CD12", "dilution": "108'",
                    "cloneCount": clone_count,
                    "allVHitsWithScore": v_hit, "allJHitsWithScore": j_hit,
                    "nSeqCDR1": nt_seq, "nSeqCDR2": nt_seq, "nSeqCDR3": nt_seq,
                    "minQualCDR3": 10,
                    "aaSeqCDR1": aa_seq, "aaSeqCDR2": aa_seq, "aaSeqCDR3": aa_seq,
                    "sampleID": sample_id}

        def write_clone_file(filename, rows):
            # tab-separated MiXCR-style export, header + clone rows
            with open(path + "tmp_input/" + filename, "w") as file:
                writer = csv.DictWriter(file, delimiter="\t", fieldnames=columns)
                writer.writeheader()
                writer.writerows(rows)

        write_clone_file("CD1_clones_TRA.csv", [
            make_clone(3, "TRAV13-1*00(735)", "TRAJ15*00(243)", "TGTGCAGCAA", "VFAVFA", "2"),
            make_clone(5, "TRAV14-1*00(735)", "TRAJ12*00(243)", "CAATGTGA", "CASCAS", "3"),
        ])

        # NOTE: sampleID is a string in the first row and an int in the others,
        # exactly as in the original fixture (csv writes them identically)
        write_clone_file("HC2_clones_TRB.csv", [
            make_clone(3, "TRAV13-1*00(735)", "TRAJ15*00(243)", "TGTGCAGCAA", "CAASNQA", "1"),
            make_clone(6, "TRAV19-1*00(735)", "TRAJ12*00(243)", "CAATGTGA", "CAASNTTA", 1),
            make_clone(6, "TRAV19-1*00(735)", "TRAJ12*00(243)", "CAATGTGA", "CAASNTTA", 1),
        ])

        metadata = pd.DataFrame({
            "filename": ["HC2_clones_TRB.csv", "CD1_clones_TRA.csv"],
            "subject_id": ["HC2", "CD1"],
            "CD": [False, True]
        })
        metadata.to_csv(path + "metadata.csv")

        specs = {
            "datasets": {
                "d1": {
                    "format": "MiXCR",
                    "params": {
                        "is_repertoire": True,
                        "path": path + "tmp_input/",
                        "metadata_file": path + "metadata.csv",
                        "number_of_processes": 2,
                    }
                }
            }
        }

        st, desc = ImportParser.parse(specs, SymbolTable(),
                                      path + "tmp_output/")
        # one repertoire per metadata row
        self.assertTrue(isinstance(st.get("d1"), RepertoireDataset))
        self.assertEqual(2, len(st.get("d1").get_data()))

        shutil.rmtree(path)
---- Code example #26 ----
 def _prepare_context(self, instruction: dict, symbol_table: SymbolTable):
     """Resolve the instruction's dataset symbol and expose it as the shared context dict."""
     dataset = symbol_table.get(instruction["dataset"])
     return {"dataset": dataset}
---- Code example #27 (file: test_symbolTable.py, project: rofrank/immuneML) ----
 def test_add(self):
     """Inserting a duplicate symbol should warn instead of failing."""
     table = SymbolTable()
     key, symbol_kind = "svm1", SymbolType.ML_METHOD
     table.add(key, symbol_kind, {})
     # a second add with the same key must raise a Warning
     with self.assertWarns(Warning):
         table.add(key, symbol_kind, {})
---- Code example #28 ----
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: str = None) -> TrainMLModelInstruction:
        """Validate a TrainMLModel instruction specification and build the instruction.

        Args:
            key: name of the instruction in the specification
            instruction: the raw TrainMLModel specification dict
            symbol_table: previously parsed definitions (datasets, reports, ...)
            path: unused here; shadowed by the output path from the instruction

        Returns:
            a fully constructed TrainMLModelInstruction
        """
        location = TrainMLModelParser.__name__

        valid_keys = [
            "assessment", "selection", "dataset", "strategy", "labels",
            "metrics", "settings", "number_of_processes", "type", "reports",
            "optimization_metric", 'refit_optimal_model', 'store_encoded_data'
        ]

        # settings is type-checked first, then the key set, matching the
        # original validation order so the same error surfaces first
        ParameterValidator.assert_type_and_value(instruction['settings'], list,
                                                 location, 'settings')
        ParameterValidator.assert_keys(list(instruction.keys()), valid_keys,
                                       location, "TrainMLModel")

        # remaining fields, checked in the original order
        for field_name, expected_type in (('refit_optimal_model', bool),
                                          ('metrics', list),
                                          ('optimization_metric', str),
                                          ('number_of_processes', int),
                                          ('strategy', str),
                                          ('store_encoded_data', bool)):
            ParameterValidator.assert_type_and_value(instruction[field_name],
                                                     expected_type, location,
                                                     field_name)

        settings = self._parse_settings(instruction, symbol_table)
        dataset = symbol_table.get(instruction["dataset"])

        assessment = self._parse_split_config(key, instruction, "assessment",
                                              symbol_table, len(settings))
        selection = self._parse_split_config(key, instruction, "selection",
                                             symbol_table, len(settings))
        assessment, selection = self._update_split_configs(assessment,
                                                           selection, dataset)

        label_config = self._create_label_config(instruction, dataset, key)
        strategy = ReflectionHandler.get_class_by_name(
            instruction["strategy"], "hyperparameter_optimization/")
        metrics = {Metric[name.upper()] for name in instruction["metrics"]}
        optimization_metric = Metric[instruction["optimization_metric"].upper()]
        search_criterion = Metric.get_search_criterion(optimization_metric)

        # NOTE: `path` parameter is intentionally shadowed, as in the original
        path = self._prepare_path(instruction)
        context = self._prepare_context(instruction, symbol_table)
        reports = self._prepare_reports(instruction["reports"], symbol_table)

        return TrainMLModelInstruction(
            dataset=dataset,
            hp_strategy=strategy(settings, search_criterion),
            hp_settings=settings,
            assessment=assessment,
            selection=selection,
            metrics=metrics,
            optimization_metric=optimization_metric,
            refit_optimal_model=instruction['refit_optimal_model'],
            label_configuration=label_config,
            path=path,
            context=context,
            store_encoded_data=instruction['store_encoded_data'],
            number_of_processes=instruction["number_of_processes"],
            reports=reports,
            name=key)
---- Code example #29 (file: test_reportParser.py, project: rofrank/immuneML) ----
 def test_parse_reports(self):
     """A single SequenceLengthDistribution spec should end up in the symbol table under its key."""
     report_specs = {"r1": {"SequenceLengthDistribution": {}}}
     symbol_table, specs = ReportParser.parse_reports(report_specs, SymbolTable())
     # the parsed report is registered under "r1" and instantiated to the right class
     self.assertTrue(symbol_table.contains("r1"))
     self.assertTrue(isinstance(symbol_table.get("r1"), SequenceLengthDistribution))
---- Code example #30 ----
    def __init__(self, symbol_table: SymbolTable, specification: dict):
        """Container for the outputs of definition parsing.

        Args:
            symbol_table: parsed definition objects; must contain at least one
                entry of some SymbolType, otherwise parsing produced nothing
            specification: the specification dict the definitions came from
        """
        # iterate the enum directly and use sequence truthiness instead of
        # a throwaway list comprehension and an explicit len(...) > 0
        assert any(symbol_table.get_by_type(symbol_type) for symbol_type in SymbolType), \
            "DefinitionParserOutput: symbol table has not been populated by objects of any type."

        self.symbol_table = symbol_table
        self.specification = specification