Esempio n. 1
0
    def test_simulation(self):
        """Run the simulation workflow end to end and verify the files it produces.

        The dataset and the YAML specification come from ``prepare_dataset`` and
        ``prepare_specs``; the app writes to ``<path>/result/``. The test checks that the
        metadata file, the HTML index and the exported immuneML dataset exist, and that the
        ``signal1`` column of the metadata sums to 17.
        """
        path = EnvironmentSettings.tmp_test_path / "integration_simulation/"
        result_path = path / "result/"
        metadata_path = path / "result/inst1/metadata.csv"

        try:
            self.prepare_dataset(path)
            specs_path = self.prepare_specs(path)

            PathBuilder.build(result_path)

            app = ImmuneMLApp(specification_path=specs_path,
                              result_path=result_path)
            app.run()

            self.assertTrue(os.path.isfile(metadata_path))

            metadata_df = pd.read_csv(metadata_path,
                                      comment=Constants.COMMENT_SIGN)
            # assertIn reports the actual columns on failure, unlike assertTrue(x in y).
            self.assertIn("signal1", metadata_df.columns)
            self.assertEqual(17, sum(metadata_df["signal1"]))

            self.assertTrue(os.path.isfile(path / "result/index.html"))
            self.assertTrue(
                os.path.isfile(
                    path /
                    "result/inst1/exported_dataset/immuneml/d1.iml_dataset"))
        finally:
            # Clean up even when a step or an assertion above fails, so a failed run does not
            # leave stale files behind. The guard covers a helper failing before the
            # directory was ever created.
            if os.path.isdir(path):
                shutil.rmtree(path)
Esempio n. 2
0
    def _simulate_dataset_with_signals(self, path: Path):
        """Write a simulation specification under ``path`` and run immuneML on it.

        The specification requests a ``RandomRepertoireDataset`` without labels, one motif
        with seed ``AA``, one signal built from that motif, and a simulation that implants
        the signal (dataset rate 0.5, repertoire rate 0.1) with AIRR export. The YAML is
        stored as ``simulation_specs.yaml`` and results go to ``<path>/result``.
        """
        print("immuneML quickstart: generating a synthetic dataset...")

        PathBuilder.build(path)

        definitions = {
            "datasets": {
                "my_synthetic_dataset": {"format": "RandomRepertoireDataset", "params": {"labels": {}}},
            },
            "motifs": {"my_motif": {"seed": "AA", "instantiation": "GappedKmer"}},
            "signals": {"my_signal": {"motifs": ["my_motif"], "implanting": "HealthySequence"}},
            "simulations": {
                "my_simulation": {
                    # The implanting name is a user-chosen key that ends up in the YAML;
                    # its spelling is kept exactly as in the original specification.
                    "my_implantng": {
                        "signals": ["my_signal"],
                        "dataset_implanting_rate": 0.5,
                        "repertoire_implanting_rate": 0.1,
                    },
                },
            },
        }
        instructions = {
            "simulation_instruction": {
                "type": "Simulation",
                "dataset": "my_synthetic_dataset",
                "simulation": "my_simulation",
                "export_formats": ["AIRR"],
            },
        }

        specs_file = path / "simulation_specs.yaml"
        with specs_file.open("w") as handle:
            yaml.dump({"definitions": definitions, "instructions": instructions}, handle)

        ImmuneMLApp(specs_file, path / "result").run()

        print("immuneML quickstart: finished generating a synthetic dataset.")
Esempio n. 3
0
    def run(self, result_path: str):
        """Prepare the output location, create the specification and run immuneML.

        ``build_path`` turns ``result_path`` into the base folder; the specification is
        created there and the app writes its results to ``quickstart/`` below it.
        """
        base_path = self.build_path(result_path)
        specs_file = self.create_specfication(base_path)

        ImmuneMLApp(specs_file, base_path / "quickstart/").run()
    def test_repertoire_dataset(self):
        """Run a DatasetExport instruction on a random repertoire dataset with HTML output.

        The specification describes 10 repertoires with an ``HLA`` label, a
        ``ClonesPerRepertoireFilter`` preprocessing step, and export to the ImmuneML and
        AIRR formats. The test passes when the app runs without raising.
        """
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path /
                                 "integration_dataset_gen_html_repertoire/")
        dataset_path = path / "repertoire_dataset/"

        specs = {
            "definitions": {
                "datasets": {
                    "d1": {
                        "format": "RandomRepertoireDataset",
                        "params": {
                            "repertoire_count": 10,
                            "sequence_count_probabilities": {
                                10: 1
                            },
                            "sequence_length_probabilities": {
                                12: 1
                            },
                            "labels": {
                                "HLA": {
                                    "A": 0.5,
                                    "B": 0.5
                                }
                            },
                            "result_path": str(dataset_path)
                        }
                    }
                },
                "preprocessing_sequences": {
                    "p1": [{
                        "my_filter": {
                            "ClonesPerRepertoireFilter": {
                                "lower_limit": 1,
                            }
                        }
                    }]
                }
            },
            "instructions": {
                "instr1": {
                    "type": "DatasetExport",
                    "export_formats": ["ImmuneML", "AIRR"],
                    "datasets": ["d1"],
                    "preprocessing_sequence": "p1"
                }
            },
            "output": {
                "format": "HTML"
            }
        }

        try:
            specs_path = path / "specs.yaml"
            with open(specs_path, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_path, path / "result/")
            app.run()
        finally:
            # Remove the working directory even when the run fails, so stale output from a
            # failed run cannot influence the next one.
            shutil.rmtree(path)
    def test_receptor_dataset(self):
        """Run a DatasetExport instruction on a random receptor dataset with HTML output.

        The specification describes 10 receptors with two boolean labels (``epitope_1``
        and ``epitope_2``) and export to the Pickle and AIRR formats. The test passes when
        the app runs without raising.
        """
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path /
                                 "integration_dataset_gen_html_receptor/")
        dataset_path = path / "receptor_dataset/"

        specs = {
            "definitions": {
                "datasets": {
                    "receptordataset": {
                        "format": "RandomReceptorDataset",
                        "params": {
                            "receptor_count": 10,
                            "chain_1_length_probabilities": {
                                10: 1
                            },
                            "chain_2_length_probabilities": {
                                10: 1
                            },
                            "labels": {
                                "epitope_1": {
                                    True: 0.5,
                                    False: 0.5
                                },
                                "epitope_2": {
                                    True: 0.5,
                                    False: 0.5
                                }
                            },
                            "result_path": str(dataset_path)
                        }
                    }
                }
            },
            "instructions": {
                "instr1": {
                    "type": "DatasetExport",
                    "export_formats": ["Pickle", "AIRR"],
                    "datasets": ["receptordataset"]
                }
            },
            "output": {
                "format": "HTML"
            }
        }

        try:
            specs_path = path / "specs.yaml"
            with open(specs_path, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_path, path / "result/")
            app.run()
        finally:
            # Remove the working directory even when the run fails, so stale output from a
            # failed run cannot influence the next one.
            shutil.rmtree(path)
Esempio n. 6
0
    def run_example(self, specs: dict, path: str):
        """Dump ``specs`` to YAML under ``path``, run immuneML on it and remove ``path``.

        Args:
            specs: specification dictionary written to ``<path>/specs.yaml``.
            path: working directory; a plain string or a ``pathlib.Path`` is accepted.
                The whole directory is deleted once the run is over.
        """
        # Local import keeps this method self-contained; the signature advertises ``str``
        # while the body needs the ``/`` operator, so the argument is normalised first.
        from pathlib import Path

        path = Path(path)
        PathBuilder.build(path)

        try:
            specs_filename = path / "specs.yaml"
            with open(specs_filename, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_filename, path / "result/")
            app.run()
        finally:
            # Clean up even when the example fails, so nothing is left behind on disk.
            shutil.rmtree(path)
Esempio n. 7
0
    def test_subsampling(self):
        """Run the workflow described by ``build_specs`` and expect it to finish cleanly.

        The specification is written to ``specs.yaml`` in a temporary folder and results
        go to ``result/`` next to it. There are no assertions; the test passes when the
        app runs without raising.
        """
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path / "subsampling_workflow/")

        try:
            repertoire_specs = self.build_specs(path)

            specs_filename = path / "specs.yaml"
            with open(specs_filename, "w") as file:
                yaml.dump(repertoire_specs, file)

            app = ImmuneMLApp(specs_filename, path / "result/")
            app.run()
        finally:
            # Remove the working directory even when the run fails, so stale output from a
            # failed run cannot influence the next one.
            shutil.rmtree(path)
Esempio n. 8
0
    def test_dataset_generation(self):
        """Run the workflow described by ``build_specs`` and expect it to finish cleanly.

        The specification is written to ``specs.yaml`` in a temporary folder and results
        go to ``result/`` next to it. There are no assertions; the test passes when the
        app runs without raising.
        """
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path /
                                 "cv_split_variant/")

        try:
            repertoire_specs = self.build_specs(path)

            specs_filename = path / "specs.yaml"
            with open(specs_filename, "w") as file:
                yaml.dump(repertoire_specs, file)

            app = ImmuneMLApp(specs_filename, path / "result/")
            app.run()
        finally:
            # Remove the working directory even when the run fails, so stale output from a
            # failed run cannot influence the next one.
            shutil.rmtree(path)
Esempio n. 9
0
    def _run(self):
        """Run immuneML on the prepared specification and collect the zipped models.

        After the app finishes, every ``*.zip`` found under
        ``<instruction_name>/optimal_*/zip/`` in the result folder is copied into
        ``exported_models/`` in that same folder.
        """
        PathBuilder.build(self.result_path)
        self._prepare_specs()
        ImmuneMLApp(self.yaml_path, self.result_path).run()

        # Collect the archives before the export folder is created, as the original did.
        zip_pattern = f"{self.instruction_name}/optimal_*/zip/*.zip"
        zipped_models = list(self.result_path.glob(zip_pattern))

        export_dir = PathBuilder.build(self.result_path / 'exported_models/')

        for zipped_model in zipped_models:
            shutil.copyfile(zipped_model, export_dir / zipped_model.name)

        logging.info(f"{GalaxyTrainMLModel.__name__}: immuneML has finished and the trained models were exported.")
Esempio n. 10
0
    def run(self, result_path: str):
        """Simulate a dataset, then run the machine learning analysis on top of it.

        The synthetic dataset is generated under ``synthetic_dataset``; the analysis
        specification is created under ``machine_learning_analysis`` and its results are
        written to ``machine_learning_analysis/result``. Progress is printed to stdout.
        """
        result_path = self.build_path(result_path)

        self._simulate_dataset_with_signals(result_path / "synthetic_dataset")

        print("immuneML quickstart: training a machine learning model...")

        ml_specs = self.create_specfication(result_path / "machine_learning_analysis")
        ml_result_path = result_path / "machine_learning_analysis/result"
        ImmuneMLApp(ml_specs, ml_result_path).run()

        print("immuneML quickstart: finished training a machine learning model.")
Esempio n. 11
0
    def run(self, result_path: str):
        """Configure logging, simulate a dataset and run the machine learning analysis.

        Logging goes to ``log.txt`` in the built result folder and Python warnings are
        rerouted to ``logging.warning``. The synthetic dataset is generated under
        ``synthetic_dataset``; analysis results are written to
        ``machine_learning_analysis/result``. Progress is printed to stdout.
        """
        result_path = self.build_path(result_path)

        logging.basicConfig(filename=Path(result_path) / "log.txt",
                            level=logging.ERROR,
                            format='%(asctime)s %(levelname)s: %(message)s')

        # NOTE(review): the root level is ERROR while warnings are forwarded at WARNING
        # level, so they look like they are dropped unless logging was configured earlier
        # with a lower threshold - confirm this is intended.
        def _forward_warning(message, category, filename, lineno, file=None, line=None):
            logging.warning(message)

        warnings.showwarning = _forward_warning

        self._simulate_dataset_with_signals(result_path / "synthetic_dataset")

        print("immuneML quickstart: training a machine learning model...")

        ml_specs = self.create_specfication(result_path / "machine_learning_analysis")
        ml_result_path = result_path / "machine_learning_analysis/result"
        ImmuneMLApp(ml_specs, ml_result_path).run()

        print("immuneML quickstart: finished training a machine learning model.")
Esempio n. 12
0
    def run(self):
        """Run immuneML once per split specification, then the reports and the HTML build.

        Each entry of the split specifications is run into its own sub-folder of
        ``self.result_path``; the first element returned by each run is kept as that
        entry's state. The collected states are passed to ``_run_reports`` and the report
        results to ``MultiDatasetBenchmarkHTMLBuilder``. Progress is printed and flushed
        at every step.
        """

        def progress(verb, name, position):
            # The total is recomputed on every call, exactly as the inline messages did.
            return f"{verb} nested cross-validation on dataset {name} ({position}/{len(list(specs.keys()))}).."

        print("Starting MultiDatasetBenchmarkTool...", flush=True)
        PathBuilder.build(self.result_path)
        specs = self._split_specs_file()
        self._extract_reports()

        instruction_states = {}
        for index, specs_name in enumerate(specs.keys()):
            print(progress("Running", specs_name, index + 1), flush=True)

            dataset_app = ImmuneMLApp(specification_path=specs[specs_name],
                                      result_path=self.result_path / specs_name)
            instruction_states[specs_name] = dataset_app.run()[0]

            print(progress("Finished", specs_name, index + 1), flush=True)

        print("Running reports on the results of nested cross-validation on all datasets...", flush=True)
        report_results = self._run_reports(instruction_states)

        print("Finished reports, now generating HTML output...", flush=True)
        result_paths = {specs_name: self.result_path / specs_name for specs_name in specs.keys()}
        MultiDatasetBenchmarkHTMLBuilder.build(report_results, self.result_path, result_paths)

        print("MultiDatasetBenchmarkTool finished.", flush=True)
Esempio n. 13
0
    def _run(self):
        """Create the result folder, update the specification and run immuneML.

        Returns:
            Whatever ``ImmuneMLApp.run`` returns for this specification.
        """
        PathBuilder.build(self.result_path)
        self.update_specs()

        return ImmuneMLApp(self.yaml_path, self.result_path).run()
Esempio n. 14
0
    def _run(self):
        """Generate the YAML specification from ``self.args`` and run immuneML on it.

        Returns:
            Whatever ``ImmuneMLApp.run`` returns for the generated specification.
        """
        # The specification is produced first; the result folder is created afterwards.
        generated_yaml = main(self.args)

        PathBuilder.build(self.result_path)

        return ImmuneMLApp(generated_yaml, self.result_path).run()
Esempio n. 15
0
    def test_ml(self):
        """Run the export workflow, then the import workflow, and check both HTML indexes.

        The first run uses the specification from ``prepare_specs`` and writes to
        ``result_export/``; the second uses ``prepare_import_specs`` and writes to
        ``result_import/``. Each run must produce an ``index.html``.
        """
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path /
                                 "integration_ml/")

        try:
            specs_path = self.prepare_specs(path)

            PathBuilder.build(path / "result_export/")

            # The return values of both runs were never used, so they are not bound.
            app = ImmuneMLApp(specification_path=Path(specs_path),
                              result_path=path / "result_export/")
            app.run()

            self.assertTrue(os.path.isfile(path / "result_export/index.html"))

            specs_path = self.prepare_import_specs(path)

            app = ImmuneMLApp(Path(specs_path), path / 'result_import/')
            app.run()

            self.assertTrue(os.path.isfile(path / "result_import/index.html"))
        finally:
            # Remove the working directory even when a run or an assertion fails, so a
            # failed run does not leave stale files behind for the next one.
            shutil.rmtree(path)
Esempio n. 16
0
 def run_tool(yaml_path, result_path):
     """Create ``result_path`` and run immuneML on the specification at ``yaml_path``."""
     PathBuilder.build(result_path)
     ImmuneMLApp(yaml_path, result_path).run()
Esempio n. 17
0
    def test_simulation_receptors(self):
        """Simulate two signals on a random receptor dataset and check the exported data.

        The specification creates 100 receptors and implants ``signal1`` and ``signal2``
        with a dataset implanting rate of 0.5 each. After the run the HTML index and the
        pickled dataset must exist; the re-imported dataset must hold 100 receptors, each
        carrying both signal keys in its metadata, with 50 receptors positive per signal.
        """
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path /
                                 "integration_simulation_receptor/")
        specs = {
            "definitions": {
                "datasets": {
                    "d1": {
                        "format": "RandomReceptorDataset",
                        "params": {
                            "receptor_count": 100,
                            "chain_1_length_probabilities": {
                                10: 1
                            },
                            "chain_2_length_probabilities": {
                                10: 1
                            },
                            "result_path": str(path / "dataset/"),
                            "labels": {}
                        }
                    },
                },
                "motifs": {
                    "motif1": {
                        "seed_chain1": "CC/C",
                        "name_chain1": "ALPHA",
                        "name_chain2": "BETA",
                        "seed_chain2": "F/FF",
                        "instantiation": {
                            "GappedKmer": {
                                "max_gap": 1,
                                "alphabet_weights": None,
                                "position_weights": None
                            },
                        }
                    },
                    "motif2": {
                        "seed_chain1": "CCC",
                        "name_chain1": "ALPHA",
                        "name_chain2": "BETA",
                        "seed_chain2": "FFF",
                        "instantiation": "GappedKmer"
                    }
                },
                "signals": {
                    "signal1": {
                        "motifs": ["motif1", "motif2"],
                        "implanting": "Receptor",
                        "sequence_position_weights": None
                    },
                    "signal2": {
                        "motifs": ["motif1"],
                        "implanting": "Receptor",
                        "sequence_position_weights": None
                    }
                },
                "simulations": {
                    "sim1": {
                        "var1": {
                            "signals": ["signal1"],
                            "dataset_implanting_rate": 0.5
                        },
                        "var2": {
                            "signals": ["signal2"],
                            "dataset_implanting_rate": 0.5
                        }
                    }
                }
            },
            "instructions": {
                "inst1": {
                    "type": "Simulation",
                    "dataset": "d1",
                    "simulation": "sim1",
                    "export_formats": ["Pickle"]
                }
            },
            "output": {
                "format": "HTML"
            }
        }

        exported_dataset_file = path / "result/inst1/exported_dataset/pickle/d1.iml_dataset"

        try:
            with open(path / "specs.yaml", "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(path / "specs.yaml", path / "result/")
            app.run()

            self.assertTrue(os.path.isfile(path / "result/index.html"))
            self.assertTrue(os.path.isfile(exported_dataset_file))

            dataset = PickleImport.import_dataset({"path": exported_dataset_file}, "d1")
            self.assertEqual(100, dataset.get_example_count())

            # Read the receptors once and reuse them for all four counts.
            receptors = list(dataset.get_data())

            for signal_name in ("signal1", "signal2"):
                # Every receptor carries the signal key; half of them are positive.
                self.assertEqual(
                    100,
                    sum(1 for receptor in receptors if signal_name in receptor.metadata))
                self.assertEqual(
                    50,
                    sum(1 for receptor in receptors if receptor.metadata[signal_name]))
        finally:
            # Remove the working directory even when a step or an assertion fails, so a
            # failed run does not leave stale files behind for the next one.
            shutil.rmtree(path)
Esempio n. 18
0
    def test_encoding(self):
        """Train a KNN model on SequenceAbundance-encoded repertoires with an overlap report.

        A reference CSV and a 16-repertoire dataset labelled ``l1`` are written to a
        temporary folder, the dataset is exported in the ImmuneML format, and a
        ``TrainMLModel`` instruction with the ``ReferenceSequenceOverlap`` report is run
        on it. The test passes when the app runs without raising.
        """
        path = EnvironmentSettings.tmp_test_path / "integration_test_emerson_encoding/"
        PathBuilder.build(path)

        try:
            ref_path = path / "reference.csv"
            pd.DataFrame({
                "sequence_aas": ["GGG", "III", "TTT", "EFEF"],
                "v_alleles":
                ["TRBV6-1*01", "TRBV6-1*01", "TRBV6-1*01", "TRBV6-1*01"],
                'j_alleles': ["TRBJ2-7", "TRBJ2-7", "TRBJ2-7", "TRBJ2-7"]
            }).to_csv(ref_path, index=False)

            repertoires, metadata = RepertoireBuilder.build(
                [["GGG", "III", "LLL", "MMM"],
                 ["DDD", "EEE", "FFF", "III", "LLL", "MMM"], ["CCC", "FFF", "MMM"],
                 ["AAA", "CCC", "EEE", "FFF", "LLL", "MMM"],
                 ["GGG", "III", "LLL", "MMM"],
                 ["DDD", "EEE", "FFF", "III", "LLL", "MMM"], ["CCC", "FFF", "MMM"],
                 ["AAA", "CCC", "EEE", "FFF", "LLL", "MMM"],
                 ["GGG", "III", "LLL", "MMM"],
                 ["DDD", "EEE", "FFF", "III", "LLL", "MMM"], ["CCC", "FFF", "MMM"],
                 ["AAA", "CCC", "EEE", "FFF", "LLL", "MMM"],
                 ["GGG", "III", "LLL", "MMM"],
                 ["DDD", "EEE", "FFF", "III", "LLL", "MMM"], ["CCC", "FFF", "MMM"],
                 ["AAA", "CCC", "EEE", "FFF", "LLL", "MMM"]],
                labels={
                    "l1": [
                        True, True, False, False, True, True, False, False, True,
                        True, False, False, True, True, False, False
                    ]
                },
                path=path)

            dataset = RepertoireDataset(repertoires=repertoires,
                                        metadata_file=metadata,
                                        labels={"l1": [True, False]})
            ImmuneMLExporter.export(dataset, path)

            specs = {
                "definitions": {
                    "datasets": {
                        "d1": {
                            "format": "ImmuneML",
                            "params": {
                                "path": str(path / f"{dataset.name}.iml_dataset"),
                            }
                        }
                    },
                    "encodings": {
                        "e1": {
                            "SequenceAbundance": {
                                'comparison_attributes':
                                ["sequence_aas", "v_alleles", "j_alleles"]
                            }
                        }
                    },
                    "ml_methods": {
                        "knn": {
                            "KNN": {
                                "n_neighbors": 1
                            },
                        }
                    },
                    "reports": {
                        "r1": {
                            "ReferenceSequenceOverlap": {
                                "reference_path":
                                str(ref_path),
                                'comparison_attributes':
                                ["sequence_aas", "v_alleles", "j_alleles"]
                            }
                        }
                    }
                },
                "instructions": {
                    "inst1": {
                        "type": "TrainMLModel",
                        "settings": [{
                            "encoding": "e1",
                            "ml_method": "knn"
                        }],
                        "assessment": {
                            "split_strategy": "random",
                            "split_count": 1,
                            "training_percentage": 0.7,
                            "reports": {}
                        },
                        "selection": {
                            "split_strategy": "random",
                            "split_count": 1,
                            "training_percentage": 0.7,
                        },
                        "labels": [{
                            "l1": {
                                "positive_class": True
                            }
                        }],
                        "dataset": "d1",
                        "strategy": "GridSearch",
                        "metrics": ["accuracy"],
                        "number_of_processes": 2,
                        "reports": ["r1"],
                        "optimization_metric": "balanced_accuracy",
                        "refit_optimal_model": True,
                    }
                }
            }

            specs_file = path / "specs.yaml"
            with open(specs_file, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_file, path / "result")
            app.run()
        finally:
            # Remove the working directory even when a step fails, so a failed run does
            # not leave stale files behind for the next one.
            shutil.rmtree(path)
    def test(self):
        """Train a ReceptorCNN on a random receptor dataset with one-hot encoding.

        The specification generates 500 receptors with a boolean ``cmv_epitope`` label
        and runs a ``TrainMLModel`` instruction on them. The test passes when the app
        runs without raising.
        """
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path /
                                 "integration_receptor_cnn_workflow/")

        specs = {
            "definitions": {
                "datasets": {
                    "d1": {
                        "format": "RandomReceptorDataset",
                        "params": {
                            "result_path": str(path / "generated_dataset/"),
                            "receptor_count": 500,
                            "chain_1_length_probabilities": {
                                5: 1.
                            },
                            "chain_2_length_probabilities": {
                                6: 1.
                            },
                            "labels": {
                                "cmv_epitope": {
                                    True: 0.5,
                                    False: 0.5
                                }
                            }
                        }
                    }
                },
                "encodings": {
                    "enc1": {
                        "OneHot": {
                            "use_positional_info": True
                        }
                    }
                },
                "ml_methods": {
                    "cnn": {
                        "ReceptorCNN": {
                            "iteration_count": 1000,
                            "evaluate_at": 10,
                            "batch_size": 100,
                            "number_of_threads": 4
                        }
                    }
                }
            },
            "instructions": {
                "instr1": {
                    "type": "TrainMLModel",
                    "settings": [{
                        "encoding": "enc1",
                        "ml_method": "cnn"
                    }],
                    "assessment": {
                        "split_strategy": "random",
                        "split_count": 1,
                        "training_percentage": 0.7,
                    },
                    "selection": {
                        "split_strategy": "random",
                        "split_count": 1,
                        "training_percentage": 1,
                    },
                    "labels": ["cmv_epitope"],
                    "dataset": "d1",
                    "strategy": "GridSearch",
                    "metrics": ["accuracy"],
                    "number_of_processes": 4,
                    "reports": None,
                    "optimization_metric": "balanced_accuracy",
                    "refit_optimal_model": False,
                }
            }
        }

        try:
            with open(path / "specs.yaml", "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(path / "specs.yaml", path / 'result/')
            app.run()
        finally:
            # Remove the working directory even when the run fails, so stale output from a
            # failed run cannot influence the next one.
            shutil.rmtree(path)
Esempio n. 20
0
    def test_generate(self):
        """Run TrainMLModel with the DiseaseAssociatedSequenceCVOverlap report and check it.

        A 14-repertoire dataset labelled ``l1`` is written to a temporary folder and
        exported with ``PickleExporter``; the instruction is then run on it. The first
        returned state must hold exactly one report result with at least one figure and
        one table, and every reported file must exist on disk.
        """
        path = EnvironmentSettings.tmp_test_path / "disease_assoc_seq_cv/"
        PathBuilder.build(path)

        try:
            repertoires, metadata = RepertoireBuilder.build(
                [["GGG", "III", "LLL", "MMM"], ["DDD", "EEE", "FFF"],
                 ["GGG", "III", "LLL", "MMM"], ["DDD", "EEE", "FFF"],
                 ["GGG", "III", "LLL", "MMM"], ["DDD", "EEE", "FFF"],
                 ["GGG", "III", "LLL", "MMM"], ["DDD", "EEE", "FFF"],
                 ["GGG", "III", "LLL", "MMM"], ["DDD", "EEE", "FFF"],
                 ["GGG", "III", "LLL", "MMM"], ["DDD", "EEE", "FFF"],
                 ["GGG", "III", "LLL", "MMM"], ["DDD", "EEE", "FFF"]],
                labels={
                    "l1": [
                        True, False, True, False, True, False, True, False, True,
                        False, True, False, True, False
                    ]
                },
                path=path)

            dataset = RepertoireDataset(repertoires=repertoires,
                                        metadata_file=metadata,
                                        labels={"l1": [True, False]})
            PickleExporter.export(dataset, path)

            specs = {
                "definitions": {
                    "datasets": {
                        "d1": {
                            "format": "Pickle",
                            "params": {
                                "path": str(path / f"{dataset.name}.iml_dataset"),
                            }
                        }
                    },
                    "encodings": {
                        "e1": {
                            "SequenceAbundance": {
                                'p_value_threshold': 0.5
                            }
                        }
                    },
                    "ml_methods": {
                        "knn": {
                            "KNN": {
                                "n_neighbors": 1
                            },
                        }
                    },
                    "reports": {
                        "r1": {
                            "DiseaseAssociatedSequenceCVOverlap": {
                                "compare_in_selection": True,
                                "compare_in_assessment": True
                            }
                        }
                    }
                },
                "instructions": {
                    "inst1": {
                        "type": "TrainMLModel",
                        "settings": [{
                            "encoding": "e1",
                            "ml_method": "knn"
                        }],
                        "assessment": {
                            "split_strategy": "random",
                            "split_count": 1,
                            "training_percentage": 0.5,
                            "reports": {}
                        },
                        "selection": {
                            "split_strategy": "random",
                            "split_count": 1,
                            "training_percentage": 0.5,
                        },
                        "labels": [{
                            "l1": {
                                "positive_class": True
                            }
                        }],
                        "dataset": "d1",
                        "strategy": "GridSearch",
                        "metrics": ["accuracy"],
                        "number_of_processes": 2,
                        "reports": ["r1"],
                        "optimization_metric": "balanced_accuracy",
                        "refit_optimal_model": True,
                        "store_encoded_data": False
                    }
                }
            }

            specs_file = path / "specs.yaml"
            with open(specs_file, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_file, path / "result/")
            state = app.run()[0]

            self.assertEqual(1, len(state.report_results))
            report_result = state.report_results[0]

            # assertGreater reports the actual count on failure, unlike assertTrue(n > 0).
            self.assertGreater(len(report_result.output_figures), 0)
            self.assertGreater(len(report_result.output_tables), 0)

            for fig in report_result.output_figures:
                self.assertTrue(os.path.isfile(fig.path))
            for table in report_result.output_tables:
                self.assertTrue(os.path.isfile(table.path))
        finally:
            # Remove the working directory even when a step or an assertion fails, so a
            # failed run does not leave stale files behind for the next one.
            shutil.rmtree(path)