def test_sequence_dataset(self):
        """Generate a random sequence dataset and export it through a DatasetExport instruction.

        The YAML specification is written into a temporary directory, ImmuneMLApp is run
        on it with HTML output, and the temporary directory is removed afterwards. The test
        makes no explicit assertions: it passes when the run completes without raising.
        """
        root = PathBuilder.build(EnvironmentSettings.tmp_test_path / "integration_dataset_gen_html_sequence/")
        dataset_path = root / "sequence_dataset/"

        # each label gets its own dict object (PyYAML would serialize a shared object as an anchor/alias)
        label_probabilities = {label: {True: 0.5, False: 0.5} for label in ("epitope_a", "epitope_b")}

        dataset_params = {
            "sequence_count": 10,
            "length_probabilities": {10: 1},
            "labels": label_probabilities,
            "result_path": str(dataset_path),
        }
        definitions = {
            "datasets": {
                "sequencedataset": {"format": "RandomSequenceDataset", "params": dataset_params},
            },
        }
        instructions = {
            "instr1": {
                "type": "DatasetExport",
                "export_formats": ["Pickle", "AIRR"],
                "datasets": ["sequencedataset"],
            },
        }
        specs = {"definitions": definitions, "instructions": instructions, "output": {"format": "HTML"}}

        yaml_file = root / "specs.yaml"
        with open(yaml_file, "w") as stream:
            yaml.dump(specs, stream)

        ImmuneMLApp(yaml_file, root / "result/").run()

        shutil.rmtree(root)
Ejemplo n.º 2
0
    def test_dataset_generation(self):
        """Run ImmuneMLApp on the specification returned by ``self.build_specs``.

        The specification is dumped to ``specs.yaml`` in a temporary directory, the app is
        run with results under ``result/``, and the directory is deleted afterwards. There
        are no explicit assertions: the test passes when nothing raises.
        """
        workdir = PathBuilder.build(EnvironmentSettings.tmp_test_path / "cv_split_variant/")
        specs = self.build_specs(workdir)

        yaml_file = workdir / "specs.yaml"
        with open(yaml_file, "w") as stream:
            yaml.dump(specs, stream)

        ImmuneMLApp(yaml_file, workdir / "result/").run()

        shutil.rmtree(workdir)
Ejemplo n.º 3
0
    def test_subsampling(self):
        """Run ImmuneMLApp on the specification returned by ``self.build_specs``.

        The specification is dumped to ``specs.yaml`` in a temporary directory, the app is
        run with results under ``result/``, and the directory is deleted afterwards. There
        are no explicit assertions: the test passes when nothing raises.
        """
        workdir = PathBuilder.build(EnvironmentSettings.tmp_test_path / "subsampling_workflow/")
        specs = self.build_specs(workdir)

        yaml_file = workdir / "specs.yaml"
        with open(yaml_file, "w") as stream:
            yaml.dump(specs, stream)

        ImmuneMLApp(yaml_file, workdir / "result/").run()

        shutil.rmtree(workdir)
Ejemplo n.º 4
0
    def _run(self):
        """Run immuneML on the prepared specification and gather the zipped models.

        After the app has finished, every ``*.zip`` file found under
        ``<result_path>/<instruction_name>/optimal_*/zip/`` is copied into
        ``<result_path>/exported_models/`` under its original file name.
        """
        PathBuilder.build(self.result_path)
        self._prepare_specs()
        ImmuneMLApp(self.yaml_path, self.result_path).run()

        # collect the archives before the export directory is created
        zip_pattern = f"{self.instruction_name}/optimal_*/zip/*.zip"
        archives = list(self.result_path.glob(zip_pattern))

        export_dir = PathBuilder.build(self.result_path / 'exported_models/')
        for archive in archives:
            shutil.copyfile(archive, export_dir / archive.name)

        logging.info(f"{GalaxyTrainMLModel.__name__}: immuneML has finished and the trained models were exported.")
Ejemplo n.º 5
0
    def run(self, result_path: str):
        """Simulate a dataset with signals and train a machine learning model on it.

        Args:
            result_path: location handed to ``self.build_path``; the value returned by that
                call is used as the root for all outputs of this run.
        """
        result_path = self.build_path(result_path)

        self._simulate_dataset_with_signals(result_path / "synthetic_dataset")

        print("immuneML quickstart: training a machine learning model...")

        analysis_path = result_path / "machine_learning_analysis"
        specs_file = self.create_specfication(analysis_path)
        ImmuneMLApp(specs_file, result_path / "machine_learning_analysis/result").run()

        print("immuneML quickstart: finished training a machine learning model.")
Ejemplo n.º 6
0
    def run(self, result_path: str):
        """Simulate a dataset with signals and train a machine learning model on it.

        Logging is configured to write to ``log.txt`` under the result path, and Python
        warnings are redirected to the logging module for the rest of the process.

        Args:
            result_path: location handed to ``self.build_path``; the value returned by that
                call is used as the root for all outputs of this run.
        """
        result_path = self.build_path(result_path)

        # NOTE(review): the level is ERROR, so WARNING records forwarded below would be filtered
        # out whenever this basicConfig call takes effect - confirm this is intended.
        logging.basicConfig(
            filename=Path(result_path) / "log.txt",
            level=logging.ERROR,
            format='%(asctime)s %(levelname)s: %(message)s',
        )

        def forward_warning_to_log(message, category, filename, lineno, file=None, line=None):
            # same signature as warnings.showwarning; only the message itself is logged
            return logging.warning(message)

        warnings.showwarning = forward_warning_to_log

        self._simulate_dataset_with_signals(result_path / "synthetic_dataset")

        print("immuneML quickstart: training a machine learning model...")

        analysis_path = result_path / "machine_learning_analysis"
        specs_file = self.create_specfication(analysis_path)
        ImmuneMLApp(specs_file, result_path / "machine_learning_analysis/result").run()

        print("immuneML quickstart: finished training a machine learning model.")
Ejemplo n.º 7
0
 def _run(self):
     """Run immuneML on the updated specification and copy the exported dataset.

     The first state returned by the app is inspected: the first value of the first
     nested mapping in ``state.paths`` is copied recursively to ``<result_path>/result/``.
     """
     PathBuilder.build(self.result_path)
     self._update_specs()

     app = ImmuneMLApp(self.yaml_path, self.result_path)
     first_state = app.run()[0]

     # presumably state.paths maps dataset -> export format -> path; verify against the instruction
     first_path_group = list(first_state.paths.values())[0]
     exported_location = list(first_path_group.values())[0]
     shutil.copytree(exported_location, self.result_path / "result/")

     print("Exported dataset.")
Ejemplo n.º 8
0
 def _run(self):
     """Run immuneML on the checked specification and expose the predictions file.

     The predictions produced by the first returned state are copied to
     ``<result_path>/predictions.csv`` unless they already live at that location.
     """
     PathBuilder.build(self.result_path)
     self._check_specs()

     app = ImmuneMLApp(self.yaml_path, self.result_path)
     state = app.run()[0]

     target_file = self.result_path / "predictions.csv"
     produced_location = os.path.relpath(state.predictions_path)
     expected_location = os.path.relpath(target_file)

     # copying a file onto itself would fail, so only copy when the locations differ
     if produced_location != expected_location:
         shutil.copy(state.predictions_path, target_file)

     print("Applied ML model to the dataset, predictions are available.")
Ejemplo n.º 9
0
    def test_simulation(self):
        """Run a simulation workflow end to end and check its exported artifacts.

        Verifies that the metadata file of instruction ``inst1`` exists, contains a
        ``signal1`` column summing to 17, and that both the HTML index and the pickled
        dataset export were written. The temporary directory is removed at the end.
        """
        root = EnvironmentSettings.tmp_test_path / "integration_simulation/"
        self.prepare_dataset(root)
        specs_path = self.prepare_specs(root)

        result_dir = root / "result/"
        PathBuilder.build(result_dir)

        ImmuneMLApp(specification_path=specs_path, result_path=result_dir).run()

        metadata_file = root / "result/inst1/metadata.csv"
        self.assertTrue(os.path.isfile(metadata_file))

        metadata_df = pd.read_csv(metadata_file, comment=Constants.COMMENT_SIGN)
        self.assertTrue("signal1" in metadata_df.columns)
        self.assertEqual(17, sum(metadata_df["signal1"]))

        expected_outputs = (
            root / "result/index.html",
            root / "result/inst1/exported_dataset/pickle/d1.iml_dataset",
        )
        for expected_output in expected_outputs:
            self.assertTrue(os.path.isfile(expected_output))

        shutil.rmtree(root)
Ejemplo n.º 10
0
    def test_ml(self):
        """Run an export workflow followed by an import workflow and check their HTML output.

        The first run uses the specification from ``self.prepare_specs`` and writes to
        ``result_export/``; the second uses ``self.prepare_import_specs`` and writes to
        ``result_import/``. For each run the test asserts that ``index.html`` was generated.
        The temporary directory is removed at the end.
        """
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path /
                                 "integration_ml/")
        specs_path = self.prepare_specs(path)

        PathBuilder.build(path / "result_export/")

        # the value returned by run() is not inspected; only the generated files are checked
        ImmuneMLApp(specification_path=Path(specs_path),
                    result_path=path / "result_export/").run()

        self.assertTrue(os.path.isfile(path / "result_export/index.html"))

        specs_path = self.prepare_import_specs(path)

        ImmuneMLApp(Path(specs_path), path / 'result_import/').run()

        self.assertTrue(os.path.isfile(path / "result_import/index.html"))

        shutil.rmtree(path)
Ejemplo n.º 11
0
 def run_tool(yaml_path, result_path):
     """Build the result directory and run ImmuneMLApp on the given specification.

     Args:
         yaml_path: specification file passed to ImmuneMLApp.
         result_path: output location; built with PathBuilder before the app runs.
     """
     PathBuilder.build(result_path)
     ImmuneMLApp(yaml_path, result_path).run()
Ejemplo n.º 12
0
    def test_simulation_receptors(self):
        """Simulate two signals on a random receptor dataset and check the exported result.

        A random dataset of 100 receptors is defined together with two motifs, two signals
        and a simulation implanting each signal at a rate of 0.5. After the run the test
        checks the HTML index and the pickled export, re-imports the dataset and expects
        each signal to be present in the metadata of all 100 receptors and truthy for 50.
        """
        root = PathBuilder.build(EnvironmentSettings.tmp_test_path / "integration_simulation_receptor/")

        datasets = {
            "d1": {
                "format": "RandomReceptorDataset",
                "params": {
                    "receptor_count": 100,
                    "chain_1_length_probabilities": {10: 1},
                    "chain_2_length_probabilities": {10: 1},
                    "result_path": str(root / "dataset/"),
                    "labels": {},
                },
            },
        }
        motifs = {
            "motif1": {
                "seed_chain1": "CC/C",
                "name_chain1": "ALPHA",
                "name_chain2": "BETA",
                "seed_chain2": "F/FF",
                "instantiation": {
                    "GappedKmer": {"max_gap": 1, "alphabet_weights": None, "position_weights": None},
                },
            },
            "motif2": {
                "seed_chain1": "CCC",
                "name_chain1": "ALPHA",
                "name_chain2": "BETA",
                "seed_chain2": "FFF",
                "instantiation": "GappedKmer",
            },
        }
        signals = {
            "signal1": {"motifs": ["motif1", "motif2"], "implanting": "Receptor", "sequence_position_weights": None},
            "signal2": {"motifs": ["motif1"], "implanting": "Receptor", "sequence_position_weights": None},
        }
        simulations = {
            "sim1": {
                "var1": {"signals": ["signal1"], "dataset_implanting_rate": 0.5},
                "var2": {"signals": ["signal2"], "dataset_implanting_rate": 0.5},
            },
        }
        instructions = {
            "inst1": {
                "type": "Simulation",
                "dataset": "d1",
                "simulation": "sim1",
                "export_formats": ["Pickle"],
            },
        }
        specs = {
            "definitions": {
                "datasets": datasets,
                "motifs": motifs,
                "signals": signals,
                "simulations": simulations,
            },
            "instructions": instructions,
            "output": {"format": "HTML"},
        }

        specs_file = root / "specs.yaml"
        with open(specs_file, "w") as stream:
            yaml.dump(specs, stream)

        ImmuneMLApp(specs_file, root / "result/").run()

        exported_file = root / "result/inst1/exported_dataset/pickle/d1.iml_dataset"
        self.assertTrue(os.path.isfile(root / "result/index.html"))
        self.assertTrue(os.path.isfile(exported_file))

        dataset = PickleImport.import_dataset({"path": exported_file}, "d1")
        self.assertEqual(100, dataset.get_example_count())

        for signal in ("signal1", "signal2"):
            # every receptor carries the signal key, and half of them have it set
            annotated = sum(1 for receptor in dataset.get_data() if signal in receptor.metadata)
            self.assertEqual(100, annotated)
            implanted = sum(1 for receptor in dataset.get_data() if receptor.metadata[signal])
            self.assertEqual(50, implanted)

        shutil.rmtree(root)
Ejemplo n.º 13
0
    def test_encoding(self):
        """Train a KNN model on SequenceAbundance-encoded repertoires with an overlap report.

        Builds 16 repertoires with a binary label ``l1``, exports them as an ImmuneML
        dataset, writes a reference sequence file for the ReferenceSequenceOverlap report
        and runs a TrainMLModel instruction through ImmuneMLApp. There are no explicit
        assertions: the test passes when the run completes without raising.
        """
        root = EnvironmentSettings.tmp_test_path / "integration_test_emerson_encoding/"
        PathBuilder.build(root)

        ref_path = root / "reference.csv"
        reference_sequences = pd.DataFrame({
            "sequence_aas": ["GGG", "III", "TTT", "EFEF"],
            "v_alleles": ["TRBV6-1*01"] * 4,
            'j_alleles': ["TRBJ2-7"] * 4,
        })
        reference_sequences.to_csv(ref_path, index=False)

        # four repertoire compositions repeated four times, each as its own list object
        repertoire_pattern = (
            ("GGG", "III", "LLL", "MMM"),
            ("DDD", "EEE", "FFF", "III", "LLL", "MMM"),
            ("CCC", "FFF", "MMM"),
            ("AAA", "CCC", "EEE", "FFF", "LLL", "MMM"),
        )
        repertoires, metadata = RepertoireBuilder.build(
            [list(sequences) for _ in range(4) for sequences in repertoire_pattern],
            labels={"l1": [True, True, False, False] * 4},
            path=root)

        dataset = RepertoireDataset(repertoires=repertoires, metadata_file=metadata, labels={"l1": [True, False]})
        ImmuneMLExporter.export(dataset, root)

        # converted to a fresh list at each use (PyYAML would serialize a shared list as an anchor/alias)
        comparison_attributes = ("sequence_aas", "v_alleles", "j_alleles")

        definitions = {
            "datasets": {
                "d1": {
                    "format": "ImmuneML",
                    "params": {"path": str(root / f"{dataset.name}.iml_dataset")},
                },
            },
            "encodings": {
                "e1": {"SequenceAbundance": {'comparison_attributes': list(comparison_attributes)}},
            },
            "ml_methods": {
                "knn": {"KNN": {"n_neighbors": 1}},
            },
            "reports": {
                "r1": {
                    "ReferenceSequenceOverlap": {
                        "reference_path": str(ref_path),
                        'comparison_attributes': list(comparison_attributes),
                    },
                },
            },
        }
        train_instruction = {
            "type": "TrainMLModel",
            "settings": [{"encoding": "e1", "ml_method": "knn"}],
            "assessment": {
                "split_strategy": "random",
                "split_count": 1,
                "training_percentage": 0.7,
                "reports": {},
            },
            "selection": {
                "split_strategy": "random",
                "split_count": 1,
                "training_percentage": 0.7,
            },
            "labels": [{"l1": {"positive_class": True}}],
            "dataset": "d1",
            "strategy": "GridSearch",
            "metrics": ["accuracy"],
            "number_of_processes": 2,
            "reports": ["r1"],
            "optimization_metric": "balanced_accuracy",
            "refit_optimal_model": True,
        }
        specs = {"definitions": definitions, "instructions": {"inst1": train_instruction}}

        specs_file = root / "specs.yaml"
        with open(specs_file, "w") as stream:
            yaml.dump(specs, stream)

        ImmuneMLApp(specs_file, root / "result").run()

        shutil.rmtree(root)
Ejemplo n.º 14
0
    def test(self):
        """Train a ReceptorCNN on a random, one-hot encoded receptor dataset.

        A random dataset of 500 receptors with a binary ``cmv_epitope`` label is defined
        in the specification and a TrainMLModel instruction is run through ImmuneMLApp.
        There are no explicit assertions: the test passes when the run completes without
        raising, after which the temporary directory is removed.
        """
        root = PathBuilder.build(EnvironmentSettings.tmp_test_path / "integration_receptor_cnn_workflow/")

        dataset_params = {
            "result_path": str(root / "generated_dataset/"),
            "receptor_count": 500,
            "chain_1_length_probabilities": {5: 1.},
            "chain_2_length_probabilities": {6: 1.},
            "labels": {"cmv_epitope": {True: 0.5, False: 0.5}},
        }
        cnn_params = {
            "iteration_count": 1000,
            "evaluate_at": 10,
            "batch_size": 100,
            "number_of_threads": 4,
        }
        definitions = {
            "datasets": {"d1": {"format": "RandomReceptorDataset", "params": dataset_params}},
            "encodings": {"enc1": {"OneHot": {"use_positional_info": True}}},
            "ml_methods": {"cnn": {"ReceptorCNN": cnn_params}},
        }
        train_instruction = {
            "type": "TrainMLModel",
            "settings": [{"encoding": "enc1", "ml_method": "cnn"}],
            "assessment": {
                "split_strategy": "random",
                "split_count": 1,
                "training_percentage": 0.7,
            },
            "selection": {
                "split_strategy": "random",
                "split_count": 1,
                "training_percentage": 1,
            },
            "labels": ["cmv_epitope"],
            "dataset": "d1",
            "strategy": "GridSearch",
            "metrics": ["accuracy"],
            "number_of_processes": 4,
            "reports": None,
            "optimization_metric": "balanced_accuracy",
            "refit_optimal_model": False,
        }
        specs = {"definitions": definitions, "instructions": {"instr1": train_instruction}}

        specs_file = root / "specs.yaml"
        with open(specs_file, "w") as stream:
            yaml.dump(specs, stream)

        ImmuneMLApp(specs_file, root / 'result/').run()

        shutil.rmtree(root)
Ejemplo n.º 15
0
    def test_generate(self):
        """Run TrainMLModel with the DiseaseAssociatedSequenceCVOverlap report and check its output.

        Builds 14 repertoires with an alternating binary label ``l1``, exports them as a
        pickled dataset and trains a KNN model on SequenceAbundance-encoded data. The first
        returned state must hold exactly one report result with at least one figure and one
        table, and every reported figure and table path must exist on disk.
        """
        root = EnvironmentSettings.tmp_test_path / "disease_assoc_seq_cv/"
        PathBuilder.build(root)

        # two repertoire compositions alternating seven times, each as its own list object
        repertoire_pattern = (("GGG", "III", "LLL", "MMM"), ("DDD", "EEE", "FFF"))
        repertoires, metadata = RepertoireBuilder.build(
            [list(sequences) for _ in range(7) for sequences in repertoire_pattern],
            labels={"l1": [True, False] * 7},
            path=root)

        dataset = RepertoireDataset(repertoires=repertoires, metadata_file=metadata, labels={"l1": [True, False]})
        PickleExporter.export(dataset, root)

        definitions = {
            "datasets": {
                "d1": {
                    "format": "Pickle",
                    "params": {"path": str(root / f"{dataset.name}.iml_dataset")},
                },
            },
            "encodings": {
                "e1": {"SequenceAbundance": {'p_value_threshold': 0.5}},
            },
            "ml_methods": {
                "knn": {"KNN": {"n_neighbors": 1}},
            },
            "reports": {
                "r1": {
                    "DiseaseAssociatedSequenceCVOverlap": {
                        "compare_in_selection": True,
                        "compare_in_assessment": True,
                    },
                },
            },
        }
        train_instruction = {
            "type": "TrainMLModel",
            "settings": [{"encoding": "e1", "ml_method": "knn"}],
            "assessment": {
                "split_strategy": "random",
                "split_count": 1,
                "training_percentage": 0.5,
                "reports": {},
            },
            "selection": {
                "split_strategy": "random",
                "split_count": 1,
                "training_percentage": 0.5,
            },
            "labels": [{"l1": {"positive_class": True}}],
            "dataset": "d1",
            "strategy": "GridSearch",
            "metrics": ["accuracy"],
            "number_of_processes": 2,
            "reports": ["r1"],
            "optimization_metric": "balanced_accuracy",
            "refit_optimal_model": True,
            "store_encoded_data": False,
        }
        specs = {"definitions": definitions, "instructions": {"inst1": train_instruction}}

        specs_file = root / "specs.yaml"
        with open(specs_file, "w") as stream:
            yaml.dump(specs, stream)

        state = ImmuneMLApp(specs_file, root / "result/").run()[0]

        self.assertEqual(1, len(state.report_results))
        report_result = state.report_results[0]
        self.assertTrue(len(report_result.output_figures) > 0)
        self.assertTrue(len(report_result.output_tables) > 0)

        # figures are checked first, then tables
        for output in list(report_result.output_figures) + list(report_result.output_tables):
            self.assertTrue(os.path.isfile(output.path))

        shutil.rmtree(root)