예제 #1
0
    def test_run(self):
        """Assess a fitted LogisticRegression and verify the reported scores and files.

        The test fits a LogisticRegression on hand-written encoded data for
        label "l1", runs MLMethodAssessment.run on the same dataset and then
        checks that:

        * the result is a dict whose log-loss entry is at most 0.1,
        * ml_score.csv exists and holds exactly one row,
        * predictions.csv holds one row per example (12).

        The temporary directory is removed even when an assertion fails.
        """
        path = EnvironmentSettings.root_path / "test/tmp/mlmethodassessment/"
        PathBuilder.build(path)

        # Derive every output location from ``path`` so the directory is
        # spelled out in exactly one place.
        predictions_path = path / "predictions.csv"
        ml_score_path = path / "ml_score.csv"

        try:
            dataset = RepertoireDataset(repertoires=RepertoireBuilder.build(
                [["AA"], ["CC"], ["AA"], ["CC"], ["AA"], ["CC"], ["AA"], ["CC"],
                 ["AA"], ["CC"], ["AA"], ["CC"]], path)[0])
            dataset.encoded_data = EncodedData(
                examples=np.array([[1, 1], [1, 1], [3, 3], [1, 1], [1, 1], [3, 3],
                                   [1, 1], [1, 1], [3, 3], [1, 1], [1, 1], [3, 3]]),
                labels={
                    "l1": [1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3],
                    "l2": [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]
                })

            # NOTE(review): the label declares values [1, 2] while the encoded
            # "l1" labels only contain 1 and 3 - confirm this is intended.
            label = Label(name='l1', values=[1, 2])

            method1 = LogisticRegression()
            method1.fit(dataset.encoded_data, label=label)

            res = MLMethodAssessment.run(
                MLMethodAssessmentParams(
                    dataset=dataset,
                    method=method1,
                    metrics={
                        Metric.ACCURACY, Metric.BALANCED_ACCURACY, Metric.F1_MACRO
                    },
                    optimization_metric=Metric.LOG_LOSS,
                    predictions_path=predictions_path,
                    label=label,
                    ml_score_path=ml_score_path,
                    split_index=1,
                    path=path))

            self.assertIsInstance(res, dict)
            self.assertLessEqual(res[Metric.LOG_LOSS.name.lower()], 0.1)

            self.assertTrue(os.path.isfile(ml_score_path))
            self.assertEqual(1, pd.read_csv(ml_score_path).shape[0])

            self.assertEqual(12, pd.read_csv(predictions_path).shape[0])
        finally:
            # Always clean up, otherwise a failed run leaves stale files that
            # can influence later test runs.
            shutil.rmtree(path)
예제 #2
0
    def test_run(self):
        """Train a LogisticRegression via MLMethodTrainer and verify its output files.

        The test runs MLMethodTrainer.run with 2-fold model selection on a
        six-example encoded dataset for label "l1", calls ``predict`` on the
        returned method with a single example, and checks that both
        predictions.csv and details.yaml were written under the result path.

        The result directory is removed even when an assertion fails.
        """
        path = EnvironmentSettings.root_path / "test/tmp/mlmethodtrainer/"
        predictions_path = path / "predictions.csv"
        details_path = path / "details.yaml"

        dataset = RepertoireDataset()
        dataset.encoded_data = EncodedData(
            examples=np.array([[1, 2, 3],
                               [2, 3, 4],
                               [1, 2, 3],
                               [2, 3, 4],
                               [1, 2, 3],
                               [2, 3, 4]]),
            labels={
                "l1": [1, 0, 1, 0, 1, 0],
                "l2": [0, 1, 0, 1, 0, 1]
            },
            feature_names=["f1", "f2", "f3"])

        try:
            trained_method = MLMethodTrainer.run(
                MLMethodTrainerParams(result_path=path,
                                      dataset=dataset,
                                      label=Label(name="l1", values=[0, 1]),
                                      method=LogisticRegression(),
                                      model_selection_n_folds=2,
                                      model_selection_cv=True,
                                      cores_for_training=1,
                                      train_predictions_path=predictions_path,
                                      ml_details_path=details_path,
                                      optimization_metric="balanced_accuracy"))

            # Smoke check only: the call must not raise; its result is unused.
            trained_method.predict(
                EncodedData(np.array([1, 2, 3]).reshape(1, -1)), Label("l1"))

            self.assertTrue(os.path.isfile(predictions_path))
            self.assertTrue(os.path.isfile(details_path))
        finally:
            # The directory may not exist if training failed before writing
            # anything, so only remove it when it is actually there.
            if os.path.isdir(path):
                shutil.rmtree(path)