Example #1
0
    def test_fit_by_cross_validation(self):
        x = EncodedData(
            np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1], [1, 0, 0],
                      [0, 1, 1], [1, 1, 1], [0, 1, 1]]), {
                          "t1": [1, 0, 2, 0, 1, 0, 2, 0],
                          "t2": [1, 0, 2, 0, 1, 0, 2, 0]
                      })

        svm = SVM()
        svm.fit_by_cross_validation(x, number_of_splits=2, label_name="t1")
Example #2
0
    def test_predict(self):
        x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
        y = {"test": np.array([1, 0, 2, 0])}

        svm = SVM()
        svm.fit(EncodedData(x, y), "test")

        test_x = np.array([[0, 1, 0], [1, 0, 0]])
        y = svm.predict(EncodedData(test_x), 'test')["test"]

        self.assertTrue(len(y) == 2)
        self.assertTrue(y[0] in [0, 1, 2])
        self.assertTrue(y[1] in [0, 1, 2])
Example #3
0
    def test_store_load(self):
        x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
        y = {"default": np.array(['a', "b", "c", "a"])}

        svm = SVM()
        svm._fit(sparse.csr_matrix(x), y["default"])

        path = EnvironmentSettings.root_path / "test/tmp/store_load_sklearn/"
        details_path = EnvironmentSettings.root_path / "test/tmp/store_load_sklearn/details.yaml"

        svm.store(path=path, details_path=details_path)

        svm2 = SVM()
        svm2.load(path=path, details_path=details_path)

        shutil.rmtree(path)
Example #4
0
    def test_load(self):
        x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
        y = {"default": np.array([1, 0, 2, 0])}

        svm = SVM()
        svm.fit(EncodedData(x, y), 'default')

        path = EnvironmentSettings.tmp_test_path / "my_svm2/"
        PathBuilder.build(path)

        with open(path / "svm.pickle", "wb") as file:
            pickle.dump(svm.get_model(), file)

        svm2 = SVM()
        svm2.load(path)

        self.assertTrue(isinstance(svm2.get_model(), SVC))

        shutil.rmtree(path)
Example #5
0
    def test_store(self):
        x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
        y = {"default": np.array(['a', "b", "c", "a"])}

        svm = SVM()
        svm.fit(EncodedData(x, y), 'default')

        path = EnvironmentSettings.root_path / "my_svm/"

        svm.store(path)
        self.assertTrue(os.path.isfile(path / "svm.pickle"))

        with open(path / "svm.pickle", "rb") as file:
            svm2 = pickle.load(file)

        self.assertTrue(isinstance(svm2, SVC))

        shutil.rmtree(path)
Example #6
0
    def test_store(self):
        x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
        y = {"default": np.array(['a', "b", "c", "a"])}

        svm = SVM()
        svm._fit(sparse.csr_matrix(x), y["default"])

        path = EnvironmentSettings.root_path / "test/tmp/storesklearn/"

        svm.store(path)
        self.assertTrue(os.path.isfile(path / "svm.pickle"))

        with open(path / "svm.pickle", "rb") as file:
            svm2 = pickle.load(file)

        self.assertTrue(isinstance(svm2, SVC))

        shutil.rmtree(path)
Example #7
0
    def test_fit(self):
        x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
        y = {"default": np.array([1, 0, 2, 0])}

        svm = SVM()
        svm.fit(EncodedData(x, y), 'default')
Example #8
0
    def test_run(self):

        path = EnvironmentSettings.tmp_test_path / "hpoptimproc/"
        PathBuilder.build(path)

        repertoires, metadata = RepertoireBuilder.build(
            sequences=[["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"],
                       ["AAA", "CCC", "DDD"], ["AAA", "CCC", "DDD"]],
            path=path,
            labels={
                "l1": [
                    1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
                    1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
                ],
                "l2": [
                    0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
                    0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
                ]
            })

        dataset = RepertoireDataset(repertoires=repertoires,
                                    metadata_file=metadata,
                                    labels={
                                        "l1": [1, 2],
                                        "l2": [0, 1]
                                    })
        enc1 = {
            "k": 3,
            "model_type": ModelType.SEQUENCE.name,
            "vector_size": 4
        }
        enc2 = {
            "k": 3,
            "model_type": ModelType.SEQUENCE.name,
            "vector_size": 6
        }
        hp_settings = [
            HPSetting(Word2VecEncoder.build_object(dataset, **enc1), enc1,
                      LogisticRegression(), {
                          "model_selection_cv": False,
                          "model_selection_n_folds": -1
                      }, []),
            HPSetting(
                Word2VecEncoder.build_object(dataset, **enc2), enc2, SVM(), {
                    "model_selection_cv": False,
                    "model_selection_n_folds": -1
                },
                [ClonesPerRepertoireFilter(lower_limit=-1, upper_limit=1000)])
        ]

        report = SequenceLengthDistribution()
        label_config = LabelConfiguration(
            [Label("l1", [1, 2]), Label("l2", [0, 1])])

        process = TrainMLModelInstruction(
            dataset, GridSearch(hp_settings), hp_settings,
            SplitConfig(SplitType.RANDOM,
                        1,
                        0.5,
                        reports=ReportConfig(data_splits={"seqlen": report})),
            SplitConfig(SplitType.RANDOM,
                        1,
                        0.5,
                        reports=ReportConfig(data_splits={"seqlen": report})),
            {Metric.BALANCED_ACCURACY}, Metric.BALANCED_ACCURACY, label_config,
            path)

        state = process.run(result_path=path)

        self.assertTrue(isinstance(state, TrainMLModelState))
        self.assertEqual(1, len(state.assessment_states))
        self.assertTrue("l1" in state.assessment_states[0].label_states)
        self.assertTrue("l2" in state.assessment_states[0].label_states)

        shutil.rmtree(path)