def test_fit_by_cross_validation(self):
    """Cross-validated fitting should run without error on a small two-label dataset."""
    features = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1],
                         [1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
    labels = {"t1": [1, 0, 2, 0, 1, 0, 2, 0],
              "t2": [1, 0, 2, 0, 1, 0, 2, 0]}

    classifier = SVM()
    # 2 splits keep the 8-example dataset divisible and the test fast
    classifier.fit_by_cross_validation(EncodedData(features, labels),
                                       number_of_splits=2, label_name="t1")
def test_predict(self):
    """predict() should return one label per example, drawn from the training labels."""
    train_x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
    train_y = {"test": np.array([1, 0, 2, 0])}

    classifier = SVM()
    classifier.fit(EncodedData(train_x, train_y), "test")

    unseen = np.array([[0, 1, 0], [1, 0, 0]])
    predictions = classifier.predict(EncodedData(unseen), 'test')["test"]

    self.assertTrue(len(predictions) == 2)
    # every prediction must come from the label set seen during training
    self.assertTrue(predictions[0] in [0, 1, 2])
    self.assertTrue(predictions[1] in [0, 1, 2])
def test_store_load(self):
    """store() followed by load() should restore a usable fitted model.

    The original version only smoke-tested the round trip (no assertion after
    load) and skipped cleanup when anything raised; this version asserts the
    restored model and always removes the temporary directory.
    """
    x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
    y = {"default": np.array(['a', "b", "c", "a"])}

    svm = SVM()
    svm._fit(sparse.csr_matrix(x), y["default"])

    path = EnvironmentSettings.root_path / "test/tmp/store_load_sklearn/"
    details_path = EnvironmentSettings.root_path / "test/tmp/store_load_sklearn/details.yaml"
    try:
        svm.store(path=path, details_path=details_path)

        svm2 = SVM()
        svm2.load(path=path, details_path=details_path)
        # the round trip must leave svm2 holding a sklearn SVC, not merely
        # complete without raising (mirrors the check done in test_load)
        self.assertTrue(isinstance(svm2.get_model(), SVC))
    finally:
        # clean up even when an assertion fails, so reruns start from scratch
        shutil.rmtree(path, ignore_errors=True)
def test_load(self):
    """load() should restore a pickled sklearn model from disk."""
    train_x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
    train_y = {"default": np.array([1, 0, 2, 0])}

    classifier = SVM()
    classifier.fit(EncodedData(train_x, train_y), 'default')

    path = EnvironmentSettings.tmp_test_path / "my_svm2/"
    PathBuilder.build(path)

    # persist the underlying sklearn model under the filename load() expects
    with open(path / "svm.pickle", "wb") as model_file:
        pickle.dump(classifier.get_model(), model_file)

    restored = SVM()
    restored.load(path)
    self.assertTrue(isinstance(restored.get_model(), SVC))

    shutil.rmtree(path)
def test_store(self):
    """store() should persist the fitted model as svm.pickle containing an SVC.

    Writes under the temporary test directory instead of the project root
    (the original used ``EnvironmentSettings.root_path / "my_svm/"``, which
    left artifacts in the repository when the test failed before rmtree),
    and guarantees cleanup with try/finally.
    """
    x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
    y = {"default": np.array(['a', "b", "c", "a"])}

    svm = SVM()
    svm.fit(EncodedData(x, y), 'default')

    path = EnvironmentSettings.tmp_test_path / "my_svm/"
    try:
        svm.store(path)
        self.assertTrue(os.path.isfile(path / "svm.pickle"))

        with open(path / "svm.pickle", "rb") as file:
            svm2 = pickle.load(file)
        # the pickle must hold the raw sklearn estimator, not the SVM wrapper
        self.assertTrue(isinstance(svm2, SVC))
    finally:
        shutil.rmtree(path, ignore_errors=True)
def test_store(self):
    """Storing a model fitted via _fit should write svm.pickle holding an SVC."""
    features = sparse.csr_matrix(np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]]))
    targets = {"default": np.array(['a', "b", "c", "a"])}

    classifier = SVM()
    classifier._fit(features, targets["default"])

    path = EnvironmentSettings.root_path / "test/tmp/storesklearn/"
    classifier.store(path)
    self.assertTrue(os.path.isfile(path / "svm.pickle"))

    # the pickle must contain the raw sklearn estimator, not the wrapper
    with open(path / "svm.pickle", "rb") as model_file:
        stored_model = pickle.load(model_file)
    self.assertTrue(isinstance(stored_model, SVC))

    shutil.rmtree(path)
def test_fit(self):
    """Fitting on a tiny dense dataset should complete without raising."""
    features = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
    targets = {"default": np.array([1, 0, 2, 0])}

    classifier = SVM()
    classifier.fit(EncodedData(features, targets), 'default')
def test_run(self):
    """End-to-end TrainMLModel run over two HP settings and two labels."""
    path = EnvironmentSettings.tmp_test_path / "hpoptimproc/"
    PathBuilder.build(path)

    # 34 identical repertoires; label values follow the original fixture:
    # l1 alternates 1/2, l2 starts with pairs (0,0,1,1) then alternates 0/1
    repertoire_count = 34
    repertoires, metadata = RepertoireBuilder.build(
        sequences=[["AAA", "CCC", "DDD"] for _ in range(repertoire_count)],
        path=path,
        labels={"l1": [1, 2] * 17,
                "l2": [0, 0, 1, 1] * 4 + [0, 1] * 9})

    dataset = RepertoireDataset(repertoires=repertoires, metadata_file=metadata,
                                labels={"l1": [1, 2], "l2": [0, 1]})

    enc1 = {"k": 3, "model_type": ModelType.SEQUENCE.name, "vector_size": 4}
    enc2 = {"k": 3, "model_type": ModelType.SEQUENCE.name, "vector_size": 6}
    no_model_cv = {"model_selection_cv": False, "model_selection_n_folds": -1}

    hp_settings = [
        HPSetting(Word2VecEncoder.build_object(dataset, **enc1), enc1,
                  LogisticRegression(), no_model_cv, []),
        HPSetting(Word2VecEncoder.build_object(dataset, **enc2), enc2,
                  SVM(), no_model_cv,
                  [ClonesPerRepertoireFilter(lower_limit=-1, upper_limit=1000)])
    ]

    report = SequenceLengthDistribution()
    label_config = LabelConfiguration([Label("l1", [1, 2]), Label("l2", [0, 1])])

    # assessment and selection use identically-configured 50/50 random splits
    process = TrainMLModelInstruction(
        dataset, GridSearch(hp_settings), hp_settings,
        SplitConfig(SplitType.RANDOM, 1, 0.5,
                    reports=ReportConfig(data_splits={"seqlen": report})),
        SplitConfig(SplitType.RANDOM, 1, 0.5,
                    reports=ReportConfig(data_splits={"seqlen": report})),
        {Metric.BALANCED_ACCURACY}, Metric.BALANCED_ACCURACY,
        label_config, path)

    state = process.run(result_path=path)

    self.assertTrue(isinstance(state, TrainMLModelState))
    self.assertEqual(1, len(state.assessment_states))
    self.assertTrue("l1" in state.assessment_states[0].label_states)
    self.assertTrue("l2" in state.assessment_states[0].label_states)

    shutil.rmtree(path)