def test_cv_score_is_more_than_zero_with_LSVC_SVC_KNN_GNB_accuracy_5(self):
    """Check that four classifiers each get a positive 5-fold accuracy score.

    Loads ``diabetes.csv`` through the "CSV" loader, splits the resulting
    data frame into ``x`` and ``y``, and scores SVC, KNeighborsClassifier,
    GaussianNB and LinearSVC (with ``dual=False``) via ``CVScore.get_score``
    using the "accuracy" score type and 5 folds.  The test passes only when
    every score is greater than zero.
    """
    import os

    # Build the relative path with os.path.join so the separators match the
    # host OS; on Windows this still yields ".\..\datasets\diabetes.csv".
    path = os.path.join(os.curdir, os.pardir, "datasets", "diabetes.csv")

    # get df with loader creator
    csv_type = self._loader_creator.create_loader(path, "CSV")
    df = csv_type.get_file_transformed()

    # split df into x and y
    splitter = SplitterReturner()
    x, y = splitter.split_x_y_from_df(df)

    cv_score = CVScore()

    # create the SVC, KNN, GNB and LinearSVC estimators
    model_1 = self._estimator_creator.create_estimator("SVC")
    model_2 = self._estimator_creator.create_estimator("KNeighborsClassifier")
    model_3 = self._estimator_creator.create_estimator("GaussianNB")
    model_4 = self._estimator_creator.create_estimator("LinearSVC")
    estimators = [model_1, model_2, model_3, model_4.set_params(dual=False)]

    # score every estimator with accuracy and 5 folds, recording score > 0
    bol_results = []
    for clf in estimators:
        score = cv_score.get_score(x, y, clf, "accuracy", 5)
        print(clf.__class__.__name__, "score is:", score)
        bol_results.append(bool(score > 0))
    print(bol_results)

    # all() is True only when every estimator scored above zero
    self.assertTrue(all(bol_results))
def test_cv_score_is_more_than_zero_with_APROPAGATION_KMEANS_MINIKMEANS_MEANSHIFT_mutual_info_score_5(
        self):
    """Check that clustering estimators get a positive mutual-info score.

    Loads ``iris.csv`` through the "CSV" loader, splits the resulting data
    frame into ``x`` and ``y``, and scores AffinityPropagation (with
    ``random_state=0``), KMeans, MiniBatchKMeans and MeanShift via
    ``CVScore.get_score`` using the "mutual_info_score" score type and
    5 folds.  The test passes when at least one score is greater than zero.
    """
    import os

    # Build the relative path with os.path.join so the separators match the
    # host OS; on Windows this still yields ".\..\datasets\iris.csv".
    path = os.path.join(os.curdir, os.pardir, "datasets", "iris.csv")

    # get df with loader creator
    csv_type = self._loader_creator.create_loader(path, "CSV")
    df = csv_type.get_file_transformed()

    # split df into x and y
    splitter = SplitterReturner()
    x, y = splitter.split_x_y_from_df(df)

    cv_score = CVScore()

    # create the four clustering estimators
    model_1 = self._estimator_creator.create_estimator("AffinityPropagation")
    model_2 = self._estimator_creator.create_estimator("KMeans")
    model_3 = self._estimator_creator.create_estimator("MiniBatchKMeans")
    model_4 = self._estimator_creator.create_estimator("MeanShift")
    estimators = [model_1.set_params(random_state=0), model_2, model_3, model_4]

    # score every estimator with mutual_info_score and 5 folds
    bol_results = []
    for clf in estimators:
        score = cv_score.get_score(x, y, clf, "mutual_info_score", 5)
        print(clf.__class__.__name__, "score is:", score)
        bol_results.append(bool(score > 0))
    print(bol_results)

    # Previously the collected results were never asserted, so the test
    # could not fail on a bad score.  Require at least one positive score.
    # NOTE(review): tighten to all(bol_results) if every estimator is
    # expected to score above zero on this dataset.
    self.assertTrue(any(bol_results))
def test_cv_score_is_more_than_zero_with_LSVR_SVR_LASSO_SGD_explained_variance_5(
        self):
    """Check that at least one estimator gets a positive explained variance.

    Loads ``winequality-white.csv`` through the "SCSV" loader, splits the
    resulting data frame into ``x`` and ``y``, and scores LinearSVR, SVR,
    Lasso and SGDClassifier via ``CVScore.get_score`` using the
    "explained_variance" score type and 5 folds.  The test passes when at
    least one score is greater than zero.
    """
    import os

    # Build the relative path with os.path.join so the separators match the
    # host OS; on Windows this still yields
    # ".\..\datasets\winequality-white.csv".
    path = os.path.join(
        os.curdir, os.pardir, "datasets", "winequality-white.csv")

    # get df with loader creator
    scsv_type = self._loader_creator.create_loader(path, "SCSV")
    df = scsv_type.get_file_transformed()

    # split df into x and y
    splitter = SplitterReturner()
    x, y = splitter.split_x_y_from_df(df)

    cv_score = CVScore()

    # create the four estimators
    # NOTE(review): SGDClassifier is a classifier among regressors here;
    # confirm that SGDRegressor was not the intended estimator.
    model_1 = self._estimator_creator.create_estimator("LinearSVR")
    model_2 = self._estimator_creator.create_estimator("SVR")
    model_3 = self._estimator_creator.create_estimator("Lasso")
    model_4 = self._estimator_creator.create_estimator("SGDClassifier")
    estimators = [model_1, model_2, model_3, model_4]

    # score every estimator with explained_variance and 5 folds
    bol_results = []
    for clf in estimators:
        score = cv_score.get_score(x, y, clf, "explained_variance", 5)
        print(clf.__class__.__name__, "score is:", score)
        bol_results.append(bool(score > 0))
    print(bol_results)

    # there is at least one true element, which means one of the scores is
    # greater than 0
    self.assertTrue(any(bol_results))
def test_n_folds_validation_and_score_type_raises_ValueError(self):
    """Check that ``get_score`` raises ValueError for "roc" with 2 folds.

    Loads ``diabetes.csv`` through the "CSV" loader, splits the resulting
    data frame into ``x`` and ``y``, builds a LinearSVC estimator with
    ``dual=False`` and ``random_state=0``, and expects
    ``CVScore.get_score`` to raise ``ValueError`` when called with the
    score type "roc" and 2 folds.
    """
    import os

    # Build the relative path with os.path.join so the separators match the
    # host OS; on Windows this still yields ".\..\datasets\diabetes.csv".
    path = os.path.join(os.curdir, os.pardir, "datasets", "diabetes.csv")

    # get df with loader creator
    csv_type = self._loader_creator.create_loader(path, "CSV")
    df = csv_type.get_file_transformed()

    # split df into x and y
    splitter = SplitterReturner()
    x, y = splitter.split_x_y_from_df(df)

    cv_score = CVScore()

    # create a simple linearSVC estimator
    model = self._estimator_creator.create_estimator("LinearSVC")
    model.set_params(dual=False, random_state=0)

    # NOTE(review): presumably "roc" is rejected as an unknown score type
    # and/or 2 folds is below the accepted minimum; confirm against
    # get_score's validation.
    with self.assertRaises(ValueError):
        cv_score.get_score(x, y, model, "roc", 2)
class SBSMachineLearning(ABC):
    """Abstract base class bundling the state used to score an estimator.

    It keeps a data frame, a feature selector, a parameter selector, the
    best features, the best and initial parameters, and the estimator.
    Each one is exposed through a read/write property.  Subclasses must
    implement ``score_model``.
    """

    # Class-level defaults.  Every property setter rebinds the matching
    # attribute on the instance it is called on.
    _data_frame = pd.DataFrame()
    _feature_selector: FeatureSelection = None
    _parameter_selector: ParameterSearch = None
    _best_features: NpArray = None
    _best_params: dict = None
    _initial_params: dict = None
    _clf: Any = None
    # Created once, when the class body is executed.
    _cv_score: CVModelScore = CVScore()

    # ----- data frame -----
    @property
    def data_frame(self) -> DataFrame:
        """Return the stored data frame."""
        return self._data_frame

    @data_frame.setter
    def data_frame(self, value: DataFrame) -> None:
        self._data_frame = value

    # ----- feature selector -----
    @property
    def feature_selector(self) -> FeatureSelection:
        """Return the stored feature selector."""
        return self._feature_selector

    @feature_selector.setter
    def feature_selector(self, value: FeatureSelection) -> None:
        self._feature_selector = value

    # ----- parameter selector -----
    @property
    def parameter_selector(self) -> ParameterSearch:
        """Return the stored parameter selector."""
        return self._parameter_selector

    @parameter_selector.setter
    def parameter_selector(self, value: ParameterSearch) -> None:
        self._parameter_selector = value

    # ----- best features -----
    @property
    def best_features(self) -> NpArray:
        """Return the stored best features."""
        return self._best_features

    @best_features.setter
    def best_features(self, value: NpArray) -> None:
        self._best_features = value

    # ----- best parameters (backed by ``_best_params``) -----
    @property
    def best_parameters(self) -> dict:
        """Return the stored best parameters."""
        return self._best_params

    @best_parameters.setter
    def best_parameters(self, value: dict) -> None:
        self._best_params = value

    # ----- initial parameters (backed by ``_initial_params``) -----
    @property
    def initial_parameters(self) -> dict:
        """Return the stored initial parameters."""
        return self._initial_params

    @initial_parameters.setter
    def initial_parameters(self, value: dict) -> None:
        self._initial_params = value

    # ----- estimator (backed by ``_clf``) -----
    @property
    def estimator(self) -> Any:
        """Return the stored estimator."""
        return self._clf

    @estimator.setter
    def estimator(self, value: Any) -> None:
        self._clf = value

    @abstractmethod
    def score_model(self, score_type: str, n_folds_validation: int) -> float:
        """Score the model; must be implemented by subclasses.

        Args:
            score_type: Name of the score to compute.
            n_folds_validation: Number of folds to use for validation.

        Returns:
            The resulting score as a float.
        """
class FeatureSelection(ABC):
    """Abstract interface for feature-selection strategies.

    Subclasses must implement ``select_features``.
    """

    # Created once, when the class body is executed.
    _cv_score: CVModelScore = CVScore()

    @abstractmethod
    def select_features(
        self,
        x: DataFrame,
        y: NpArray,
        model: Any,
        score_type: str,
        n_folds_validation: int,
    ) -> tuple:
        """Select features; must be implemented by subclasses.

        Args:
            x: Data frame of input features.
            y: Target values.
            model: Estimator used during selection.
            score_type: Name of the score to compute.
            n_folds_validation: Number of folds to use for validation.

        Returns:
            A tuple; its contents are defined by the implementing subclass.
        """