Esempio n. 1
0
 def test_diabetes_works_with_automl(self):
     """Train an AutoExecutioner that wraps a JarAutoML on the diabetes CSV.

     The test makes no assertion; it passes when training does not raise.
     """
     # the third positional argument of JarAutoML is the max_rand setting
     automl = JarAutoML(10, False, 5000)
     executioner = AutoExecutioner(automl)
     dataset_path = ".\\..\\datasets\\diabetes.csv"
     csv_loader = LoaderCreator.create_loader(dataset_path, "csv")
     executioner.train_model(csv_loader.get_file_transformed())
Esempio n. 2
0
class MyTestCase(unittest.TestCase):
    """Check that each creator only advertises type names from a known list."""

    _estimator_creator = EstimatorCreator()
    _parameter_search_creator = ParameterSearchCreator()
    _model_creator = SBSModelCreator()
    _feature_selection_creator = FeatureSelectorCreator()
    _loader_creator = LoaderCreator()

    def _assert_only_expected(self, types, expected):
        """Fail when ``types`` contains a name that is not in ``expected``.

        The earlier form built ``[True for i in types if i in expected]`` and
        passed it to ``all``; that list can only hold ``True`` values (or be
        empty), so the assertion could never fail. Collecting the offending
        names instead makes the check real and the failure message useful.
        """
        unexpected = [name for name in types if name not in expected]
        self.assertEqual(unexpected, [],
                         f"unexpected available types: {unexpected}")

    def test_available_types_in_EstimatorCreator(self):
        """Every estimator type reported must be one of the expected names."""
        types = self._estimator_creator.get_available_types()
        expected = ('AffinityPropagation', 'GaussianNB', 'KMeans',
                    'KNeighborsClassifier', 'Lasso', 'LinearSVC', 'LinearSVR',
                    'MeanShift', 'MiniBatchKMeans', 'SGDClassifier', 'SVC',
                    'SVR')
        self._assert_only_expected(types, expected)

    def test_available_types_in_ParameterSearchCreator(self):
        """Every parameter-search type reported must be one of the expected names."""
        types = self._parameter_search_creator.get_available_types()
        expected = ('BS', 'BayesianSearch', 'GS', 'GridSearch')
        self._assert_only_expected(types, expected)

    def test_available_types_in_SBSModelCreator(self):
        """Every model type reported must be one of the expected names."""
        types = self._model_creator.get_available_types()
        expected = ('AM', 'FSM', 'FeatureAndParameterSearch',
                    'OnlyFeatureSelection', 'OnlyParameterSearch', 'PSM', 'SM',
                    'Simple')
        self._assert_only_expected(types, expected)

    def test_available_types_in_FeatureSelectorCreator(self):
        """Every feature-selector type reported must be one of the expected names."""
        types = self._feature_selection_creator.get_available_types()
        expected = ('BFS', 'BackwardsFeatureSelection', 'FFS',
                    'ForwardFeatureSelection')
        self._assert_only_expected(types, expected)

    def test_available_types_in_LoaderCreator(self):
        """Every loader type reported must be one of the expected names."""
        types = self._loader_creator.get_available_types()
        expected = ("CSV", "TSV", "SCSV", "JSON")
        self._assert_only_expected(types, expected)
Esempio n. 3
0
 def _handle_file(self) -> None:
     """Load the file chosen by the user and move on to the next step.

     When ``_last_btn_used`` equals "Any", a warning pop-up is shown and
     nothing is loaded. Otherwise the path comes from the load button when
     ``_last_btn_used`` equals "Load" and from the drag button in every other
     case; the loaded data frame is stored in ``variables.data_frame`` and
     ``self.next()`` is called. Any exception raised along the way is handed
     to ``self.handle_error``.
     """
     try:
         # Compare by value: ``is`` on string literals tests object identity,
         # which only works by accident of interning and triggers a
         # SyntaxWarning on Python 3.8+.
         if self._last_btn_used == "Any":
             body = "Ningún archivo ha sido seleccionado. Por favor subir un archivo y seleccionar la separación " \
                    "del mismo, ya sea TSV o CSV"
             self.last_warning_pop_up(body, "")
         else:
             # both paths are read up front, then one is picked
             btn_load_file_file_path = self.btn_load_file.file_path
             btn_drag_file_file_path = self.btn_drag_file.file_path
             file_path = btn_load_file_file_path if self._last_btn_used == "Load" else btn_drag_file_file_path
             loader = LoaderCreator.create_loader(file_path,
                                                  self._df_file_type)
             data_frame = loader.get_file_transformed()
             variables.data_frame = data_frame
             self.next()
     except Exception as error:
         # single boundary for this handler: every failure goes to handle_error
         self.handle_error(error)
Esempio n. 4
0
class MyTestCase(unittest.TestCase):
    """Smoke tests running "BS" and "GS" parameter searches on sample datasets.

    None of the tests asserts on the outcome; each prints what the search
    returned and passes as long as nothing raises.
    """

    _loader_creator = LoaderCreator()
    _param_search_creator = ParameterSearchCreator()
    _estimator_creator = EstimatorCreator()

    def test_molecules_SVC_bayesian_search(self):
        """Run a "BS" search for SVC on molecules.csv, scored by accuracy."""
        # molecules.csv is read with the "TSV" loader
        loader = self._loader_creator.create_loader(
            ".\\..\\datasets\\molecules.csv", "TSV")
        data_frame = loader.get_file_transformed()
        # the m_name column is removed before splitting
        data_frame = data_frame.drop(["m_name"], axis=1)
        features, target = SplitterReturner.split_x_y_from_df(data_frame)
        estimator = self._estimator_creator.create_estimator("SVC")
        search_space = BayesianSearchParametersPossibilities.case("SVC")
        searcher = self._param_search_creator.create_parameter_selector("BS")
        best_params, best_score = searcher.search_parameters(
            features, target, search_space, 10, estimator, "accuracy")
        print(best_params)
        print(best_score)

    def test_wine_quality_LASSO_BS(self):
        """Run a "BS" search for Lasso on winequality-red.csv, scored by r2."""
        # winequality-red.csv is read with the "SCSV" loader
        loader = self._loader_creator.create_loader(
            ".\\..\\datasets\\winequality-red.csv", "SCSV")
        data_frame = loader.get_file_transformed()
        search_space = BayesianSearchParametersPossibilities.case("Lasso")
        lasso = self._estimator_creator.create_estimator("Lasso")
        # this test splits through a SplitterReturner instance
        features, target = SplitterReturner().split_x_y_from_df(data_frame)
        searcher = self._param_search_creator.create_parameter_selector("BS")
        best_params, best_score = searcher.search_parameters(
            features, target, search_space, 10, lasso, "r2")
        print(best_params)
        print(best_score)

    def test_diabetes_lsvc_search_bs(self):
        """Run a "BS" search for LinearSVC on diabetes.csv, scored by accuracy."""
        loader = self._loader_creator.create_loader(
            ".\\..\\datasets\\diabetes.csv", "CSV")
        data_frame = loader.get_file_transformed()
        features, target = SplitterReturner.split_x_y_from_df(data_frame)
        estimator = self._estimator_creator.create_estimator("LinearSVC")
        search_space = BayesianSearchParametersPossibilities.case("LinearSVC")
        searcher = self._param_search_creator.create_parameter_selector("BS")
        # only the parameters are reported; the score is discarded
        best_params, _ = searcher.search_parameters(
            features, target, search_space, 10, estimator, "accuracy")
        print(best_params)

    def test_wine_quality_LASSO_GS(self):
        """Run a "GS" search for Lasso on winequality-white.csv, scored by r2."""
        # winequality-white.csv is read with the "SCSV" loader
        loader = self._loader_creator.create_loader(
            ".\\..\\datasets\\winequality-white.csv", "SCSV")
        data_frame = loader.get_file_transformed()
        search_space = GridSearchParametersPossibilities.case("Lasso")
        lasso = self._estimator_creator.create_estimator("Lasso")
        features, target = SplitterReturner().split_x_y_from_df(data_frame)
        searcher = self._param_search_creator.create_parameter_selector("GS")
        # only the parameters are reported; the score is discarded
        best_params, _ = searcher.search_parameters(
            features, target, search_space, 10, lasso, "r2")
        print(best_params)

    def test_molecules_SVC_grid_search(self):
        """Run a "GS" search for SVC on molecules.csv, scored by accuracy."""
        # molecules.csv is read with the "TSV" loader
        loader = self._loader_creator.create_loader(
            ".\\..\\datasets\\molecules.csv", "TSV")
        data_frame = loader.get_file_transformed()
        # the m_name column is removed before splitting
        data_frame = data_frame.drop(["m_name"], axis=1)
        features, target = SplitterReturner().split_x_y_from_df(data_frame)
        estimator = self._estimator_creator.create_estimator("SVC")
        search_space = GridSearchParametersPossibilities.case("SVC")
        searcher = self._param_search_creator.create_parameter_selector("GS")
        best_params, best_score = searcher.search_parameters(
            features, target, search_space, 10, estimator, "accuracy")
        print(best_params, best_score)

    def test_diabetes_LSVC_grid_search(self):
        """Run a "GS" search for LinearSVC on diabetes.csv, scored by accuracy."""
        loader = self._loader_creator.create_loader(
            ".\\..\\datasets\\diabetes.csv", "CSV")
        data_frame = loader.get_file_transformed()
        features, target = SplitterReturner().split_x_y_from_df(data_frame)
        estimator = self._estimator_creator.create_estimator("LinearSVC")
        search_space = GridSearchParametersPossibilities.case("LinearSVC")
        searcher = self._param_search_creator.create_parameter_selector("GS")
        best_params, best_score = searcher.search_parameters(
            features, target, search_space, 10, estimator, "accuracy")
        print(best_params)
        print(best_score)
Esempio n. 5
0
 def _load_file(self) -> None:
     """Fill ``self.data`` from ``self.file_path`` using the "JSON" loader."""
     json_loader = LoaderCreator.create_loader(self.file_path, "JSON")
     transformed = json_loader.get_file_transformed()
     self.data = transformed
Esempio n. 6
0
class MyTestCase(unittest.TestCase):
    """Tests for DataEnsurer validation of loaded data and plain Python values."""

    _loader_creator = LoaderCreator()

    @staticmethod
    def _dataset_path(file_name):
        """Return the relative Windows-style path of a file in the datasets folder."""
        return ".\\..\\" + "datasets" + "\\" + file_name

    def test_data_is_df(self):
        """diabetes.csv loaded as "csv" must pass validate_pd_data."""
        loader = self._loader_creator.create_loader(
            self._dataset_path("diabetes.csv"), "csv")
        loaded = loader.get_file_transformed()
        self.assertTrue(DataEnsurer.validate_pd_data(loaded))

    def test_data_is_not_df(self):
        """A plain dict must be rejected by validate_pd_data."""
        plain_dict = {'name': 'notch', 'job': 'dev'}
        self.assertFalse(DataEnsurer.validate_pd_data(plain_dict))

    def test_df_not_meeting_req_columns(self):
        """A one-column, 200-row DataFrame must be rejected by validate_pd_data."""
        single_column = {'name': [str(i) + "name" for i in range(200)]}
        frame = pd.DataFrame.from_dict(single_column)
        self.assertFalse(DataEnsurer.validate_pd_data(frame))

    def test_json_is_list(self):
        """welcome_message.json loaded as "json" must validate as a list."""
        loader = self._loader_creator.create_loader(
            ".\\..\\resources\\json_info\\welcome_message.json", "json")
        content = loader.get_file_transformed()
        self.assertTrue(DataEnsurer.validate_py_data(content, list))

    def test_loader_path_is_str(self):
        """The loader's file_path attribute must validate as a str."""
        loader = self._loader_creator.create_loader(
            self._dataset_path("diabetes.csv"), "csv")
        self.assertTrue(DataEnsurer.validate_py_data(loader.file_path, str))

    def test_data_type_is_list(self):
        """WelcomeMessage.data must validate as a list."""
        message = WelcomeMessage(
            file_path="..\\resources\\json_info\\welcome_message.json",
            data_type=list)
        # data is read through the object's attribute
        self.assertTrue(DataEnsurer.validate_py_data(message.data, list))

    def test_data_is_a_corrupted_file_csv(self):
        """Loading corrupted_file_test.txt as "csv" must raise TypeError."""
        with self.assertRaises(TypeError):
            # loader creation stays inside the context, as well as the read
            loader = self._loader_creator.create_loader(
                self._dataset_path("corrupted_file_test.txt"), "csv")
            loader.get_file_transformed()

    def test_data_is_a_corrupted_file_tsv(self):
        """Loading corrupted_file_test.txt as "tsv" must raise TypeError."""
        with self.assertRaises(TypeError):
            # loader creation stays inside the context, as well as the read
            loader = self._loader_creator.create_loader(
                self._dataset_path("corrupted_file_test.txt"), "tsv")
            loader.get_file_transformed()
Esempio n. 7
0
class MyTestCase(unittest.TestCase):
    """Tests for SplitterReturner: x/y splitting and train/test size checks."""

    _loader_creator = LoaderCreator()

    def _load_diabetes(self):
        """Return diabetes.csv from the datasets folder, loaded with the "CSV" loader."""
        full_path = ".\\..\\" + "datasets" + "\\" + "diabetes.csv"
        loader = self._loader_creator.create_loader(full_path, "CSV")
        return loader.get_file_transformed()

    def test_single_split_columns_match(self):
        """x must have one column fewer than the frame; y must have one entry per row."""
        frame = self._load_diabetes()
        row_count, column_count = frame.shape
        features, target = SplitterReturner().split_x_y_from_df(frame)
        # the frame's column count minus one is what x is expected to keep
        self.assertEqual(len(features.columns), column_count - 1)
        self.assertEqual(len(target), row_count)

    def test_single_split_returns_a_tuple(self):
        """split_x_y_from_df must return something that validates as a tuple."""
        frame = self._load_diabetes()
        split = SplitterReturner().split_x_y_from_df(frame)
        self.assertTrue(DataEnsurer.validate_py_data(split, tuple))

    def test_single_split_x_and_y_is_a_dataframe_and_numpy_array(self):
        """The first split element is a DataFrame and the last a numpy array."""
        frame = self._load_diabetes()
        split = SplitterReturner().split_x_y_from_df(frame)
        first_is_frame = isinstance(split[0], pd.DataFrame)
        last_is_array = isinstance(split[-1], np.ndarray)
        self.assertTrue(first_is_frame)
        self.assertTrue(last_is_array)

    def test_train_test_split_size_zero_is_wrong(self):
        """A size of 0.0 must make the split sequence raise ValueError."""
        frame = self._load_diabetes()
        with self.assertRaises(ValueError):
            splitter = SplitterReturner()
            features, target = splitter.split_x_y_from_df(frame)
            _ = splitter.train_and_test_split(features, target, 0.0)

    def test_train_test_split_size_less_than_zero_is_wrong(self):
        """A size of -0.5 must make the split sequence raise ValueError."""
        frame = self._load_diabetes()
        with self.assertRaises(ValueError):
            splitter = SplitterReturner()
            features, target = splitter.split_x_y_from_df(frame)
            _ = splitter.train_and_test_split(features, target, -0.5)

    def test_split_into_x_and_y_is_not_a_valid_dataframe(self):
        """Splitting a one-column DataFrame must raise TypeError."""
        frame = pd.DataFrame.from_dict({'x': list(range(200))})
        with self.assertRaises(TypeError):
            splitter = SplitterReturner()
            _, _ = splitter.split_x_y_from_df(frame)
Esempio n. 8
0
class MyTestCase(unittest.TestCase):
    """Feature-selection tests using the "FFS" and "BFS" selectors.

    Most tests assert that the selected frame has fewer columns than the
    input; a few only print what was selected.
    """

    _loader_creator = LoaderCreator()
    _feature_selector_creator = FeatureSelectorCreator()
    _estimator_creator = EstimatorCreator()

    def _load_xy(self, file_name, file_type):
        """Load a file from the datasets folder and return its (x, y) split."""
        full_path = ".\\..\\" + "datasets" + "\\" + file_name
        loader = self._loader_creator.create_loader(full_path, file_type)
        frame = loader.get_file_transformed()
        return SplitterReturner.split_x_y_from_df(frame)

    def test_diabetes_has_fewer_features_with_LSVC_FFS_roc_auc_10(self):
        """diabetes.csv, "FFS" selector, LinearSVC, scoring "roc_auc"."""
        x, y = self._load_xy("diabetes.csv", "csv")
        _, cols_before = x.shape
        selector = self._feature_selector_creator.create_feature_selector("FFS")
        estimator = self._estimator_creator.create_estimator("LinearSVC")
        estimator.set_params(dual=False, random_state=0)
        selected_x, _ = selector.select_features(x, y, estimator, "roc_auc", 10)
        _, cols_after = selected_x.shape
        # the selection is expected to drop at least one column
        self.assertTrue(cols_after < cols_before)

    def test_diabetes_has_fewer_features_with_LSVC_BFS_roc_auc_10(self):
        """diabetes.csv, "BFS" selector, LinearSVC, scoring "roc_auc"."""
        x, y = self._load_xy("diabetes.csv", "csv")
        _, cols_before = x.shape
        selector = self._feature_selector_creator.create_feature_selector("BFS")
        estimator = self._estimator_creator.create_estimator("LinearSVC")
        estimator.set_params(dual=False, random_state=0)
        selected_x, _ = selector.select_features(x, y, estimator, "roc_auc", 10)
        _, cols_after = selected_x.shape
        # the selection is expected to drop at least one column
        self.assertTrue(cols_after < cols_before)

    def test_diabetes_has_fewer_features_with_SVC_FFS_accuracy_10(self):
        """diabetes.csv, "FFS" selector, SVC, scoring "accuracy"."""
        x, y = self._load_xy("diabetes.csv", "csv")
        _, cols_before = x.shape
        selector = self._feature_selector_creator.create_feature_selector("FFS")
        estimator = self._estimator_creator.create_estimator("SVC")
        estimator.set_params(random_state=0)
        selected_x, _ = selector.select_features(x, y, estimator, "accuracy", 10)
        _, cols_after = selected_x.shape
        # the selection is expected to drop at least one column
        self.assertTrue(cols_after < cols_before)

    def test_molecules_has_fewer_features_with_SVC_BFS_accuracy_5(self):
        """molecules.csv (read as "tsv"), "BFS" selector, SVC, scoring "accuracy"."""
        x, y = self._load_xy("molecules.csv", "tsv")
        # m_name is removed from x after the split
        x = x.drop(["m_name"], axis=1)
        _, cols_before = x.shape
        selector = self._feature_selector_creator.create_feature_selector("BFS")
        estimator = self._estimator_creator.create_estimator("SVC")
        estimator.set_params(random_state=0)
        selected_x, _ = selector.select_features(x, y, estimator, "accuracy", 5)
        _, cols_after = selected_x.shape
        # the selection is expected to drop at least one column
        self.assertTrue(cols_after < cols_before)

    def test_wine_quality_has_fewer_features_with_LSVR_FFS_r2_10(self):
        """winequality-red.csv, "FFS" selector, LinearSVR, scoring "r2"."""
        x, y = self._load_xy("winequality-red.csv", "scsv")
        _, cols_before = x.shape
        selector = self._feature_selector_creator.create_feature_selector("FFS")
        estimator = self._estimator_creator.create_estimator("LinearSVR")
        estimator.set_params(max_iter=20000,
                             dual=False,
                             loss="squared_epsilon_insensitive")
        selected_x, _ = selector.select_features(x, y, estimator, "r2", 10)
        _, cols_after = selected_x.shape
        # the selection is expected to drop at least one column
        self.assertTrue(cols_after < cols_before)

    def test_wine_quality_has_fewer_features_with_SVR_BFS_explained_variance_5(
            self):
        """winequality-white.csv, "BFS" selector, SVR, scoring "explained_variance"."""
        x, y = self._load_xy("winequality-white.csv", "scsv")
        _, cols_before = x.shape
        selector = self._feature_selector_creator.create_feature_selector("BFS")
        estimator = self._estimator_creator.create_estimator("SVR")
        estimator.set_params(gamma="auto")
        selected_x, _ = selector.select_features(x, y, estimator,
                                                 "explained_variance", 5)
        _, cols_after = selected_x.shape
        # the selection is expected to drop at least one column
        self.assertTrue(cols_after < cols_before)

    def test_wine_quality_has_fewer_features_with_LASSO_FFS_r2_10(self):
        """winequality-red.csv, "FFS" selector, Lasso, scoring "r2"; prints only."""
        x, y = self._load_xy("winequality-red.csv", "scsv")
        _, cols_before = x.shape
        selector = self._feature_selector_creator.create_feature_selector("FFS")
        estimator = self._estimator_creator.create_estimator("Lasso")
        estimator.set_params(alpha=1.0,
                             random_state=8,
                             selection='cyclic',
                             tol=0.0001)
        selected_x, _ = selector.select_features(x, y, estimator, "r2", 10)
        _, cols_after = selected_x.shape
        # the comparison is reported, not asserted
        print("lasso", cols_after < cols_before)

    def test_wine_quality_has_fewer_features_with_LASSO_FFS_explained_variance_10(
            self):
        """winequality-red.csv, "FFS" selector, Lasso, "explained_variance"; prints only."""
        x, y = self._load_xy("winequality-red.csv", "scsv")
        _, cols_before = x.shape
        selector = self._feature_selector_creator.create_feature_selector("FFS")
        estimator = self._estimator_creator.create_estimator("Lasso")
        estimator.set_params(alpha=1.0,
                             random_state=8,
                             selection='cyclic',
                             tol=0.0001)
        selected_x, score = selector.select_features(x, y, estimator,
                                                     "explained_variance", 10)
        print(selected_x.columns.values, f"\n{score}")
        _, cols_after = selected_x.shape
        # the comparison is reported, not asserted
        print("lasso", cols_after < cols_before)

    def test_iris_has_fewer_features_with_KMEANS_FFS_mutual_info_score_5(self):
        """iris.csv, "FFS" selector, KMeans, scoring "mutual_info_score"."""
        x, y = self._load_xy("iris.csv", "csv")
        _, cols_before = x.shape
        selector = self._feature_selector_creator.create_feature_selector("FFS")
        estimator = self._estimator_creator.create_estimator("KMeans")
        selected_x, _ = selector.select_features(x, y, estimator,
                                                 "mutual_info_score", 5)
        _, cols_after = selected_x.shape
        # the selection is expected to drop at least one column
        self.assertTrue(cols_after < cols_before)

    def test_iris_has_fewer_features_with_MEANSHIFT_BFS_mutual_info_score_10(
            self):
        """iris.csv, "BFS" selector, MeanShift, "mutual_info_score"; prints only."""
        x, y = self._load_xy("iris.csv", "csv")
        _, cols_before = x.shape
        selector = self._feature_selector_creator.create_feature_selector("BFS")
        estimator = self._estimator_creator.create_estimator("MeanShift")
        selected_x, score = selector.select_features(x, y, estimator,
                                                     "mutual_info_score", 10)
        print(selected_x.columns.values, f"\n{score}")
        _, cols_after = selected_x.shape
        # original author's note: with this dataset, estimator and BFS all of
        # the features are necessary, so the result is printed, not asserted
        print(cols_after < cols_before)

    def test_wine_quality_with_LSVR_FFS_neg_mean_squared_error_10(self):
        """winequality-red.csv, "FFS" selector, LinearSVR; prints columns and score."""
        x, y = self._load_xy("winequality-red.csv", "scsv")
        selector = self._feature_selector_creator.create_feature_selector("FFS")
        estimator = self._estimator_creator.create_estimator("LinearSVR")
        estimator.set_params(max_iter=20000,
                             dual=False,
                             loss="squared_epsilon_insensitive")
        selected_x, score = selector.select_features(
            x, y, estimator, "neg_mean_squared_error", 10)
        print(selected_x.columns.values, f"\n{score}")

    def test_wine_quality_with_LSVR_BFS_neg_mean_squared_error_10(self):
        """winequality-red.csv, "BFS" selector, LinearSVR; prints columns and score."""
        x, y = self._load_xy("winequality-red.csv", "scsv")
        selector = self._feature_selector_creator.create_feature_selector("BFS")
        estimator = self._estimator_creator.create_estimator("LinearSVR")
        estimator.set_params(max_iter=20000,
                             dual=False,
                             loss="squared_epsilon_insensitive")
        selected_x, score = selector.select_features(
            x, y, estimator, "neg_mean_squared_error", 10)
        print(selected_x.columns.values, f"\n{score}")
Esempio n. 9
0
class MyTestCase(unittest.TestCase):
    """Tests around FCreator and the SBSResult reporting helpers.

    The scenarios call FCreator, train a model and hand the collected
    information to SBSResult. None of them inspects what SBSResult produced,
    so they only prove that the calls complete without raising.
    """

    _estimator_creator = EstimatorCreator()
    _loader_creator = LoaderCreator()
    _model_creator = SBSModelCreator()
    _feature_selection_creator = FeatureSelectorCreator()
    _parameter_selection_creator = ParameterSearchCreator()

    # ------------------------------------------------------------------
    # private helpers (not collected as tests: names do not start with "test")
    # ------------------------------------------------------------------
    def _load_csv(self, path):
        """Return the transformed data frame of the "CSV" file at *path*."""
        return self._loader_creator.create_loader(path, "CSV").get_file_transformed()

    @staticmethod
    def _score_text(score_type, score):
        """Return the score line that is passed on to SBSResult.estimator_info."""
        return f'rendimiento promedio "{score_type}": {score!s}'

    @staticmethod
    def _selector_label(selector):
        """Return "No" when *selector* is None, otherwise its class name."""
        return "No" if selector is None else selector.__class__.__name__

    def _write_estimator_report(self, model_instance, prediction_type, score_text, folder_path):
        """Build the 2x5 option table for *model_instance* and pass it to SBSResult.

        :param model_instance: scored model whose estimator, selectors, features
            and parameters are reported
        :param prediction_type: text placed in the "Tipo de predicción" row
        :param score_text: score line, see ``_score_text``
        :param folder_path: folder handed to ``SBSResult.estimator_info``
        """
        info = ["Opción", "Selección",
                "Tipo de predicción", prediction_type,
                "Estimador", model_instance.estimator.__class__.__name__,
                "Selección de características", self._selector_label(model_instance.feature_selector),
                "Selección de hiperparámetros", self._selector_label(model_instance.parameter_selector),
                ]
        table = {"columns": 2, "rows": 5, "info": info}
        SBSResult.estimator_info(table,
                                 list(model_instance.best_features),
                                 model_instance.initial_parameters,
                                 model_instance.best_parameters,
                                 score_text,
                                 folder_path)

    def _score_diabetes_knn(self, feature_selector_type=None):
        """Score a KNeighborsClassifier on diabetes.csv with "BS" parameter search.

        :param feature_selector_type: feature selector name (e.g. "BFS"), or
            None to build the model without feature selection
        :return: tuple ``(model_instance, score)``
        """
        uses_parameter_search = True
        uses_feature_selection = feature_selector_type is not None
        model_instance = self._model_creator.create_model(uses_feature_selection, uses_parameter_search)
        df = self._load_csv(".\\..\\datasets\\diabetes.csv")
        estimator = self._estimator_creator.create_estimator("KNeighborsClassifier")
        # set model instance attributes
        model_instance.initial_parameters = BayesianSearchParametersPossibilities.case("KNeighborsClassifier")
        model_instance.estimator = estimator
        model_instance.parameter_selector = self._parameter_selection_creator.create_parameter_selector("BS")
        if feature_selector_type is not None:
            model_instance.feature_selector = self._feature_selection_creator.create_feature_selector(
                feature_selector_type)
        model_instance.data_frame = df
        return model_instance, model_instance.score_model("accuracy", 10)

    def _report_wine_quality_sgd(self, feature_selector_type, dump_estimator=False):
        """Score an SGDClassifier on winequality-red-coma.csv and report it.

        stdout is captured while the scenario runs and the captured text is
        then passed to ``SBSResult.console_info``.

        :param feature_selector_type: feature selector name ("BFS" or "FFS")
        :param dump_estimator: when True, also call ``SBSResult.dump_estimator``
        """
        old_stdout = sys.stdout
        sys.stdout = my_stdout = StringIO()
        try:
            file_creator_obj = FCreator()
            uses_parameter_search, uses_feature_selection = False, True
            model_instance = self._model_creator.create_model(uses_feature_selection, uses_parameter_search)
            df = self._load_csv(".\\..\\datasets\\winequality-red-coma.csv")
            estimator = self._estimator_creator.create_estimator("SGDClassifier")
            # set model instance attributes
            model_instance.initial_parameters = {'penalty': 'elasticnet',
                                                 'alpha': 0.01,
                                                 'tol': 0.0001,
                                                 'random_state': 512}
            model_instance.estimator = estimator
            model_instance.feature_selector = self._feature_selection_creator.create_feature_selector(
                feature_selector_type)
            model_instance.data_frame = df
            score = model_instance.score_model("neg_mean_squared_error", 10)
            folder_path = file_creator_obj.folder_path
            self._write_estimator_report(model_instance,
                                         "Clasificación",
                                         self._score_text("neg_mean_squared_error", score),
                                         folder_path)
            if dump_estimator:
                SBSResult.dump_estimator(model_instance.estimator, folder_path)
        finally:
            # always give the real stdout back, even when the scenario fails;
            # otherwise every later test would print into the StringIO
            sys.stdout = old_stdout
        console_output = my_stdout.getvalue()
        SBSResult.console_info(console_output.split("\n"), folder_path, 0)

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------
    def test_finishes_creation(self):
        """FCreator can be constructed for the current directory without raising."""
        try:
            _ = FCreator(".\\")
        except Exception as error:
            # the previous ``except ():`` clause matched no exception at all,
            # so its failure branch could never run
            self.fail(f"FCreator raised unexpectedly: {error!r}")

    def test_markdown_file_creates_console_info(self):
        """SBSResult.console_info accepts a list of lines, including an empty one."""
        lots_of_info = ["test info first paragraph", "this comes after a jump line", "",
                        "this comes after two jump lines "]
        SBSResult.console_info(lots_of_info, ".\\SBS_ML_1")

    def test_markdown_file_creates_console_info_overwrite(self):
        """Call SBSResult.console_info for the same folder with one extra line."""
        lots_of_info = ["test info first paragraph", "this comes after a jump line", "",
                        "this comes after two jump lines", "this is new text for overwriting purposes"]
        SBSResult.console_info(lots_of_info, ".\\SBS_ML_1")

    def test_markdown_file_creates_estimator_info(self):
        """Report a KNN model on diabetes.csv that only uses parameter search."""
        file_creator_obj = FCreator()
        model_instance, score = self._score_diabetes_knn()
        print("score:", score)
        print("best params", model_instance.best_parameters)
        print("best features", model_instance.best_features)
        self._write_estimator_report(model_instance,
                                     "Classification",
                                     self._score_text("accuracy", score),
                                     file_creator_obj.folder_path)
        # Output recorded by the original author from one run:
        #   score: 0.7604166666666666
        #   best params OrderedDict([('algorithm', 'kd_tree'), ('leaf_size', 30), ('n_neighbors', 13), ('p', 1),
        #    ('weights', 'uniform')])
        #   best features ['Pregnancies' 'Glucose' 'BloodPressure' 'SkinThickness' 'Insulin' 'BMI'
        #    'DiabetesPedigreeFunction' 'Age']

    def test_markdown_file_creates_estimator_and_console_info(self):
        """Report a KNN model on diabetes.csv with "BS" and "BFS", plus captured stdout."""
        old_stdout = sys.stdout
        sys.stdout = my_stdout = StringIO()
        try:
            file_creator_obj = FCreator()
            model_instance, score = self._score_diabetes_knn("BFS")
            folder_path = file_creator_obj.folder_path
            self._write_estimator_report(model_instance,
                                         "Clasificación",
                                         self._score_text("accuracy", score),
                                         folder_path)
        finally:
            # restore stdout even if training or reporting raised
            sys.stdout = old_stdout
        console_output = my_stdout.getvalue()
        SBSResult.console_info(console_output.split("\n"), folder_path, 0)

    def test_markdown_file_creates_estimator_and_console_info_with_wine_quality_red_sgd_bfs(self):
        """Report an SGDClassifier on winequality-red-coma.csv selected with "BFS"."""
        self._report_wine_quality_sgd("BFS")

    def test_markdown_file_dumps_estimator_and_creates_estimator_and_console_info_with_wine_quality_red_sgd_ffs(self):
        """Same as the "BFS" scenario but with "FFS", and the estimator is dumped too."""
        self._report_wine_quality_sgd("FFS", dump_estimator=True)
Esempio n. 10
0
class MyTestCase(unittest.TestCase):
    """Tests for CVScore.get_score: rejected arguments and the sign of the scores."""

    _loader_creator = LoaderCreator()
    _estimator_creator = EstimatorCreator()

    # ------------------------------------------------------------------
    # private helpers (not collected as tests: names do not start with "test")
    # ------------------------------------------------------------------
    def _load_x_y(self, path, file_type):
        """Load the file at *path* with the *file_type* loader and split it into x and y."""
        loader = self._loader_creator.create_loader(path, file_type)
        df = loader.get_file_transformed()
        return SplitterReturner().split_x_y_from_df(df)

    def _assert_get_score_raises_value_error(self, score_type, n_folds):
        """Assert that get_score raises ValueError for a LinearSVC on diabetes.csv.

        :param score_type: scoring name passed to ``CVScore.get_score``
        :param n_folds: fold count passed to ``CVScore.get_score``
        """
        x, y = self._load_x_y(".\\..\\datasets\\diabetes.csv", "CSV")
        cv_score = CVScore()
        # simple LinearSVC estimator with a fixed random state
        model = self._estimator_creator.create_estimator("LinearSVC")
        model.set_params(dual=False, random_state=0)
        with self.assertRaises(ValueError):
            _ = cv_score.get_score(x, y, model, score_type, n_folds)

    @staticmethod
    def _positive_score_flags(x, y, estimators, score_type, n_folds):
        """Score every estimator and return one bool per estimator: ``score > 0``.

        Each score and the final list of flags are printed.
        """
        cv_score = CVScore()
        flags = []
        for clf in estimators:
            score = cv_score.get_score(x, y, clf, score_type, n_folds)
            print(clf.__class__.__name__, "score is:", score)
            # bool(...) keeps plain Python booleans in the printed list
            flags.append(bool(score > 0))
        print(flags)
        return flags

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------
    def test_score_type_raises_ValueError(self):
        """The score type "roc" with 10 folds is rejected."""
        self._assert_get_score_raises_value_error("roc", 10)

    def test_n_folds_validation_raises_ValueError(self):
        """The score type "roc_auc" with only 2 folds is rejected."""
        self._assert_get_score_raises_value_error("roc_auc", 2)

    def test_n_folds_validation_and_score_type_raises_ValueError(self):
        """The score type "roc" together with 2 folds is rejected."""
        self._assert_get_score_raises_value_error("roc", 2)

    def test_cv_score_is_more_than_zero_with_LSVC_SVC_KNN_GNB_accuracy_5(self):
        """Every classifier reaches an "accuracy" above zero on diabetes.csv (5 folds)."""
        x, y = self._load_x_y(".\\..\\datasets\\diabetes.csv", "CSV")
        # SVC, KNN, GaussianNB and LinearSVC estimators
        model_1 = self._estimator_creator.create_estimator("SVC")
        model_2 = self._estimator_creator.create_estimator(
            "KNeighborsClassifier")
        model_3 = self._estimator_creator.create_estimator("GaussianNB")
        model_4 = self._estimator_creator.create_estimator("LinearSVC")
        estimators = [
            model_1, model_2, model_3,
            model_4.set_params(dual=False)
        ]
        bol_results = self._positive_score_flags(x, y, estimators, "accuracy", 5)
        # all() is only True when every single score was greater than zero
        self.assertTrue(all(bol_results))

    def test_cv_score_is_more_than_zero_with_LSVR_SVR_LASSO_SGD_r2_5(self):
        """At least one estimator reaches an "r2" above zero on winequality-red.csv."""
        x, y = self._load_x_y(".\\..\\datasets\\winequality-red.csv", "SCSV")
        # LinearSVR, SVR, Lasso and SGDClassifier estimators
        estimators = [
            self._estimator_creator.create_estimator("LinearSVR"),
            self._estimator_creator.create_estimator("SVR"),
            self._estimator_creator.create_estimator("Lasso"),
            self._estimator_creator.create_estimator("SGDClassifier"),
        ]
        bol_results = self._positive_score_flags(x, y, estimators, "r2", 5)
        # at least one true element means one of the scores is greater than 0
        self.assertTrue(any(bol_results))

    def test_cv_score_is_more_than_zero_with_LSVR_SVR_LASSO_SGD_explained_variance_5(
            self):
        """At least one estimator reaches an "explained_variance" above zero on winequality-white.csv."""
        x, y = self._load_x_y(".\\..\\datasets\\winequality-white.csv", "SCSV")
        # LinearSVR, SVR, Lasso and SGDClassifier estimators
        estimators = [
            self._estimator_creator.create_estimator("LinearSVR"),
            self._estimator_creator.create_estimator("SVR"),
            self._estimator_creator.create_estimator("Lasso"),
            self._estimator_creator.create_estimator("SGDClassifier"),
        ]
        bol_results = self._positive_score_flags(x, y, estimators, "explained_variance", 5)
        # at least one true element means one of the scores is greater than 0
        self.assertTrue(any(bol_results))

    def test_cv_score_is_more_than_zero_with_APROPAGATION_KMEANS_MINIKMEANS_MEANSHIFT_mutual_info_score_5(
            self):
        """At least one clusterer reaches a "mutual_info_score" above zero on iris.csv."""
        x, y = self._load_x_y(".\\..\\datasets\\iris.csv", "CSV")
        # AffinityPropagation, KMeans, MiniBatchKMeans and MeanShift estimators
        model_1 = self._estimator_creator.create_estimator(
            "AffinityPropagation")
        model_2 = self._estimator_creator.create_estimator("KMeans")
        model_3 = self._estimator_creator.create_estimator("MiniBatchKMeans")
        model_4 = self._estimator_creator.create_estimator("MeanShift")
        estimators = [
            model_1.set_params(random_state=0), model_2, model_3, model_4
        ]
        bol_results = self._positive_score_flags(x, y, estimators, "mutual_info_score", 5)
        # NOTE(review): this test used to collect the flags without checking
        # them; the check mirrors the two regression tests above -- confirm
        # that any() (rather than all()) is the intended strictness
        self.assertTrue(any(bol_results))
Esempio n. 11
0
class MyTestCase(unittest.TestCase):
    """Tests for LoaderCreator: loading datasets, wrong separators, wrong paths and type names."""

    _loader_creator = LoaderCreator()

    # ------------------------------------------------------------------
    # private helpers (not collected as tests: names do not start with "test")
    # ------------------------------------------------------------------
    @staticmethod
    def _dataset_path(file_name, folder_name="datasets"):
        """Return the Windows-style relative path of *file_name* inside *folder_name*."""
        return ".\\..\\" + folder_name + "\\" + file_name

    def _loaded_column_count(self, file_name, file_type):
        """Load *file_name* from the datasets folder and return its number of columns."""
        loader = self._loader_creator.create_loader(self._dataset_path(file_name), file_type)
        df = loader.get_file_transformed()
        return len(df.columns)

    def _assert_load_raises(self, expected_exception, path, file_type):
        """Assert that creating the loader or transforming the file raises *expected_exception*.

        Both calls stay inside the ``assertRaises`` block, so the exception may
        come from either of them.
        """
        with self.assertRaises(expected_exception):
            loader = self._loader_creator.create_loader(path, file_type)
            _ = loader.get_file_transformed()

    # ------------------------------------------------------------------
    # successful loads
    # ------------------------------------------------------------------
    def test_data_loaded_csv(self):
        """diabetes.csv loaded as "CSV" has 9 columns."""
        df_column_len = self._loaded_column_count("diabetes.csv", "CSV")
        self.assertEqual(df_column_len, 9)

    def test_data_loaded_scsv(self):
        """winequality-red.csv loaded as "SCSV" has 12 columns."""
        df_column_len = self._loaded_column_count("winequality-red.csv", "SCSV")
        self.assertEqual(df_column_len, 12)

    def test_data_load_tsv(self):
        """molecules.csv loaded as "TSV" has 34 columns."""
        df_column_len = self._loaded_column_count("molecules.csv", "TSV")
        self.assertEqual(df_column_len, 34)

    # ------------------------------------------------------------------
    # wrong separator for the file
    # ------------------------------------------------------------------
    def test_wrong_sep_for_tsv_file(self):
        """Loading molecules.csv as "CSV" raises TypeError."""
        self._assert_load_raises(TypeError, self._dataset_path("molecules.csv"), "CSV")

    def test_wrong_sep_for_scsv_file(self):
        """Loading winequality-red.csv as "CSV" raises TypeError."""
        self._assert_load_raises(TypeError, self._dataset_path("winequality-red.csv"), "CSV")

    def test_wrong_sep_for_csv_file(self):
        """Loading diabetes.csv as "TSV" raises TypeError."""
        self._assert_load_raises(TypeError, self._dataset_path("diabetes.csv"), "TSV")

    # ------------------------------------------------------------------
    # missing file: mol.csv does not exist in the datasets folder
    # ------------------------------------------------------------------
    def test_wrong_path_csv_file(self):
        """Loading the missing mol.csv as "CSV" raises FileNotFoundError."""
        self._assert_load_raises(FileNotFoundError, self._dataset_path("mol.csv"), "CSV")

    def test_wrong_path_tsv_file(self):
        """Loading the missing mol.csv as "TSV" raises FileNotFoundError."""
        self._assert_load_raises(FileNotFoundError, self._dataset_path("mol.csv"), "TSV")

    def test_wrong_path_scsv_file(self):
        """Loading the missing mol.csv as "SCSV" raises FileNotFoundError."""
        self._assert_load_raises(FileNotFoundError, self._dataset_path("mol.csv"), "SCSV")

    # ------------------------------------------------------------------
    # loader type names
    # ------------------------------------------------------------------
    def test_creator_value_is_wrong(self):
        """Asking for the unknown loader type "txt" raises AttributeError."""
        test_full_path = self._dataset_path("molecules.csv")
        with self.assertRaises(AttributeError):
            _ = self._loader_creator.create_loader(test_full_path, "txt")

    def test_creator_value_with_no_capital_letters_is_right(self):
        """The lower-case type name "tsv" loads molecules.csv with 34 columns."""
        df_column_len = self._loaded_column_count("molecules.csv", "tsv")
        self.assertEqual(df_column_len, 34)

    def test_creator_value_with_no_capital_letters_and_white_space_is_right(
            self):
        """The padded lower-case type name " tsv " loads molecules.csv with 34 columns."""
        df_column_len = self._loaded_column_count("molecules.csv", " tsv ")
        self.assertEqual(df_column_len, 34)

    def test_loader_creator_types_are_correct(self):
        """Every available loader type is one of the expected type names."""
        loader_types = self._loader_creator.get_available_types()
        expected_types = ("CSV", "TSV", "JSON", "SCSV")
        # The previous check built a list that could only contain True, so it
        # passed for any input; collect the offending names instead.
        unexpected_types = [i for i in loader_types if i not in expected_types]
        self.assertEqual(unexpected_types, [])

    # ------------------------------------------------------------------
    # file that is not a dataset
    # ------------------------------------------------------------------
    def test_file_is_not_a_dataset_it_is_a_json(self):
        """Loading json_info\\help_message.json as "CSV" raises some exception."""
        test_full_path = self._dataset_path("help_message.json", folder_name="json_info")
        # NOTE(review): Exception is very broad; narrow it once the exact
        # exception type raised for this file is known
        self._assert_load_raises(Exception, test_full_path, "CSV")