Beispiel #1
0
    def setUp(self):
        """Create a ProcessCLI for data set 4 and store its event config.

        Sets `self.process_cli` and `self.event_mod`, the latter being the
        value returned by `map_cli_args_to_event_config(EVENT)`.
        """
        from main import ProcessCLI

        # Captured from the terminal log of main.py, which prints
        # `vars(parser.parse_args())`.
        cli_args = {
            'data_set': 4,
            'logistic': True,
            'kfold': False,
            'linear': True,
            'knn': 'scikit',
            'training_features': ['gpa', 'gre'],
            'target_feature': 'admit',
            'multi_class_features': '',
            'exclude_non_numeric': '',
            'exclude_non_ordinal': '',
            'exclude_out_of_scope': '',
            'cleanse_price_format_features': '',
            'convert_feature_words_to_digits': '',
            'kmeans': True,
            'kmeans_qty': 5,
            'affiliation_feature': '',
            'kfold_qty': 10,
            'hyper_optim': True,
            'hyper_optim_range': 20,
            'suppress_all_plots': False,
        }

        # The captured value is overridden: plots are always suppressed here.
        cli_args['suppress_all_plots'] = True

        cli = ProcessCLI(**cli_args)
        self.process_cli = cli
        self.event_mod = cli.map_cli_args_to_event_config(EVENT)
Beispiel #2
0
    def setUp(self):
        """Create a ProcessCLI for data set 2 and store its event config.

        Sets `self.process_cli` and `self.event_mod`, the latter being the
        value returned by `map_cli_args_to_event_config(EVENT)`.
        """
        from main import ProcessCLI

        # Captured from the terminal log of main.py, which prints
        # `vars(parser.parse_args())`.
        cli_args = {
            'data_set': 2,
            'logistic': False,
            'kfold': True,
            'linear': True,
            'knn': 'scikit',
            'training_features': [
                'num-of-doors', 'curb-weight', 'horsepower', 'city-mpg',
                'highway-mpg',
            ],
            'target_feature': 'price',
            'multi_class_features': '',
            'exclude_non_numeric': [
                'make', 'fuel-type', 'aspiration', 'body-style',
                'drive-wheels', 'engine-location', 'engine-type',
                'fuel-system',
            ],
            'exclude_non_ordinal': '',
            'exclude_out_of_scope': ['symboling', 'normalized-losses'],
            'cleanse_price_format_features': ['price'],
            'convert_feature_words_to_digits': [
                'num-of-doors', 'num-of-cylinders',
            ],
            'kmeans': True,
            'kmeans_qty': 5,
            'affiliation_feature': '',
            'kfold_qty': 10,
            'hyper_optim': True,
            'hyper_optim_range': 20,
            'suppress_all_plots': False,
        }

        # The captured value is overridden: plots are always suppressed here.
        cli_args['suppress_all_plots'] = True

        cli = ProcessCLI(**cli_args)
        self.process_cli = cli
        self.event_mod = cli.map_cli_args_to_event_config(EVENT)
class MainExample4NonLogisticTestCase(unittest.TestCase):
    """Tests for `main.py` using data set 6 with the logistic model disabled.

    The test only checks that each reported RMSE falls inside a fixed band
    rather than comparing against exact values.
    """

    # Exclusive bounds every reported RMSE must fall between.
    RMSE_LOWER = 1.1
    RMSE_UPPER = 1.4

    def setUp(self):
        """Create the ProcessCLI under test and the event config it derives.

        Sets `self.process_cli` and `self.event_mod`, the latter being the
        value returned by `map_cli_args_to_event_config(EVENT)`.
        """
        from main import ProcessCLI

        # Captured from the terminal log of main.py, which prints
        # `vars(parser.parse_args())`.
        args = {
            'data_set': 6,
            'logistic': False,
            'kfold': True,
            'linear': True,
            'knn': 'scikit',
            'training_features': [
                'yearpublished',
                'minplaytime',
                'minage',
                'total_wanters',
                'average_weight'
            ],
            'target_feature': 'average_rating',
            'multi_class_features': '',
            # NOTE(review): 'type,' carries a trailing comma inside the
            # string. It may be a faithful copy of the CLI capture or a typo
            # for 'type' - confirm against the data set's column names.
            'exclude_non_numeric': [
                'type,',
                'name'
            ],
            'exclude_non_ordinal': '',
            'exclude_out_of_scope': [
                'id'
            ],
            'cleanse_price_format_features': '',
            'convert_feature_words_to_digits': '',
            'kmeans': True,
            'kmeans_qty': 5,
            'affiliation_feature': 'yearpublished',
            'kfold_qty': 10,
            'hyper_optim': True,
            'hyper_optim_range': 20,
            'suppress_all_plots': False
        }

        # The captured value is overridden: plots are always suppressed here.
        args["suppress_all_plots"] = True
        self.process_cli = ProcessCLI(**args)
        self.event_mod = self.process_cli.map_cli_args_to_event_config(EVENT)

    def tearDown(self):
        """Drop the per-test objects created in setUp."""
        del self.process_cli
        del self.event_mod

    def _assert_rmse_in_band(self, rmse, label):
        """Assert RMSE_LOWER < rmse < RMSE_UPPER, naming `label` on failure."""
        self.assertGreater(rmse, self.RMSE_LOWER, label)
        self.assertLess(rmse, self.RMSE_UPPER, label)

    def test_valid_config_responds_with_expected_results(self):
        """Check that the linear (pre/post) and knn RMSEs lie in the band."""
        # Reference values carried over from the former `expected_result`
        # dict, which was built here but never read by any assertion:
        #   linear pre-hyperparameter_optimisation  rmse 1.2042523903063078
        #   linear post-hyperparameter_optimisation rmse 1.2096966125815165
        #     (feature_names
        #      'yearpublished__minage__total_wanters__average_weight',
        #      k_neighbors_qty 1, k_folds_qty 10)
        #   knn rmse 1.1749918955167673
        #     (feature_names
        #      'yearpublished__minplaytime__minage__total_wanters__average_weight',
        #      k_neighbors_qty 15, k_folds_qty 10)
        res = self.process_cli.main(self.event_mod, None)

        self._assert_rmse_in_band(
            res['linear']['pre-hyperparameter_optimisation']['rmse'],
            'linear pre-hyperparameter_optimisation rmse')
        self._assert_rmse_in_band(
            res['linear']['post-hyperparameter_optimisation']['rmse'],
            'linear post-hyperparameter_optimisation rmse')
        self._assert_rmse_in_band(res['knn']['rmse'], 'knn rmse')
Beispiel #4
0
class MainExample4NonLogisticTestCase(unittest.TestCase):
    """Runs `main.py` on data set 5 (logistic disabled) and checks its result."""

    def setUp(self):
        """Create the ProcessCLI under test and the event config it derives."""
        from main import ProcessCLI

        # Captured from the terminal log of main.py, which prints
        # `vars(parser.parse_args())`.
        cli_args = {
            'data_set': 5,
            'logistic': False,
            'kfold': True,
            'linear': True,
            'knn': 'scikit',
            'training_features': [
                'vote-bill1', 'vote-bill4', 'vote-bill5', 'vote-bill6',
                'vote-bill7', 'vote-bill8',
            ],
            'target_feature': 'extremism',
            'multi_class_features': '',
            'exclude_non_numeric': '',
            'exclude_non_ordinal': '',
            'exclude_out_of_scope': '',
            'cleanse_price_format_features': '',
            'convert_feature_words_to_digits': '',
            'kmeans': True,
            'kmeans_qty': 2,
            'affiliation_feature': 'party',
            'kfold_qty': 10,
            'hyper_optim': True,
            'hyper_optim_range': 20,
            'suppress_all_plots': False,
        }

        # The captured value is overridden: plots are always suppressed here.
        cli_args['suppress_all_plots'] = True

        cli = ProcessCLI(**cli_args)
        self.process_cli = cli
        self.event_mod = cli.map_cli_args_to_event_config(EVENT)

    def tearDown(self):
        """Drop the per-test objects created in setUp."""
        del self.process_cli, self.event_mod

    def test_valid_config_responds_with_expected_results(self):
        """Assert that main() returns exactly the recorded result dict."""
        linear_pre = {
            'model_type': 'linear',
            'rmse': 4.409405296207438,
        }
        linear_post = {
            'model_type': 'linear',
            'feature_names': 'vote-bill1__vote-bill4__vote-bill7',
            'rmse': 3.8974455631403573,
            'k_neighbors_qty': 1,
            'k_folds_qty': 10,
            'k_fold_cross_validation_toggle': True,
        }
        knn = {
            'feature_names': 'vote-bill1__vote-bill4__vote-bill7',
            'rmse': 3.8689053880562674,
            'k_neighbors_qty': 5,
            'k_folds_qty': 10,
            'k_fold_cross_validation_toggle': True,
        }
        expected = {
            'logistic': None,
            'linear': {
                'pre-hyperparameter_optimisation': linear_pre,
                'post-hyperparameter_optimisation': linear_post,
            },
            'knn': knn,
        }

        # NOTE(review): floats are compared for exact equality here.
        actual = self.process_cli.main(self.event_mod, None)
        self.assertEqual(actual, expected)
Beispiel #5
0
class MainExample2TestCase(unittest.TestCase):
    """Runs `main.py` on data set 2 and checks selected parts of its result."""

    def setUp(self):
        """Create the ProcessCLI under test and the event config it derives."""
        from main import ProcessCLI

        # Captured from the terminal log of main.py, which prints
        # `vars(parser.parse_args())`.
        cli_args = {
            'data_set': 2,
            'logistic': False,
            'kfold': True,
            'linear': True,
            'knn': 'scikit',
            'training_features': [
                'num-of-doors', 'curb-weight', 'horsepower', 'city-mpg',
                'highway-mpg',
            ],
            'target_feature': 'price',
            'multi_class_features': '',
            'exclude_non_numeric': [
                'make', 'fuel-type', 'aspiration', 'body-style',
                'drive-wheels', 'engine-location', 'engine-type',
                'fuel-system',
            ],
            'exclude_non_ordinal': '',
            'exclude_out_of_scope': ['symboling', 'normalized-losses'],
            'cleanse_price_format_features': ['price'],
            'convert_feature_words_to_digits': [
                'num-of-doors', 'num-of-cylinders',
            ],
            'kmeans': True,
            'kmeans_qty': 5,
            'affiliation_feature': '',
            'kfold_qty': 10,
            'hyper_optim': True,
            'hyper_optim_range': 20,
            'suppress_all_plots': False,
        }

        # The captured value is overridden: plots are always suppressed here.
        cli_args['suppress_all_plots'] = True

        cli = ProcessCLI(**cli_args)
        self.process_cli = cli
        self.event_mod = cli.map_cli_args_to_event_config(EVENT)

    def tearDown(self):
        """Drop the per-test objects created in setUp."""
        del self.process_cli, self.event_mod

    def test_valid_config_responds_with_expected_results(self):
        """Compare the linear pre/post sections and the knn section.

        The pre-optimisation linear section and the knn section are compared
        for equality; of the post-optimisation linear section only the RMSE
        is checked, using assertAlmostEqual.
        """
        expected = {
            'logistic': None,
            'linear': {
                'pre-hyperparameter_optimisation': {
                    'model_type': 'linear',
                    'rmse': 4276.331113497754,
                },
                'post-hyperparameter_optimisation': {
                    'model_type': 'linear',
                    'feature_names': 'num-of-doors__curb-weight',
                    'rmse': 4235.3551812093447,
                    'k_neighbors_qty': 1,
                    'k_folds_qty': 10,
                    'k_fold_cross_validation_toggle': True,
                },
            },
            'knn': {
                'feature_names': 'num-of-doors__curb-weight',
                'rmse': 3945.9183360130905,
                'k_neighbors_qty': 6,
                'k_folds_qty': 10,
                'k_fold_cross_validation_toggle': True,
            },
        }
        pre_key = 'pre-hyperparameter_optimisation'
        post_key = 'post-hyperparameter_optimisation'

        actual = self.process_cli.main(self.event_mod, None)

        self.assertEqual(actual['linear'][pre_key],
                         expected['linear'][pre_key])
        self.assertAlmostEqual(actual['linear'][post_key]['rmse'],
                               expected['linear'][post_key]['rmse'])
        self.assertEqual(actual['knn'], expected['knn'])
Beispiel #6
0
class MainExample3LogisticTestCase(unittest.TestCase):
    """Runs `main.py` on data set 3 (logistic enabled) and checks its result."""

    def setUp(self):
        """Create the ProcessCLI under test and the event config it derives."""
        from main import ProcessCLI

        # Captured from the terminal log of main.py, which prints
        # `vars(parser.parse_args())`.
        cli_args = {
            'data_set': 3,
            'logistic': True,
            'kfold': False,
            'linear': True,
            'knn': 'scikit',
            'training_features': [
                'cylinders', 'displacement', 'horsepower', 'weight',
                'acceleration', 'model-year', 'car-name',
            ],
            'target_feature': 'origin',
            'multi_class_features': ['cylinders', 'model-year'],
            'exclude_non_numeric': ['car-name'],
            'exclude_non_ordinal': '',
            'exclude_out_of_scope': '',
            'cleanse_price_format_features': '',
            'convert_feature_words_to_digits': '',
            'kmeans': True,
            'kmeans_qty': 5,
            'affiliation_feature': '',
            'kfold_qty': 10,
            'hyper_optim': True,
            'hyper_optim_range': 20,
            'suppress_all_plots': False,
        }

        # The captured value is overridden: plots are always suppressed here.
        cli_args['suppress_all_plots'] = True

        cli = ProcessCLI(**cli_args)
        self.process_cli = cli
        self.event_mod = cli.map_cli_args_to_event_config(EVENT)

    def tearDown(self):
        """Drop the per-test objects created in setUp."""
        del self.process_cli, self.event_mod

    def test_valid_config_responds_with_expected_results(self):
        """Assert that main() returns exactly the recorded result dict."""
        logistic_pre = {
            'model_type': 'logistic',
            'rmse': None,
            'accuracy': 0.0792838874680307,
            'sensitivity': 0.5,
            'specificity': 0.5,
            'auc_score': None,
        }
        linear_pre = {
            'model_type': 'linear',
            'rmse': 0.6360193336863021,
        }
        knn = {
            'feature_names': 'cyl_3__cyl_4__cyl_5__mod_74__mod_78',
            'rmse': 0.8799866005988728,
            'k_neighbors_qty': 6,
            'k_folds_qty': 10,
            'k_fold_cross_validation_toggle': False,
        }
        expected = {
            'logistic': {'pre-hyperparameter_optimisation': logistic_pre},
            'linear': {'pre-hyperparameter_optimisation': linear_pre},
            'knn': knn,
        }

        # NOTE(review): floats are compared for exact equality here.
        actual = self.process_cli.main(self.event_mod, None)
        self.assertEqual(actual, expected)
Beispiel #7
0
    def setUp(self):
        """Create a ProcessCLI for data set 1 and store its event config.

        Sets `self.process_cli` and `self.event_mod`, the latter being the
        value returned by `map_cli_args_to_event_config(EVENT)`.
        """
        from main import ProcessCLI

        # Captured from the terminal log of main.py, which prints
        # `vars(parser.parse_args())`.
        cli_args = {
            'data_set': 1,
            'logistic': False,
            'kfold': True,
            'linear': True,
            'knn': 'scikit',
            'training_features': [
                'accommodates', 'bedrooms', 'bathrooms', 'number_of_reviews',
            ],
            'target_feature': 'price',
            'multi_class_features': '',
            'exclude_non_numeric': [
                'host_response_time', 'host_response_rate',
                'host_acceptance_rate', 'host_is_superhost',
                'host_listings_count', 'host_verifications',
                'host_has_profile_pic', 'host_identity_verified',
                'property_type', 'room_type', 'bed_type', 'amenities',
                'calendar_updated', 'has_availability', 'requires_license',
                'license', 'instant_bookable', 'cancellation_policy',
                'require_guest_profile_picture',
                'require_guest_phone_verification',
            ],
            'exclude_non_ordinal': ['latitude', 'longitude', 'zipcode'],
            'exclude_out_of_scope': [
                'id', 'listing_url', 'scrape_id', 'last_scraped', 'name',
                'summary', 'space', 'description', 'experiences_offered',
                'neighborhood_overview', 'notes', 'transit', 'thumbnail_url',
                'medium_url', 'picture_url', 'xl_picture_url', 'host_id',
                'host_url', 'host_name', 'host_since', 'host_location',
                'host_about', 'host_thumbnail_url', 'host_picture_url',
                'host_neighbourhood', 'street', 'neighbourhood',
                'neighbourhood_cleansed', 'neighbourhood_group_cleansed',
                'city', 'state', 'market', 'smart_location', 'country_code',
                'country', 'is_location_exact', 'calendar_last_scraped',
                'first_review', 'last_review', 'jurisdiction_names',
            ],
            'cleanse_price_format_features': [
                'price', 'weekly_price', 'monthly_price', 'security_deposit',
                'cleaning_fee', 'extra_people',
            ],
            'convert_feature_words_to_digits': '',
            'kmeans': True,
            'kmeans_qty': 5,
            'affiliation_feature': '',
            'kfold_qty': 10,
            'hyper_optim': True,
            'hyper_optim_range': 20,
            'suppress_all_plots': False,
        }

        # The captured value is overridden: plots are always suppressed here.
        cli_args['suppress_all_plots'] = True

        cli = ProcessCLI(**cli_args)
        self.process_cli = cli
        self.event_mod = cli.map_cli_args_to_event_config(EVENT)
Beispiel #8
0
class MainExample1TestCase(unittest.TestCase):
    """Runs `main.py` on data set 1 and checks the full result dict."""

    def setUp(self):
        """Create the ProcessCLI under test and the event config it derives."""
        from main import ProcessCLI

        # Captured from the terminal log of main.py, which prints
        # `vars(parser.parse_args())`.
        cli_args = {
            'data_set': 1,
            'logistic': False,
            'kfold': True,
            'linear': True,
            'knn': 'scikit',
            'training_features': [
                'accommodates', 'bedrooms', 'bathrooms', 'number_of_reviews',
            ],
            'target_feature': 'price',
            'multi_class_features': '',
            'exclude_non_numeric': [
                'host_response_time', 'host_response_rate',
                'host_acceptance_rate', 'host_is_superhost',
                'host_listings_count', 'host_verifications',
                'host_has_profile_pic', 'host_identity_verified',
                'property_type', 'room_type', 'bed_type', 'amenities',
                'calendar_updated', 'has_availability', 'requires_license',
                'license', 'instant_bookable', 'cancellation_policy',
                'require_guest_profile_picture',
                'require_guest_phone_verification',
            ],
            'exclude_non_ordinal': ['latitude', 'longitude', 'zipcode'],
            'exclude_out_of_scope': [
                'id', 'listing_url', 'scrape_id', 'last_scraped', 'name',
                'summary', 'space', 'description', 'experiences_offered',
                'neighborhood_overview', 'notes', 'transit', 'thumbnail_url',
                'medium_url', 'picture_url', 'xl_picture_url', 'host_id',
                'host_url', 'host_name', 'host_since', 'host_location',
                'host_about', 'host_thumbnail_url', 'host_picture_url',
                'host_neighbourhood', 'street', 'neighbourhood',
                'neighbourhood_cleansed', 'neighbourhood_group_cleansed',
                'city', 'state', 'market', 'smart_location', 'country_code',
                'country', 'is_location_exact', 'calendar_last_scraped',
                'first_review', 'last_review', 'jurisdiction_names',
            ],
            'cleanse_price_format_features': [
                'price', 'weekly_price', 'monthly_price', 'security_deposit',
                'cleaning_fee', 'extra_people',
            ],
            'convert_feature_words_to_digits': '',
            'kmeans': True,
            'kmeans_qty': 5,
            'affiliation_feature': '',
            'kfold_qty': 10,
            'hyper_optim': True,
            'hyper_optim_range': 20,
            'suppress_all_plots': False,
        }

        # The captured value is overridden: plots are always suppressed here.
        cli_args['suppress_all_plots'] = True

        cli = ProcessCLI(**cli_args)
        self.process_cli = cli
        self.event_mod = cli.map_cli_args_to_event_config(EVENT)

    def tearDown(self):
        """Drop the per-test objects created in setUp."""
        del self.process_cli, self.event_mod

    def test_valid_config_responds_with_expected_results(self):
        """Assert that main() returns exactly the recorded result dict."""
        linear_pre = {
            'model_type': 'linear',
            'rmse': 111.78870076908206,
        }
        linear_post = {
            'model_type': 'linear',
            'feature_names': 'accommodates__bedrooms__bathrooms',
            'rmse': 109.86142964502258,
            'k_neighbors_qty': 1,
            'k_folds_qty': 10,
            'k_fold_cross_validation_toggle': True,
        }
        knn = {
            'feature_names': 'accommodates__bedrooms__bathrooms',
            'rmse': 109.65178843975404,
            'k_neighbors_qty': 20,
            'k_folds_qty': 10,
            'k_fold_cross_validation_toggle': True,
        }
        expected = {
            'logistic': None,
            'linear': {
                'pre-hyperparameter_optimisation': linear_pre,
                'post-hyperparameter_optimisation': linear_post,
            },
            'knn': knn,
        }

        # NOTE(review): floats are compared for exact equality here.
        actual = self.process_cli.main(self.event_mod, None)
        self.assertEqual(actual, expected)
Beispiel #9
0
class MainExample4NonLogisticTestCase(unittest.TestCase):
    """Runs `main.py` on data set 4 and checks the full result dict.

    Despite the class name, this configuration sets 'logistic' to True and
    the expected result contains a logistic section.
    """

    def setUp(self):
        """Create the ProcessCLI under test and the event config it derives."""
        from main import ProcessCLI

        # Captured from the terminal log of main.py, which prints
        # `vars(parser.parse_args())`.
        cli_args = {
            'data_set': 4,
            'logistic': True,
            'kfold': False,
            'linear': True,
            'knn': 'scikit',
            'training_features': ['gpa', 'gre'],
            'target_feature': 'admit',
            'multi_class_features': '',
            'exclude_non_numeric': '',
            'exclude_non_ordinal': '',
            'exclude_out_of_scope': '',
            'cleanse_price_format_features': '',
            'convert_feature_words_to_digits': '',
            'kmeans': True,
            'kmeans_qty': 5,
            'affiliation_feature': '',
            'kfold_qty': 10,
            'hyper_optim': True,
            'hyper_optim_range': 20,
            'suppress_all_plots': False,
        }

        # The captured value is overridden: plots are always suppressed here.
        cli_args['suppress_all_plots'] = True

        cli = ProcessCLI(**cli_args)
        self.process_cli = cli
        self.event_mod = cli.map_cli_args_to_event_config(EVENT)

    def tearDown(self):
        """Drop the per-test objects created in setUp."""
        del self.process_cli, self.event_mod

    def test_valid_config_responds_with_expected_results(self):
        """Assert that main() returns exactly the recorded result dict."""
        logistic_pre = {
            'model_type': 'logistic',
            'rmse': 0.393589629343064,
            'accuracy': 0.782608695652174,
            'sensitivity': 0.5,
            'specificity': 0.5,
            'auc_score': 0.85679303278688534,
        }
        linear_pre = {
            'model_type': 'linear',
            'rmse': 0.39200813287845615,
        }
        knn = {
            'feature_names': 'gpa__gre',
            'rmse': 0.6196196990320526,
            'k_neighbors_qty': 8,
            'k_folds_qty': 10,
            'k_fold_cross_validation_toggle': False,
        }
        expected = {
            'logistic': {'pre-hyperparameter_optimisation': logistic_pre},
            'linear': {'pre-hyperparameter_optimisation': linear_pre},
            'knn': knn,
        }

        # NOTE(review): floats are compared for exact equality here.
        actual = self.process_cli.main(self.event_mod, None)
        self.assertEqual(actual, expected)
class MainExample3NonLogisticTestCase(unittest.TestCase):
    """Runs `main.py` on data set 3 (logistic disabled) and checks its result."""

    def setUp(self):
        """Create the ProcessCLI under test and the event config it derives."""
        from main import ProcessCLI

        # Captured from the terminal log of main.py, which prints
        # `vars(parser.parse_args())`.
        cli_args = {
            'data_set': 3,
            'logistic': False,
            'kfold': True,
            'linear': True,
            'knn': 'scikit',
            'training_features': [
                'cylinders', 'displacement', 'horsepower', 'weight',
                'acceleration', 'model-year', 'car-name',
            ],
            'target_feature': 'mpg',
            'multi_class_features': '',
            'exclude_non_numeric': ['car-name'],
            'exclude_non_ordinal': '',
            'exclude_out_of_scope': '',
            'cleanse_price_format_features': '',
            'convert_feature_words_to_digits': '',
            'kmeans': True,
            'kmeans_qty': 5,
            'affiliation_feature': '',
            'kfold_qty': 10,
            'hyper_optim': True,
            'hyper_optim_range': 20,
            'suppress_all_plots': False,
        }

        # The captured value is overridden: plots are always suppressed here.
        cli_args['suppress_all_plots'] = True

        cli = ProcessCLI(**cli_args)
        self.process_cli = cli
        self.event_mod = cli.map_cli_args_to_event_config(EVENT)

    def tearDown(self):
        """Drop the per-test objects created in setUp."""
        del self.process_cli, self.event_mod

    def test_valid_config_responds_with_expected_results(self):
        """Assert that main() returns exactly the recorded result dict."""
        linear_pre = {
            'model_type': 'linear',
            'rmse': 6.002793054130211,
        }
        linear_post = {
            'model_type': 'linear',
            'feature_names': 'acceleration__model-year',
            'rmse': 6.0241861168232917,
            'k_neighbors_qty': 1,
            'k_folds_qty': 10,
            'k_fold_cross_validation_toggle': True,
        }
        knn = {
            'feature_names': 'acceleration__model-year',
            'rmse': 5.75101716901536,
            'k_neighbors_qty': 20,
            'k_folds_qty': 10,
            'k_fold_cross_validation_toggle': True,
        }
        expected = {
            'logistic': None,
            'linear': {
                'pre-hyperparameter_optimisation': linear_pre,
                'post-hyperparameter_optimisation': linear_post,
            },
            'knn': knn,
        }

        # NOTE(review): floats are compared for exact equality here.
        actual = self.process_cli.main(self.event_mod, None)
        self.assertEqual(actual, expected)