def main(): ds = DataSet() with open(INPUT_DATA_FILE, "r", newline='', encoding="utf8") as csv_file: ds.extract_from_csv(csv_file) # X_train, X_test, y_train, y_test = train_test_split(ds.X, ds.y, test_size=0.3, random_state=1) clf = MLPClassifier(algorithm='l-bfgs', max_iter=50, alpha=1e-6, hidden_layer_sizes=10000, random_state=1) # classifier = clf.fit(X_train, y_train) print(cross_val_score(clf, ds.X, ds.y, cv=10, n_jobs=-1))
def main(): ds = DataSet() with open(INPUT_DATA_FILE, "r", newline='', encoding="utf8") as csv_file: ds.extract_from_csv(csv_file) print("Ranking (descending)", ds.create_features_ranking(use_names=True)) experiment_results = {} final_counter = Counter() for layer_size in HIDDEN_LAYER_SIZES: experiment_results[layer_size] = {} for n_features in range(1, ds.number_of_features, 1): result = run_experiment(ds.X, ds.y, hidden_layer_size=layer_size, n_features=n_features) experiment_results[layer_size][n_features] = result final_counter.update(result.counter) print_result(result, layer_size, n_features) print("\nNum of times features were selected: {}".format(final_counter)) generate_plots(experiment_results, ds.number_of_features, ds.col_names, final_counter)
class TestDataSet(TestCase): def setUp(self): self.init_column_names = ["feat_001", "X-coord", "h,std,dev", "Grade"] self.init_features = [[0., 0.1, 3.], [0., 0.2, 0.], [0., 0.3, 0.5]] self.init_classes = ["G3", "G1", "G1"] self.expected_extracted_column_names = self.init_column_names self.expected_extracted_features = [[1.7, 3., 0.09], [-5., -1.12, 0.]] self.expected_extracted_classes = ["G2", "G3"] self.data_set_dir = path.join(path.dirname(__file__), "data_sets") self.data_set = DataSet(X=self.init_features, y=self.init_classes, col_names=self.init_column_names) def check_extracted(self): self.assertListEqual(self.expected_extracted_column_names, self.data_set.col_names) self.assertListEqual(self.expected_extracted_features, self.data_set.X) self.assertListEqual(self.expected_extracted_classes, self.data_set.y) def test_should_initialize_properly(self): self.assertListEqual(self.init_column_names, self.data_set.col_names) self.assertListEqual(self.init_features, self.data_set.X) self.assertListEqual(self.init_classes, self.data_set.y) def test_should_extract_features_and_classes_from_csv_with_header(self): with open(path.join(DATA_SETS_DIR, "mock_data_set_with_header.csv"), "r", newline='', encoding="utf8") as csv_file: self.data_set.extract_from_csv(csv_file) self.check_extracted() def test_should_raise_error_on_feature_size_mismatch(self): with self.assertRaises(RuntimeError) as cm: DataSet(X=[[1], [1, 2]]) with self.assertRaises(RuntimeError): DataSet(y=[1]) with self.assertRaises(RuntimeError): DataSet(X=[[1], [1, 2]], y=[1]) with self.assertRaises(RuntimeError): DataSet(X=[[1], [1, 2]], y=[1, 1]) with self.assertRaises(RuntimeError): DataSet(X=[[1], [1, 2]], y=[1, 1, 1]) with self.assertRaises(RuntimeError): DataSet(col_names=["a"]) with self.assertRaises(RuntimeError): DataSet(X=[[1, 2], [1, 2]], y=[1, 1], col_names=["a"]) with open(path.join(DATA_SETS_DIR, "mock_data_set_corrupted.csv"), "r", newline='', encoding="utf8") as csv_file: with self.assertRaises(RuntimeError): self.data_set.extract_from_csv(csv_file) def test_should_return_number_of_features(self): self.assertEqual(len(self.init_features[0]), self.data_set.number_of_features) def test_should_return_column_name(self): for index, element in enumerate(self.init_column_names): self.assertEqual(element, self.data_set.col_names[index]) def test_should_create_ranking(self): ranking = [2, 1, 0] self.assertListEqual( self.data_set.create_features_ranking(use_names=False), ranking)