def main():
    """Run the decision-tree experiment end to end.

    Reads the settings file resolved for "decision_tree", initialises logging,
    loads and scales the train/test data, adds image-derived features, prunes
    features with the data selector's lasso step, applies the data reducer's
    PCA step, then evaluates a default ``DecisionTreeClassifier`` through the
    model selector and writes a submission.
    """
    settings_path = get_settings_path_from_arg("decision_tree")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "decision_tree_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training data. The data is loaded exactly once: an
    # earlier duplicate DataLoader was discarded before being used.
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction. k is ten times the number of entries in
    # dl.classes; the meaning of the (k, k * 3) arguments is defined by
    # DataLoader.load_from_images.
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(
        dl.id_train,
        dl.x_train,
        dl.y_train,
        dl.id_test,
        dl.x_test,
    )
    ds.add_all()

    # Use lasso (0.17 is the value handed to auto_remove_lasso; presumably a
    # threshold/alpha -- confirm in DataSelector).
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()

    # Rebuild the selector on top of the reduced feature matrices.
    ds = DataSelector(
        dl.id_train,
        dr.x_train,
        dl.y_train,
        dl.id_test,
        dr.x_test,
    )
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add classifier to model selector (registered under an empty label, as
    # in the original script).
    clf = DecisionTreeClassifier()
    ms.add_classifier("", clf)

    # Get best model and generate the submission
    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Run the quadratic-discriminant-analysis experiment end to end.

    Reads the settings file resolved for "qda_classifier", initialises
    logging, loads and scales the train/test data, adds image-derived
    features, prunes features with the data selector's lasso step, applies
    the data reducer's PCA step, then evaluates three
    ``QuadraticDiscriminantAnalysis`` configurations through the model
    selector and writes a submission.
    """
    settings_path = get_settings_path_from_arg("qda_classifier")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "qda_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction. k is ten times the number of entries in
    # dl.classes; the meaning of the (k, k * 3) arguments is defined by
    # DataLoader.load_from_images.
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test, dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()

    # Rebuild the selector on top of the reduced feature matrices.
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test, dr.x_test)
    ds.add_all()

    # Add data selection to model selector. This registration was missing
    # here although every sibling experiment script performs it before
    # calling get_best_model.
    ms.add_selector("all_feature", ds)

    # Add Classifier to model selector
    clf = QuadraticDiscriminantAnalysis(store_covariance=True)
    ms.add_classifier("store_covariance=True", clf)

    clf = QuadraticDiscriminantAnalysis(store_covariance=False)
    ms.add_classifier("store_covariance=False", clf)

    # The deprecated `store_covariances` keyword is intentionally not passed:
    # newer scikit-learn releases reject it, and omitting it matches its old
    # default of None.
    clf = QuadraticDiscriminantAnalysis(
        priors=None,
        reg_param=0.0,
        store_covariance=False,
        tol=0.0001,
    )
    ms.add_classifier("final", clf)

    # Get best model
    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Run the random-forest experiment end to end.

    Loads settings for "random_forest_classifier", sets up logging, prepares
    the data (scaling, image features, lasso pruning, PCA reduction),
    registers one ``RandomForestClassifier`` with the model selector,
    evaluates it and writes a submission.
    """
    experiment = "random_forest_classifier"
    settings = load_settings(get_settings_path_from_arg(experiment))
    init_logger(settings.log.dir, "random_forest_classifier", logging.DEBUG)

    model_selector = ModelSelector()

    # --- Data loading -----------------------------------------------------
    loader = DataLoader()
    loader.load_train(settings.data.train_path)
    loader.load_test(settings.data.test_path)
    loader.scale_data()

    # --- Image features ---------------------------------------------------
    # Both sizes derive from the number of entries in loader.classes.
    k = np.size(loader.classes) * 10
    k_times_three = k * 3
    loader.load_from_images(
        settings.data.image_path, k, k_times_three, verbose=False
    )

    # --- Feature selection (lasso) -----------------------------------------
    raw_selector = DataSelector(
        loader.id_train,
        loader.x_train,
        loader.y_train,
        loader.id_test,
        loader.x_test,
    )
    raw_selector.add_all()
    raw_selector.auto_remove_lasso(0.17)

    # --- Dimensionality reduction (PCA) -------------------------------------
    reducer = DataReducer(raw_selector.train_x, raw_selector.test_x)
    reducer.pca_data_reduction()

    reduced_selector = DataSelector(
        loader.id_train,
        reducer.x_train,
        loader.y_train,
        loader.id_test,
        reducer.x_test,
    )
    reduced_selector.add_all()

    # --- Register selector and classifier -----------------------------------
    model_selector.add_selector("all_feature", reduced_selector)

    forest = RandomForestClassifier(
        n_estimators=13,
        random_state=0,
        min_samples_leaf=3,
        bootstrap=False,
    )
    forest_label = (
        "n_estimators=13, random_state=0, min_samples_leaf=3, bootsrap=False"
    )
    model_selector.add_classifier(forest_label, forest)

    # --- Evaluate and export ------------------------------------------------
    model_selector.get_best_model(k=10)
    model_selector.generate_submission(
        settings.data.submission_dir, loader.classes
    )
def main():
    """Run the k-nearest-neighbours experiment end to end.

    Loads settings for "k_neighbors_classifier", sets up logging, prepares
    the data (scaling, image features, lasso pruning, PCA reduction), then
    registers one distance-weighted, p=1 ``KNeighborsClassifier`` for every
    neighbour count from 1 to 39, evaluates them through the model selector
    and writes a submission.
    """
    experiment = "k_neighbors_classifier"
    settings = load_settings(get_settings_path_from_arg(experiment))
    init_logger(settings.log.dir, "k_neighbors_classifier", logging.DEBUG)

    model_selector = ModelSelector()

    # --- Data loading -----------------------------------------------------
    loader = DataLoader()
    loader.load_train(settings.data.train_path)
    loader.load_test(settings.data.test_path)
    loader.scale_data()

    # --- Image features ---------------------------------------------------
    # Both sizes derive from the number of entries in loader.classes.
    k = np.size(loader.classes) * 10
    k_times_three = k * 3
    loader.load_from_images(
        settings.data.image_path, k, k_times_three, verbose=False
    )

    # --- Feature selection (lasso) -----------------------------------------
    raw_selector = DataSelector(
        loader.id_train,
        loader.x_train,
        loader.y_train,
        loader.id_test,
        loader.x_test,
    )
    raw_selector.add_all()
    raw_selector.auto_remove_lasso(0.17)

    # --- Dimensionality reduction (PCA) -------------------------------------
    reducer = DataReducer(raw_selector.train_x, raw_selector.test_x)
    reducer.pca_data_reduction()

    reduced_selector = DataSelector(
        loader.id_train,
        reducer.x_train,
        loader.y_train,
        loader.id_test,
        reducer.x_test,
    )
    reduced_selector.add_all()

    # --- Register selector and classifiers ----------------------------------
    model_selector.add_selector("all_feature", reduced_selector)

    # One candidate per neighbour count in [1, 39].
    for n_neighbors in range(1, 40):
        label = "k_{}_distance_p1".format(n_neighbors)
        candidate = KNeighborsClassifier(
            n_neighbors=n_neighbors, weights="distance", p=1
        )
        model_selector.add_classifier(label, candidate)

    # --- Evaluate and export ------------------------------------------------
    model_selector.get_best_model(k=10, plot=True)
    model_selector.generate_submission(
        settings.data.submission_dir, loader.classes
    )
def main():
    """Run the multi-layer-perceptron experiment end to end.

    Reads the settings file resolved for "MLP_classifier", initialises
    logging, loads and scales the test/train data, adds image-derived
    features, prunes features with the data selector's lasso step, applies
    the data reducer's PCA step, then evaluates a single ``MLPClassifier``
    (one hidden layer of 150 units, up to 1000 iterations) through the model
    selector and writes a submission.
    """
    settings_path = get_settings_path_from_arg("MLP_classifier")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "MLP_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training
    # NOTE(review): this script loads the test set before the training set,
    # unlike its siblings; the order is kept as found -- confirm whether
    # DataLoader is order-sensitive.
    dl = DataLoader()
    dl.load_test(settings.data.test_path)
    dl.load_train(settings.data.train_path)
    dl.scale_data()

    # Image feature extraction. k is ten times the number of entries in
    # dl.classes; the meaning of the (k, k * 3) arguments is defined by
    # DataLoader.load_from_images.
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test, dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()

    # Rebuild the selector on top of the reduced feature matrices.
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test, dr.x_test)
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add classifier to model selector. hidden_layer_sizes is written as a
    # real one-element tuple: `(150)` is just the int 150 in parentheses.
    classifier = MLPClassifier(hidden_layer_sizes=(150,), max_iter=1000)
    ms.add_classifier("MLP1", classifier)

    # Get best model
    ms.get_best_model(k=10)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Compare all tuned classifiers on the same prepared data set.

    Loads settings for "main_classifier", sets up logging, prepares the data
    (scaling, image features, lasso pruning, PCA reduction), registers twelve
    classifiers with the model selector and asks it for the best model with
    plotting enabled. No submission is generated by this script.
    """
    experiment = "main_classifier"
    settings = load_settings(get_settings_path_from_arg(experiment))
    init_logger(settings.log.dir, "main_classifier", logging.DEBUG)

    model_selector = ModelSelector()

    # --- Data loading -----------------------------------------------------
    loader = DataLoader()
    loader.load_train(settings.data.train_path)
    loader.load_test(settings.data.test_path)
    loader.scale_data()

    # --- Image features ---------------------------------------------------
    # Both sizes derive from the number of entries in loader.classes.
    k = np.size(loader.classes) * 10
    k_times_three = k * 3
    loader.load_from_images(
        settings.data.image_path, k, k_times_three, verbose=False
    )

    # --- Feature selection (lasso) -----------------------------------------
    raw_selector = DataSelector(
        loader.id_train,
        loader.x_train,
        loader.y_train,
        loader.id_test,
        loader.x_test,
    )
    raw_selector.add_all()
    raw_selector.auto_remove_lasso(0.17)

    # --- Dimensionality reduction (PCA) -------------------------------------
    reducer = DataReducer(raw_selector.train_x, raw_selector.test_x)
    reducer.pca_data_reduction()

    ds = DataSelector(
        loader.id_train,
        reducer.x_train,
        loader.y_train,
        loader.id_test,
        reducer.x_test,
    )
    ds.add_all()

    model_selector.add_selector("all_feature", ds)

    # --- Classifiers --------------------------------------------------------
    # All estimators are constructed first (in this order) and only then
    # registered, mirroring the instantiate-then-add sequence of the script.
    # NNKeras receives the column count of the selected test matrix.
    candidates = [
        ("KNN", KNeighborsClassifier(n_neighbors=6, weights="distance", p=1)),
        ("AdaBoost", AdaBoostClassifier(learning_rate=0.01)),
        ("Decision Tree", DecisionTreeClassifier(min_impurity_decrease=0.02)),
        ("Gaussian NB", GaussianNB()),
        ("LDA", LinearDiscriminantAnalysis()),
        (
            "Gradient Boosting",
            GradientBoostingClassifier(
                n_estimators=100,
                random_state=0,
                min_samples_leaf=3,
                verbose=True,
            ),
        ),
        ("Bernoulli NB", BernoulliNB(alpha=3.5, binarize=0.03)),
        ("NN Keras", NNKeras(ds.selected_x_test.shape[1])),
        ("SVC", SVC(probability=True, C=1000, gamma=1)),
        ("NuSVC", NuSVC(nu=0.1, gamma=10, probability=True)),
        ("QDA", QuadraticDiscriminantAnalysis(store_covariance=True)),
        (
            "Random Forest",
            RandomForestClassifier(
                n_estimators=13,
                random_state=0,
                min_samples_leaf=3,
                bootstrap=False,
            ),
        ),
    ]
    for label, estimator in candidates:
        model_selector.add_classifier(label, estimator)

    # --- Evaluate -----------------------------------------------------------
    model_selector.get_best_model(k=10, plot=True)