Example #1
def main():
    settings_path = get_settings_path_from_arg("decision_tree")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "decision_tree_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training data
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(
        dl.id_train, dl.x_train, dl.y_train,
        dl.id_test, dl.x_test
    )
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(
        dl.id_train, dr.x_train, dl.y_train,
        dl.id_test, dr.x_test
    )
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add Classifier to model selector
    clf = DecisionTreeClassifier()
    ms.add_classifier("default", clf)

    # Get best model
    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
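The pipeline above leans on the project's DataSelector.auto_remove_lasso(0.17) to prune features before PCA. That class is not shown on this page, so here is a minimal sketch of how such a lasso-based filter can be built with scikit-learn; the helper name and the reading of 0.17 as the Lasso alpha are assumptions, not the project's API.

# Hypothetical stand-in for DataSelector.auto_remove_lasso(); 0.17-as-alpha is assumed.
from sklearn.linear_model import Lasso
from sklearn.preprocessing import LabelEncoder


def lasso_feature_mask(x_train, y_train, alpha=0.17):
    """Return a boolean mask marking features whose Lasso coefficient is non-zero."""
    y_encoded = LabelEncoder().fit_transform(y_train)  # Lasso needs a numeric target
    lasso = Lasso(alpha=alpha)
    lasso.fit(x_train, y_encoded)
    return lasso.coef_ != 0


# Illustrative usage with the loader's frames from the example above:
# mask = lasso_feature_mask(dl.x_train, dl.y_train)
# x_train_kept, x_test_kept = dl.x_train.loc[:, mask], dl.x_test.loc[:, mask]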
Example #2
def main():
    settings_path = get_settings_path_from_arg("qda_classifier")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "qda_classifier", logging.DEBUG)
    ms = ModelSelector()

    # Load test and training
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test,
                      dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test,
                      dr.x_test)
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add Classifier to model selector
    clf = QuadraticDiscriminantAnalysis(store_covariance=True)
    ms.add_classifier("store_covariance=True", clf)

    clf = QuadraticDiscriminantAnalysis(store_covariance=False)
    ms.add_classifier("store_covariance=False", clf)

    clf = QuadraticDiscriminantAnalysis(priors=None,
                                        reg_param=0.0,
                                        store_covariance=False,
                                        tol=0.0001)
    ms.add_classifier("final", clf)

    # Get best model
    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
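ModelSelector.get_best_model(k=10) presumably runs a k-fold comparison over every registered classifier. Its internals are not listed here; the snippet below sketches that kind of 10-fold comparison directly with scikit-learn, using a stand-in dataset instead of the project's loaders.

# Illustrative 10-fold comparison of the two QDA variants; data and scoring are stand-ins.
from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import cross_val_score

X, y = load_iris(return_X_y=True)  # placeholder for the selected training features

candidates = {
    "store_covariance=True": QuadraticDiscriminantAnalysis(store_covariance=True),
    "store_covariance=False": QuadraticDiscriminantAnalysis(store_covariance=False),
}

# Cross-validate every candidate and keep the name of the best mean score.
scores = {name: cross_val_score(clf, X, y, cv=10).mean()
          for name, clf in candidates.items()}
best_name = max(scores, key=scores.get)
print(best_name, scores[best_name])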
Example #3
def main():
    settings_path = get_settings_path_from_arg("naive_bayes_classifier")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "naive_bayes_classifier", logging.DEBUG)
    ms = ModelSelector()

    # Load test and training
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test,
                      dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test,
                      dr.x_test)
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add classifier to model selector
    clf = BernoulliNB(alpha=3.5, binarize=0.03)
    ms.add_classifier("bernoulli", clf)

    # Get best model
    ms.get_best_model(k=10)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
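generate_submission(settings.data.submission_dir, dl.classes) is project code that is not listed on this page. A hedged sketch of what such a helper typically produces for this kind of task follows: one probability column per class, indexed by test id; the file name and exact layout are guesses.

# Hypothetical submission writer; column layout and file name are assumptions.
import os

import pandas as pd


def write_submission(clf, x_test, id_test, classes, submission_dir):
    """Assumes clf is already fitted; writes predict_proba output, one column per class."""
    proba = clf.predict_proba(x_test)          # shape: (n_test, n_classes)
    df = pd.DataFrame(proba, columns=classes)
    df.insert(0, "id", list(id_test))
    out_path = os.path.join(submission_dir, "submission.csv")
    df.to_csv(out_path, index=False)
    return out_path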
Example #4
def main():
    settings_path = get_settings_path_from_arg("random_forest_classifier")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "random_forest_classifier", logging.DEBUG)
    ms = ModelSelector()

    # Load test and training
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test,
                      dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test,
                      dr.x_test)
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add Classifier to model selector
    clf = RandomForestClassifier(n_estimators=13,
                                 random_state=0,
                                 min_samples_leaf=3,
                                 bootstrap=False)
    ms.add_classifier(
        "n_estimators=13, random_state=0, min_samples_leaf=3, bootsrap=False",
        clf)

    # Get best model
    ms.get_best_model(k=10)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
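As a follow-up that is not part of the original script, a fitted random forest exposes feature_importances_, which can be used to sanity-check the lasso/PCA feature set; a quick sketch on a stand-in dataset:

# Illustrative only: rank the features of a fitted forest by importance (stand-in data).
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True)  # placeholder for the reduced training features

clf = RandomForestClassifier(n_estimators=13, random_state=0,
                             min_samples_leaf=3, bootstrap=False)
clf.fit(X, y)
for rank, idx in enumerate(clf.feature_importances_.argsort()[::-1], start=1):
    print(rank, idx, round(clf.feature_importances_[idx], 3))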
Example #5
def main():
    settings_path = get_settings_path_from_arg("k_neighbors_classifier")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "k_neighbors_classifier", logging.DEBUG)
    ms = ModelSelector()

    # Load test and training
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test,
                      dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test,
                      dr.x_test)
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add one KNeighborsClassifier per neighbour count to the model selector
    for i in range(1, 40):
        clf_k = KNeighborsClassifier(i, weights="distance", p=1)
        ms.add_classifier("k_{}_distance_p1".format(i), clf_k)

    # Get best model
    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
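The explicit loop over k above is equivalent to a grid search over n_neighbors; a compact alternative using GridSearchCV on a stand-in dataset (not the project's loaders):

# Equivalent search over n_neighbors = 1..39 with 10-fold cross-validation.
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

X, y = load_iris(return_X_y=True)  # placeholder for the selected training features

param_grid = {"n_neighbors": list(range(1, 40))}
search = GridSearchCV(KNeighborsClassifier(weights="distance", p=1),
                      param_grid, cv=10)
search.fit(X, y)
print(search.best_params_, search.best_score_)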
Example #6
    logging.info("[ImageFeatureExtractor] Histogram generated")
    logging.debug("[ImageFeatureExtractor] Histogram:{}".format(histo_list))
    return histo_list


def extract_descriptor_to_csv(img_path):
    """Extract image descriptor into csv"""


def extract_feature_to_csv(img_path, ids, k, batch_size=None, verbose=False):
    """Extract feature from image and save to csv"""


if __name__ == "__main__":
    settings_path = get_settings_path_from_arg("image_feature_extractor")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "image_feature_extractor", logging.DEBUG)

    train_data = pd.read_csv(settings.data.train_path)
    test_data = pd.read_csv(settings.data.test_path)
    species = train_data["species"]
    train_data = train_data.drop("species", axis=1)

    k = np.size(species) * 10
    batch_size = np.size(os.listdir(settings.data.image_path)) * 3
    print(
        get_feature(settings.data.image_path,
                    test_data['id'],
                    k,
                    batch_size))
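The log messages and the get_feature(image_path, ids, k, batch_size) call suggest a bag-of-visual-words extractor: cluster local descriptors into k visual words, then describe each image by its word histogram. The sketch below illustrates that idea only; the ORB descriptors, MiniBatchKMeans clustering, and the "<id>.jpg" file naming are assumptions, not the project's actual implementation.

# Hypothetical bag-of-visual-words histograms; descriptor type and file naming assumed.
import os

import cv2
import numpy as np
from sklearn.cluster import MiniBatchKMeans


def bovw_histograms(img_dir, ids, k, batch_size):
    """Return one k-bin visual-word histogram per image id."""
    orb = cv2.ORB_create()
    per_image = []
    for img_id in ids:
        img = cv2.imread(os.path.join(img_dir, "{}.jpg".format(img_id)),
                         cv2.IMREAD_GRAYSCALE)
        _, desc = orb.detectAndCompute(img, None)
        per_image.append(desc if desc is not None else np.empty((0, 32)))

    # Learn k visual words from all descriptors, then build one histogram per image.
    kmeans = MiniBatchKMeans(n_clusters=k, batch_size=batch_size)
    kmeans.fit(np.vstack(per_image).astype(np.float32))
    histos = []
    for desc in per_image:
        histo = np.zeros(k)
        if len(desc):
            words, counts = np.unique(
                kmeans.predict(desc.astype(np.float32)), return_counts=True)
            histo[words] = counts
        histos.append(histo)
    return np.array(histos)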
Example #7
def main():
    settings_path = get_settings_path_from_arg("main_classifier")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "main_classifier", logging.DEBUG)
    ms = ModelSelector()

    # Load test and training data
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test,
                      dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test,
                      dr.x_test)
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Instantiate all the classifiers to be added
    clf_knn = KNeighborsClassifier(6, weights="distance", p=1)
    clf_adaboost = AdaBoostClassifier(learning_rate=0.01)
    clf_dectree = DecisionTreeClassifier(min_impurity_decrease=0.02)
    clf_gaussian = GaussianNB()
    clf_lda = LinearDiscriminantAnalysis()
    clf_gradientboost = GradientBoostingClassifier(n_estimators=100,
                                                   random_state=0,
                                                   min_samples_leaf=3,
                                                   verbose=True)
    clf_bernoullinb = BernoulliNB(alpha=3.5, binarize=0.03)
    clf_nnkeras = NNKeras(ds.selected_x_test.shape[1])
    clf_svc = SVC(probability=True, C=1000, gamma=1)
    clf_nusvc = NuSVC(nu=0.1, gamma=10, probability=True)
    clf_qda = QuadraticDiscriminantAnalysis(store_covariance=True)
    clf_randomforest = RandomForestClassifier(n_estimators=13,
                                              random_state=0,
                                              min_samples_leaf=3,
                                              bootstrap=False)

    # Add all the classifiers to the model selector
    ms.add_classifier("KNN", clf_knn)
    ms.add_classifier("AdaBoost", clf_adaboost)
    ms.add_classifier("Decision Tree", clf_dectree)
    ms.add_classifier("Gaussian NB", clf_gaussian)
    ms.add_classifier("LDA", clf_lda)
    ms.add_classifier("Gradient Boosting", clf_gradientboost)
    ms.add_classifier("Bernoulli NB", clf_bernoullinb)
    ms.add_classifier("NN Keras", clf_nnkeras)
    ms.add_classifier("SVC", clf_svc)
    ms.add_classifier("NuSVC", clf_nusvc)
    ms.add_classifier("QDA", clf_qda)
    ms.add_classifier("Random Forest", clf_randomforest)

    # Get best model
    ms.get_best_model(k=10, plot=True)
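The repeated scale / lasso-select / PCA / classify boilerplate in these examples maps naturally onto a scikit-learn Pipeline. The sketch below shows that shape on a stand-in dataset; the alpha=0.17 reuse and the 0.95 variance target are assumptions, and none of the project's DataLoader/DataSelector/ModelSelector classes are involved.

# Illustrative pipeline mirroring the scale -> lasso select -> PCA -> classify flow.
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)  # placeholder for the project's training data

pipe = Pipeline([
    ("scale", StandardScaler()),
    ("select", SelectFromModel(Lasso(alpha=0.17))),   # alpha value assumed
    ("reduce", PCA(n_components=0.95)),               # variance target assumed
    ("clf", SVC(probability=True, C=1000, gamma=1)),
])
print(cross_val_score(pipe, X, y, cv=10).mean())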
Example #8
        self.pca_x_train = data_pca[:len(self.x_train)]
        self.pca_x_train = pd.DataFrame(self.pca_x_train, columns=col_names)
        self.pca_x_test = data_pca[len(self.x_train):]
        self.pca_x_test = pd.DataFrame(self.pca_x_test, columns=col_names)

    def get_pca_x_train(self):
        """ get x_train after pca """
        return self.pca_x_train

    def get_pca_x_test(self):
        """ get x_test after pca """
        return self.pca_x_test


if __name__ == "__main__":
    settings_path = get_settings_path_from_arg("data_reducer")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "data_reducer", logging.DEBUG)

    train_data = pd.read_csv(settings.data.train_path)
    test_data = pd.read_csv(settings.data.test_path)

    train_data = train_data.drop("species", axis=1)

    dr = DataReducer(train_data, test_data)
    dr.pca_data_reduction()

    pca_train_data = dr.get_pca_x_train()
    pca_test_data = dr.get_pca_x_test()
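The slicing at the top of this example (data_pca split at len(self.x_train)) indicates that the reducer fits PCA on the concatenated train and test rows and then splits the projection back apart. A minimal standalone sketch of that pattern, assuming pandas DataFrame inputs and an illustrative 0.95 variance target:

# Standalone sketch of the concatenate -> fit PCA -> split-back pattern used above.
import pandas as pd
from sklearn.decomposition import PCA


def pca_reduce(x_train, x_test, n_components=0.95):
    """Project train and test into a shared PCA space and return both frames."""
    data = pd.concat([x_train, x_test], ignore_index=True)
    data_pca = PCA(n_components=n_components).fit_transform(data)
    col_names = ["pca_{}".format(i) for i in range(data_pca.shape[1])]
    pca_x_train = pd.DataFrame(data_pca[:len(x_train)], columns=col_names)
    pca_x_test = pd.DataFrame(data_pca[len(x_train):], columns=col_names)
    return pca_x_train, pca_x_test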