def knn_classify(df_gender, predicted_variable):
    """Fit an 18-neighbour KNN classifier, persist it, and print its test accuracy.

    Args:
        df_gender: Dataset handed unchanged to ``Utils.split_data``, which is
            expected to return ``(X_train, X_test, y_train, y_test)``.
        predicted_variable: String embedded in the name of the pickled model
            file (``resources/KNNimages_<predicted_variable>.sav``).
    """
    X_train, X_test, y_train, y_test = Utils.split_data(df_gender)
    neigh = KNeighborsClassifier(n_neighbors=18)
    neigh.fit(X_train, y_train)

    # Use a context manager so the file handle is flushed and closed even if
    # pickling raises; the original left the handle open.
    model_path = "resources/KNNimages_" + predicted_variable + ".sav"
    with open(model_path, 'wb') as model_file:
        pickle.dump(neigh, model_file)

    y_pred = neigh.predict(X_test)
    print("KNN acc: ", accuracy_score(y_test, y_pred))
 def kernel_estimation(df_gender):
     """Train an SGD classifier on RBF-sampled features and print its test accuracy.

     The feature map is fitted on the training split only and then re-used to
     transform the test split, so the printed score is measured on held-out
     data, like the other classifiers in this file.

     Args:
         df_gender: Dataset handed unchanged to ``Utils.split_data``, which is
             expected to return ``(X_train, X_test, y_train, y_test)``.
     """
     X_train, X_test, y_train, y_test = Utils.split_data(df_gender)
     rbf_feature = RBFSampler()
     X_train_features = rbf_feature.fit_transform(X_train)
     clf = SGDClassifier()
     clf.fit(X_train_features, y_train)

     # Previously the score was computed on the training features, which
     # reports training accuracy and leaves the test split unused.
     X_test_features = rbf_feature.transform(X_test)
     print("Kernel Density acc: ", clf.score(X_test_features, y_test))
Beispiel #3
0
    def logistic_regression_customized(df, min_confidence=0.30,
                                       fallback_label="xx-24"):
        """Logistic regression with a fallback label for low-confidence rows.

        Fits ``LogisticRegression`` on the training split, predicts the test
        split, and replaces every test prediction whose highest class
        probability is below ``min_confidence`` with ``fallback_label``.
        The resulting accuracy on the test split is printed.

        Args:
            df: Dataset handed unchanged to ``Utils.split_data``, which is
                expected to return ``(X_train, X_test, y_train, y_test)``.
            min_confidence: Probability threshold below which a prediction is
                overridden. Defaults to the original hard-coded 0.30.
            fallback_label: Label written for low-confidence rows. Defaults
                to the original hard-coded "xx-24".
        """
        X_train, X_test, y_train, y_test = Utils.split_data(df)
        clf = LogisticRegression()
        clf.fit(X_train, y_train)

        # Confidence must be measured on the same rows that are overridden.
        # The original took probabilities from X_train and applied the
        # resulting row positions to the X_test predictions, so unrelated
        # test rows were relabelled.
        test_proba = clf.predict_proba(X_test)
        low_confidence = test_proba.max(axis=1) < min_confidence

        y_prediction = clf.predict(X_test)
        # NOTE(review): assumes the class labels are strings compatible with
        # fallback_label - confirm against the dataset.
        y_prediction[low_confidence] = fallback_label

        print("logistic regression acc: ",
              accuracy_score(y_test, y_prediction))
 def svm_estimation(df_gender):
     """Fit an SVC (``gamma='scale'``) and print its accuracy on the test split.

     Args:
         df_gender: Dataset handed unchanged to ``Utils.split_data``, which is
             expected to return ``(X_train, X_test, y_train, y_test)``.
     """
     train_x, test_x, train_y, test_y = Utils.split_data(df_gender)

     model = svm.SVC(gamma='scale')
     model.fit(train_x, train_y)

     # Score the held-out predictions against the true test labels.
     test_accuracy = accuracy_score(test_y, model.predict(test_x))
     print("SVM acc: ", test_accuracy)
 def sgd_classify(df_gender):
     """Fit a hinge-loss, L2-penalised SGD classifier and print its test accuracy.

     Args:
         df_gender: Dataset handed unchanged to ``Utils.split_data``, which is
             expected to return ``(X_train, X_test, y_train, y_test)``.
     """
     features_train, features_test, labels_train, labels_test = (
         Utils.split_data(df_gender)
     )

     classifier = SGDClassifier(loss="hinge", penalty="l2")
     classifier.fit(features_train, labels_train)

     predicted_labels = classifier.predict(features_test)
     score = accuracy_score(labels_test, predicted_labels)
     print("sgd acc: ", score)