def knn_classify(df_gender, predicted_variable):
    """Train a KNN classifier, save it to disk, and print its test accuracy.

    Args:
        df_gender: Dataset handed unchanged to ``Utils.split_data``, which is
            expected to return ``(X_train, X_test, y_train, y_test)``.
        predicted_variable: String inserted into the output file name
            ``resources/KNNimages_<predicted_variable>.sav``.

    Returns:
        None. The accuracy is printed to stdout and the fitted model is
        pickled to disk.
    """
    X_train, X_test, y_train, y_test = Utils.split_data(df_gender)
    neigh = KNeighborsClassifier(n_neighbors=18)
    neigh.fit(X_train, y_train)

    # Write through a context manager so the handle is flushed and closed
    # deterministically, even if pickling raises part-way through.
    model_path = "resources/KNNimages_" + predicted_variable + ".sav"
    with open(model_path, 'wb') as model_file:
        pickle.dump(neigh, model_file)

    y_pred = neigh.predict(X_test)
    print("KNN acc: ", accuracy_score(y_test, y_pred))
def kernel_estimation(df_gender):
    """Fit an SGD classifier on RBF-sampled features and print test accuracy.

    The inputs are mapped through an ``RBFSampler`` fitted on the training
    split; the same fitted sampler is then applied to the test split so the
    reported score reflects held-out data rather than the data the model was
    trained on.

    Args:
        df_gender: Dataset handed unchanged to ``Utils.split_data``, which is
            expected to return ``(X_train, X_test, y_train, y_test)``.

    Returns:
        None. The score is printed to stdout.
    """
    X_train, X_test, y_train, y_test = Utils.split_data(df_gender)

    rbf_feature = RBFSampler()
    train_features = rbf_feature.fit_transform(X_train)

    clf = SGDClassifier()
    clf.fit(train_features, y_train)

    # Only transform (never re-fit) the test split: it must be projected with
    # the random features drawn while fitting on the training split.
    test_features = rbf_feature.transform(X_test)
    print("Kernel Density acc: ", clf.score(test_features, y_test))
def logistic_regression_customized(df):
    """Fit logistic regression, override low-confidence predictions, print accuracy.

    After fitting on the training split, every test row whose highest class
    probability is below 0.30 has its prediction replaced with the label
    ``"xx-24"`` (presumably the fallback class -- confirm with the data
    owner). The accuracy of the adjusted predictions is then printed.

    Args:
        df: Dataset handed unchanged to ``Utils.split_data``, which is
            expected to return ``(X_train, X_test, y_train, y_test)``.

    Returns:
        None. The accuracy is printed to stdout.
    """
    X_train, X_test, y_train, y_test = Utils.split_data(df)
    clf = LogisticRegression()
    clf.fit(X_train, y_train)

    y_prediction = clf.predict(X_test)

    # Confidence has to be measured on the rows being predicted. Deriving the
    # low-confidence positions from the training split and applying them to
    # the test predictions would override unrelated rows.
    top_probability = clf.predict_proba(X_test).max(axis=1)
    low_confidence = top_probability < 0.30

    # Skip the assignment entirely when nothing is low-confidence, so no
    # value conversion into the prediction array is attempted needlessly.
    if low_confidence.any():
        y_prediction[low_confidence] = "xx-24"

    print("logistic regression acc: ", accuracy_score(y_test, y_prediction))
def svm_estimation(df_gender):
    """Train an SVC with ``gamma='scale'`` and print its test-split accuracy.

    Args:
        df_gender: Dataset handed unchanged to ``Utils.split_data``, which is
            expected to return ``(X_train, X_test, y_train, y_test)``.

    Returns:
        None. The accuracy is printed to stdout.
    """
    train_x, test_x, train_y, test_y = Utils.split_data(df_gender)

    classifier = svm.SVC(gamma='scale')
    classifier.fit(train_x, train_y)

    predicted = classifier.predict(test_x)
    score = accuracy_score(test_y, predicted)
    print("SVM acc: ", score)
def sgd_classify(df_gender):
    """Train a hinge-loss, L2-penalised SGD classifier and print test accuracy.

    Args:
        df_gender: Dataset handed unchanged to ``Utils.split_data``, which is
            expected to return ``(X_train, X_test, y_train, y_test)``.

    Returns:
        None. The accuracy is printed to stdout.
    """
    train_x, test_x, train_y, test_y = Utils.split_data(df_gender)

    model = SGDClassifier(loss="hinge", penalty="l2")
    model.fit(train_x, train_y)

    predicted = model.predict(test_x)
    score = accuracy_score(test_y, predicted)
    print("sgd acc: ", score)