def main():
    """Fit LogisticRegression in batch and stochastic modes and score each run.

    The breast-cancer data is loaded, min-max scaled and split once; both
    decorated runs read that split from the enclosing scope when they are
    finally called at the end of this function.
    """

    def _fit_and_score(**fit_options):
        # Shared body of both runs: a fresh model is fitted on the train
        # split with the given options, then passed to get_acc together
        # with the test split.
        model = LogisticRegression()
        model.fit(X=train_X, y=train_y, **fit_options)
        get_acc(model, test_X, test_y)

    @run_time
    def batch():
        print("Tesing the accuracy of LogisticRegression(batch)...")
        _fit_and_score(lr=0.05, epochs=200)

    @run_time
    def stochastic():
        print("Tesing the accuracy of LogisticRegression(stochastic)...")
        _fit_and_score(
            lr=0.01,
            epochs=200,
            method="stochastic",
            sample_rate=0.5,
        )

    # Data preparation happens after the runs are defined; the closures only
    # look these names up at call time.
    features, labels = load_breast_cancer()
    features = min_max_scale(features)
    # Split ratio is whatever train_test_split defaults to (the original
    # comment says 70% train -- not verifiable from this function).
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, random_state=10
    )

    batch()
    stochastic()
def main():
    """Cluster the scaled breast-cancer data with KMeans and report label rates.

    Prints the overall share of positive labels, then for each of the
    ``k`` clusters its size and the share of positive labels among the
    samples assigned to it. A cluster that received no samples is reported
    with a size of 0 and a rate of ``nan`` instead of raising
    ``ZeroDivisionError``.
    """
    print("Tesing the performance of Kmeans...")
    # Load data
    X, y = load_breast_cancer()
    X = min_max_scale(X)
    # Train model
    est = KMeans()
    k = 2
    est.fit(X, k)
    print()
    # Model performance
    overall_prob_pos = sum(y) / len(y)
    print("Positive probability of X is:%.1f%%.\n" % (overall_prob_pos * 100))
    y_hat = est.predict(X)
    # cluster id -> [positive label count, total sample count]
    cluster_pos_tot_cnt = {i: [0, 0] for i in range(k)}
    for yi_hat, yi in zip(y_hat, y):
        cluster_pos_tot_cnt[yi_hat][0] += yi
        cluster_pos_tot_cnt[yi_hat][1] += 1
    for i in range(k):
        pos_cnt, tot_cnt = cluster_pos_tot_cnt[i]
        # An empty cluster has no defined positive rate; report nan rather
        # than dividing by zero and aborting the whole report.
        cluster_prob_pos = pos_cnt / tot_cnt if tot_cnt else float("nan")
        print("Count of elements in cluster %d is:%d." % (i, tot_cnt))
        print("Positive probability of cluster %d is:%.1f%%.\n"
              % (i, cluster_prob_pos * 100))
def main():
    """Fit a KNN classifier on the breast-cancer data and score it.

    Uses 21 neighbours and a split seeded with ``random_state=20``.
    """
    print("Tesing the accuracy of KNN classifier...")

    features, labels = load_breast_cancer()

    # Split ratio is train_test_split's default (the original comment says
    # 70% train -- not verifiable from this function).
    split = train_test_split(features, labels, random_state=20)
    train_X, test_X, train_y, test_y = split

    knn = KNeighborsClassifier()
    knn.fit(train_X, train_y, k_neighbors=21)

    # Evaluate on the held-out part of the split.
    get_acc(knn, test_X, test_y)
def main():
    """Fit Gaussian naive Bayes on the breast-cancer data and score it.

    The split is seeded with ``random_state=10``.
    """
    print("Tesing the accuracy of Gaussian NaiveBayes...")

    data, target = load_breast_cancer()

    # Split ratio is train_test_split's default (the original comment says
    # 70% train -- not verifiable from this function).
    data_train, data_test, target_train, target_test = train_test_split(
        data, target, random_state=10
    )

    model = GaussianNB()
    model.fit(data_train, target_train)

    # Evaluate on the held-out part of the split.
    get_acc(model, data_test, target_test)
def main():
    """Fit a RandomForest on the breast-cancer data and score it.

    The forest is fitted with ``n_samples=300``, ``max_depth=3`` and
    ``n_estimators=20`` on a split seeded with ``random_state=40``.
    """
    print("Tesing the accuracy of RandomForest...")

    features, labels = load_breast_cancer()

    # Split ratio is train_test_split's default (the original comment says
    # 70% train -- not verifiable from this function).
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, random_state=40
    )

    forest = RandomForest()
    forest.fit(
        train_X,
        train_y,
        n_samples=300,
        max_depth=3,
        n_estimators=20,
    )

    # Evaluate on the held-out part of the split.
    get_acc(forest, test_X, test_y)
def main():
    """Fit a depth-3 DecisionTree on the breast-cancer data and score it.

    The split is seeded with ``random_state=10``. After fitting, the
    tree's ``rules`` attribute is accessed before accuracy is evaluated.
    """
    print("Tesing the accuracy of DecisionTree...")

    features, labels = load_breast_cancer()

    # Split ratio is train_test_split's default (the original comment says
    # 70% train -- not verifiable from this function).
    parts = train_test_split(features, labels, random_state=10)
    train_X, test_X, train_y, test_y = parts

    tree = DecisionTree()
    tree.fit(train_X, train_y, max_depth=3)

    # NOTE(review): bare attribute access kept on purpose -- presumably
    # `rules` is a property that displays the learned rules as a side
    # effect; confirm against the DecisionTree class.
    tree.rules

    # Evaluate on the held-out part of the split.
    get_acc(tree, test_X, test_y)
def main():
    """Fit a GradientBoostingClassifier on the breast-cancer data and score it.

    The model is fitted with ``n_estimators=2``, ``lr=0.8``, ``max_depth=3``
    and ``min_samples_split=2`` on a split seeded with ``random_state=20``.
    """
    print("Tesing the accuracy of GBDT classifier...")

    features, labels = load_breast_cancer()

    # Split ratio is train_test_split's default (the original comment says
    # 70% train -- not verifiable from this function).
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, random_state=20
    )

    booster = GradientBoostingClassifier()
    booster.fit(
        train_X,
        train_y,
        n_estimators=2,
        lr=0.8,
        max_depth=3,
        min_samples_split=2,
    )

    # Evaluate on the held-out part of the split.
    get_acc(booster, test_X, test_y)