def main():
    """Fit KMeans with two clusters and print per-cluster positive rates.

    Loads the data via ``load_breast_cancer``, passes the features through
    ``min_max_scale``, fits ``KMeans`` with ``k = 2`` and then prints:

    * the overall share of positive labels in ``y``;
    * for every cluster, its element count and its share of positive labels.

    A cluster that receives no samples is reported with a NaN positive rate
    instead of raising ``ZeroDivisionError``.
    """
    print("Tesing the performance of Kmeans...")
    # Load data
    X, y = load_breast_cancer()
    X = min_max_scale(X)
    # Train model
    est = KMeans()
    k = 2
    est.fit(X, k)
    print()
    # Model performance
    overall_prob_pos = sum(y) / len(y)
    print("Positive probability of X is:%.1f%%.\n" % (overall_prob_pos * 100))
    y_hat = est.predict(X)
    # Maps cluster index -> [positive label count, total element count].
    cluster_pos_tot_cnt = {i: [0, 0] for i in range(k)}
    for yi_hat, yi in zip(y_hat, y):
        cluster_pos_tot_cnt[yi_hat][0] += yi
        cluster_pos_tot_cnt[yi_hat][1] += 1
    for i in range(k):
        pos_cnt, tot_cnt = cluster_pos_tot_cnt[i]
        # An empty cluster has no defined positive rate; report NaN rather
        # than dividing by zero.
        cluster_prob_pos = pos_cnt / tot_cnt if tot_cnt else float("nan")
        print("Count of elements in cluster %d is:%d." % (i, tot_cnt))
        print("Positive probability of cluster %d is:%.1f%%.\n" %
              (i, cluster_prob_pos * 100))
def main():
    """Test LogisticRegression with batch and stochastic training.

    Two nested runners, each wrapped by ``run_time``, fit a fresh
    ``LogisticRegression`` on the training split, pass it to
    ``model_evaluation`` with the test split and print the fitted model.
    The data is loaded, scaled with ``min_max_scale`` and split before the
    runners are invoked; they read the splits through closure.
    """

    @run_time
    def batch():
        print("Tesing the performance of LogisticRegression(batch)...")
        # Fit with the batch settings
        model = LogisticRegression()
        model.fit(
            data=train_features,
            label=train_labels,
            learning_rate=0.1,
            epochs=1000,
        )
        # Evaluate on the held-out split, then show the model
        model_evaluation(model, test_features, test_labels)
        print(model)

    @run_time
    def stochastic():
        print("Tesing the performance of LogisticRegression(stochastic)...")
        # Fit with the stochastic settings
        model = LogisticRegression()
        model.fit(
            data=train_features,
            label=train_labels,
            learning_rate=0.01,
            epochs=100,
            method="stochastic",
            sample_rate=0.8,
        )
        # Evaluate on the held-out split, then show the model
        model_evaluation(model, test_features, test_labels)
        print(model)

    # Load and scale the data
    features, labels = load_breast_cancer()
    features = min_max_scale(features)
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    split = train_test_split(features, labels, random_state=10)
    train_features, test_features, train_labels, test_labels = split
    batch()
    print()
    stochastic()
def main():
    """Test LogisticRegression with batch and stochastic training.

    Defines two ``run_time``-wrapped runners that each fit a new
    ``LogisticRegression``, evaluate it with ``model_evaluation`` and print
    it. The data is then loaded, scaled and split, and both runners are
    executed with a blank line printed between them.
    """

    @run_time
    def batch():
        print("Tesing the performance of LogisticRegression(batch)...")
        # Fit with the batch settings
        model = LogisticRegression()
        model.fit(X=train_x, y=train_y, lr=0.1, epochs=1000)
        # Evaluate on the held-out split, then show the model
        model_evaluation(model, test_x, test_y)
        print(model)

    @run_time
    def stochastic():
        print("Tesing the performance of LogisticRegression(stochastic)...")
        # Fit with the stochastic settings
        model = LogisticRegression()
        model.fit(
            X=train_x,
            y=train_y,
            lr=0.01,
            epochs=100,
            method="stochastic",
            sample_rate=0.8,
        )
        # Evaluate on the held-out split, then show the model
        model_evaluation(model, test_x, test_y)
        print(model)

    # Load and scale the data
    features, target = load_breast_cancer()
    features = min_max_scale(features)
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, random_state=10)
    batch()
    print()
    stochastic()
def main():
    """Test RandomForest: fit on the training split, evaluate on the test split."""
    print("Tesing the performance of RandomForest...")
    # Load data
    features, target = load_breast_cancer()
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, random_state=40)
    # Fit the forest
    forest = RandomForest()
    forest.fit(
        train_x,
        train_y,
        n_samples=300,
        max_depth=3,
        n_estimators=20,
    )
    # Report performance on the held-out split
    model_evaluation(forest, test_x, test_y)
def main():
    """Test KNeighborsClassifier on scaled data with ``k_neighbors=21``."""
    print("Tesing the performance of KNN classifier...")
    # Load the data and pass the features through min_max_scale
    features, target = load_breast_cancer()
    features = min_max_scale(features)
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, random_state=20)
    # Fit the classifier
    knn = KNeighborsClassifier()
    knn.fit(train_x, train_y, k_neighbors=21)
    # Report performance on the held-out split
    model_evaluation(knn, test_x, test_y)
def main():
    """Test GaussianNB and print the accuracy computed by ``_get_acc``."""
    print("Tesing the performance of Gaussian NaiveBayes...")
    # Load data
    features, target = load_breast_cancer()
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, random_state=10)
    # Fit the classifier
    model = GaussianNB()
    model.fit(train_x, train_y)
    # Predict on the held-out split and report the accuracy
    predictions = model.predict(test_x)
    accuracy = _get_acc(test_y, predictions)
    print("Accuracy is %.3f" % accuracy)
def main():
    """Test DecisionTree with ``max_depth=3`` and evaluate it on the test split."""
    print("Tesing the performance of DecisionTree...")
    # Load data
    features, target = load_breast_cancer()
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, random_state=10)
    # Fit the tree
    tree = DecisionTree()
    tree.fit(train_x, train_y, max_depth=3)
    # Show rules.
    # NOTE(review): this is a bare attribute access; presumably `rules` is a
    # property that prints as a side effect -- confirm against DecisionTree.
    tree.rules
    # Report performance on the held-out split
    model_evaluation(tree, test_x, test_y)
def main():
    """Test DecisionTree with ``max_depth=4``, print it and evaluate it."""
    print("Tesing the performance of DecisionTree...")
    # Load data
    features, labels = load_breast_cancer()
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    split = train_test_split(features, labels, random_state=10)
    train_features, test_features, train_labels, test_labels = split
    # Fit the tree
    tree = DecisionTree()
    tree.fit(train_features, train_labels, max_depth=4)
    # Show rules by printing the fitted tree
    print(tree)
    # Report performance on the held-out split
    model_evaluation(tree, test_features, test_labels)
def main():
    """Test GradientBoostingClassifier and evaluate it on the test split."""
    print("Tesing the performance of GBDT classifier...")
    # Load data
    features, target = load_breast_cancer()
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, random_state=20)
    # Fit the boosted model
    booster = GradientBoostingClassifier()
    booster.fit(
        train_x,
        train_y,
        n_estimators=2,
        lr=0.8,
        max_depth=3,
        min_samples_split=2,
    )
    # Report performance on the held-out split
    model_evaluation(booster, test_x, test_y)
def main():
    """Test GaussianNB and print the accuracy computed by ``_get_acc``."""
    print("Tesing the performance of Gaussian NaiveBayes...")
    # Load data
    features, labels = load_breast_cancer()
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    split = train_test_split(features, labels, random_state=100)
    train_features, test_features, train_labels, test_labels = split
    # Fit the classifier
    model = GaussianNB()
    model.fit(train_features, train_labels)
    # Predict on the held-out split and report the accuracy
    predictions = model.predict(test_features)
    accuracy = _get_acc(test_labels, predictions)
    print("Accuracy is %.3f" % accuracy)
def main():
    """Test RandomForest: fit on the training split, evaluate on the test split."""
    print("Tesing the performance of RandomForest...")
    # Load data
    features, labels = load_breast_cancer()
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    split = train_test_split(features, labels, random_state=40)
    train_features, test_features, train_labels, test_labels = split
    # Fit the forest
    forest = RandomForest()
    forest.fit(
        train_features,
        train_labels,
        n_estimators=50,
        max_depth=5,
        random_state=10,
    )
    # Report performance on the held-out split
    model_evaluation(forest, test_features, test_labels)
def main():
    """Test GradientBoostingClassifier and evaluate it on the test split."""
    print("Tesing the performance of GBDT classifier...")
    # Load data
    features, labels = load_breast_cancer()
    # Random train/test split. The original note states a 70% train rate;
    # no ratio is passed here, so it comes from train_test_split's default.
    split = train_test_split(features, labels, random_state=20)
    train_features, test_features, train_labels, test_labels = split
    # Fit the boosted model
    booster = GradientBoostingClassifier()
    booster.fit(
        train_features,
        train_labels,
        n_estimators=2,
        learning_rate=0.8,
        max_depth=3,
        min_samples_split=2,
    )
    # Report performance on the held-out split
    model_evaluation(booster, test_features, test_labels)