Ejemplo n.º 1
0
def main():
    """Cluster the breast cancer data with KMeans and report label rates.

    Loads the data set, passes the features through ``min_max_scale``, fits a
    ``KMeans`` estimator with ``k = 2`` and prints:

    * the overall mean of ``y`` as a percentage, and
    * for every cluster id in ``range(k)``, the number of samples predicted
      into it and the mean of their ``y`` values as a percentage.

    NOTE(review): the "positive probability" wording assumes ``y`` holds 0/1
    labels -- confirm against ``load_breast_cancer``.
    """
    print("Tesing the performance of Kmeans...")
    # Load data
    X, y = load_breast_cancer()
    X = min_max_scale(X)
    # Train model
    est = KMeans()
    k = 2
    est.fit(X, k)
    print()
    # Model performance
    overall_prob_pos = sum(y) / len(y)
    print("Positive probability of X is:%.1f%%.\n" % (overall_prob_pos * 100))
    y_hat = est.predict(X)
    # cluster id -> [sum of labels, number of samples]
    cluster_pos_tot_cnt = {i: [0, 0] for i in range(k)}
    for yi_hat, yi in zip(y_hat, y):
        cluster_pos_tot_cnt[yi_hat][0] += yi
        cluster_pos_tot_cnt[yi_hat][1] += 1
    for i in range(k):
        pos_cnt, tot_cnt = cluster_pos_tot_cnt[i]
        # A cluster can end up with no samples; report NaN for it instead of
        # aborting the whole report with a ZeroDivisionError.
        cluster_prob_pos = pos_cnt / tot_cnt if tot_cnt else float("nan")
        print("Count of elements in cluster %d is:%d." %
              (i, tot_cnt))
        print("Positive probability of cluster %d is:%.1f%%.\n" %
              (i, cluster_prob_pos * 100))
def main():
    """Run the batch and the stochastic LogisticRegression demos in turn.

    Loads the breast cancer data, passes the features through
    ``min_max_scale``, splits them with ``train_test_split`` and then calls
    the two nested, ``run_time``-decorated experiments.
    """
    @run_time
    def batch():
        # The train/test variables come from the enclosing scope; they are
        # bound below, before this function is first called.
        print("Tesing the performance of LogisticRegression(batch)...")
        fit_options = {"learning_rate": 0.1, "epochs": 1000}
        model = LogisticRegression()
        model.fit(data=train_x, label=train_y, **fit_options)
        # Evaluate on the held-out part, then show the fitted model
        model_evaluation(model, test_x, test_y)
        print(model)

    @run_time
    def stochastic():
        # Same closure-based data access as batch().
        print("Tesing the performance of LogisticRegression(stochastic)...")
        fit_options = {
            "learning_rate": 0.01,
            "epochs": 100,
            "method": "stochastic",
            "sample_rate": 0.8,
        }
        model = LogisticRegression()
        model.fit(data=train_x, label=train_y, **fit_options)
        # Evaluate on the held-out part, then show the fitted model
        model_evaluation(model, test_x, test_y)
        print(model)

    # Load and rescale the features
    features, target = load_breast_cancer()
    features = min_max_scale(features)
    # Random split; the original comment states a 70% train rate, which would
    # be train_test_split's default -- TODO confirm.
    split = train_test_split(features, target, random_state=10)
    train_x, test_x, train_y, test_y = split
    batch()
    print()
    stochastic()
Ejemplo n.º 3
0
def main():
    """Compare batch and stochastic training of LogisticRegression.

    The data is loaded, passed through ``min_max_scale`` and split with
    ``train_test_split``; both nested experiments then train a fresh model,
    evaluate it with ``model_evaluation`` and print it.
    """
    @run_time
    def batch():
        # Uses the split bound later in main() through the closure.
        print("Tesing the performance of LogisticRegression(batch)...")
        model = LogisticRegression()
        hyper_params = dict(lr=0.1, epochs=1000)
        model.fit(X=train_features, y=train_target, **hyper_params)
        model_evaluation(model, test_features, test_target)
        print(model)

    @run_time
    def stochastic():
        # Uses the split bound later in main() through the closure.
        print("Tesing the performance of LogisticRegression(stochastic)...")
        model = LogisticRegression()
        hyper_params = dict(lr=0.01, epochs=100, method="stochastic",
                            sample_rate=0.8)
        model.fit(X=train_features, y=train_target, **hyper_params)
        model_evaluation(model, test_features, test_target)
        print(model)

    # Load and rescale the features
    features, target = load_breast_cancer()
    features = min_max_scale(features)
    # Random split; a 70% train rate is what the original comment states --
    # TODO confirm against train_test_split's default.
    (train_features, test_features,
     train_target, test_target) = train_test_split(features, target,
                                                   random_state=10)
    batch()
    print()
    stochastic()
Ejemplo n.º 4
0
def main():
    """Fit a RandomForest on the breast cancer data and evaluate it.

    The data is split with ``train_test_split(random_state=40)``; the forest
    is trained on the train part and handed to ``model_evaluation`` together
    with the test part.
    """
    print("Tesing the performance of RandomForest...")
    features, target = load_breast_cancer()
    # Random split; the original comment states a 70% train rate -- TODO
    # confirm against train_test_split's default.
    split = train_test_split(features, target, random_state=40)
    train_features, test_features, train_target, test_target = split

    forest_params = {"n_samples": 300, "max_depth": 3, "n_estimators": 20}
    forest = RandomForest()
    forest.fit(train_features, train_target, **forest_params)
    model_evaluation(forest, test_features, test_target)
Ejemplo n.º 5
0
def main():
    """Fit a KNeighborsClassifier on the breast cancer data and evaluate it.

    Features go through ``min_max_scale`` before the split; the classifier is
    fitted with ``k_neighbors=21`` and passed to ``model_evaluation``.
    """
    print("Tesing the performance of KNN classifier...")
    features, target = load_breast_cancer()
    features = min_max_scale(features)
    # Random split; the original comment states a 70% train rate -- TODO
    # confirm against train_test_split's default.
    (train_features, test_features,
     train_target, test_target) = train_test_split(features, target,
                                                   random_state=20)
    neighbors_model = KNeighborsClassifier()
    neighbors_model.fit(train_features, train_target, k_neighbors=21)
    model_evaluation(neighbors_model, test_features, test_target)
Ejemplo n.º 6
0
def main():
    """Fit GaussianNB on the breast cancer data and print its accuracy.

    Predictions for the test part are compared with the test labels through
    ``_get_acc`` and the result is printed with three decimals.
    """
    print("Tesing the performance of Gaussian NaiveBayes...")
    features, target = load_breast_cancer()
    # Random split; the original comment states a 70% train rate -- TODO
    # confirm against train_test_split's default.
    split = train_test_split(features, target, random_state=10)
    train_features, test_features, train_target, test_target = split

    bayes = GaussianNB()
    bayes.fit(train_features, train_target)

    predictions = bayes.predict(test_features)
    print("Accuracy is %.3f" % _get_acc(test_target, predictions))
Ejemplo n.º 7
0
def main():
    """Fit a depth-3 DecisionTree on the breast cancer data and evaluate it."""
    print("Tesing the performance of DecisionTree...")
    features, target = load_breast_cancer()
    # Random split; the original comment states a 70% train rate -- TODO
    # confirm against train_test_split's default.
    split = train_test_split(features, target, random_state=10)
    train_features, test_features, train_target, test_target = split

    tree = DecisionTree()
    tree.fit(train_features, train_target, max_depth=3)
    # Bare attribute access kept from the original ("Show rules").
    # NOTE(review): presumably `rules` is a property that prints the rules as
    # a side effect -- confirm; otherwise this line has no visible effect.
    tree.rules
    model_evaluation(tree, test_features, test_target)
def main():
    """Fit a depth-4 DecisionTree on the breast cancer data and evaluate it.

    After training, the tree itself is printed (the original labels this step
    "Show rules") and then passed to ``model_evaluation`` with the test part.
    """
    print("Tesing the performance of DecisionTree...")
    features, target = load_breast_cancer()
    # Random split; the original comment states a 70% train rate -- TODO
    # confirm against train_test_split's default.
    split = train_test_split(features, target, random_state=10)
    train_features, test_features, train_target, test_target = split

    tree = DecisionTree()
    tree.fit(train_features, train_target, max_depth=4)
    print(tree)
    model_evaluation(tree, test_features, test_target)
Ejemplo n.º 9
0
def main():
    """Fit a GradientBoostingClassifier on the breast cancer data and evaluate it."""
    print("Tesing the performance of GBDT classifier...")
    features, target = load_breast_cancer()
    # Random split; the original comment states a 70% train rate -- TODO
    # confirm against train_test_split's default.
    (train_features, test_features,
     train_target, test_target) = train_test_split(features, target,
                                                   random_state=20)
    boosting_params = {
        "n_estimators": 2,
        "lr": 0.8,
        "max_depth": 3,
        "min_samples_split": 2,
    }
    booster = GradientBoostingClassifier()
    booster.fit(train_features, train_target, **boosting_params)
    model_evaluation(booster, test_features, test_target)
Ejemplo n.º 10
0
def main():
    """Fit GaussianNB on the breast cancer data and print its accuracy.

    Uses ``train_test_split(random_state=100)``; predictions on the test part
    are scored with ``_get_acc`` and printed with three decimals.
    """
    print("Tesing the performance of Gaussian NaiveBayes...")
    features, target = load_breast_cancer()
    # Random split; the original comment states a 70% train rate -- TODO
    # confirm against train_test_split's default.
    (train_features, test_features,
     train_target, test_target) = train_test_split(features, target,
                                                   random_state=100)
    bayes = GaussianNB()
    bayes.fit(train_features, train_target)

    predictions = bayes.predict(test_features)
    accuracy = _get_acc(test_target, predictions)
    print("Accuracy is %.3f" % accuracy)
def main():
    """Fit a 50-estimator RandomForest on the breast cancer data and evaluate it."""
    print("Tesing the performance of RandomForest...")
    features, target = load_breast_cancer()
    # Random split; the original comment states a 70% train rate -- TODO
    # confirm against train_test_split's default.
    split = train_test_split(features, target, random_state=40)
    train_features, test_features, train_target, test_target = split

    forest_params = dict(n_estimators=50, max_depth=5, random_state=10)
    forest = RandomForest()
    forest.fit(train_features, train_target, **forest_params)
    model_evaluation(forest, test_features, test_target)
Ejemplo n.º 12
0
def main():
    """Fit a GradientBoostingClassifier on the breast cancer data and evaluate it.

    Trains with two estimators, ``learning_rate=0.8``, ``max_depth=3`` and
    ``min_samples_split=2``, then calls ``model_evaluation`` on the test part.
    """
    print("Tesing the performance of GBDT classifier...")
    features, target = load_breast_cancer()
    # Random split; the original comment states a 70% train rate -- TODO
    # confirm against train_test_split's default.
    split = train_test_split(features, target, random_state=20)
    train_features, test_features, train_target, test_target = split

    boosting_params = dict(
        n_estimators=2,
        learning_rate=0.8,
        max_depth=3,
        min_samples_split=2,
    )
    booster = GradientBoostingClassifier()
    booster.fit(train_features, train_target, **boosting_params)
    model_evaluation(booster, test_features, test_target)