Beispiel #1
0
def bagging(x_in, y_in, x_test_in, y_test_in):
    """Combine three classifiers by voting and print the test accuracy.

    A LogisticRegression, a LinearSVM and a CartDecisionTree are each
    fitted on ``(x_in, y_in)`` and asked to predict ``x_test_in``. For each
    test sample the logistic-regression label is kept only when the tree
    agrees with it and the SVM does not; in every other case (including a
    three-way disagreement) the SVM label is used.

    The accuracy against ``y_test_in`` is printed as a percentage. The
    function always returns 0.
    """
    # Fit each base model on the full training set, then predict the test set.
    lr_model = LogisticRegression().fit(x_in, y_in)
    lr_votes = lr_model.predict(x_test_in, y_test_in)
    svm_model = LinearSVM().fit(x_in, y_in)
    svm_votes = svm_model.predict(x_test_in, y_test_in)
    tree_model = CartDecisionTree().fit(x_in, y_in)
    tree_votes = tree_model.predict(x_test_in, y_test_in)

    # Tally the votes sample by sample and count the correct outcomes.
    n_test = np.size(y_test_in, axis=0)
    voted = np.zeros_like(lr_votes)
    hits = 0
    for idx in range(n_test):
        lr_matches_svm = lr_votes[idx] == svm_votes[idx]
        if lr_matches_svm or not (lr_votes[idx] == tree_votes[idx]):
            voted[idx] = svm_votes[idx]
        else:
            voted[idx] = lr_votes[idx]
        if voted[idx] == y_test_in[idx]:
            hits += 1

    acc = hits / n_test * 100
    print("Bagging ACC: %.2f%%" % acc)
    return 0
Beispiel #2
0
# 5-fold cross-validation of L1-regularised logistic regression over a sweep
# of penalty coefficients, followed by an accuracy-vs-coefficient plot.
# NOTE(review): the fold boundaries are hard-coded to 20 rows each, so this
# presumably expects X / y to hold 100 samples -- confirm against the caller.
y = pd.Series(y)
result = []  # mean cross-validated accuracy, one entry per coefficient
x = []       # the coefficient each entry of ``result`` was obtained with
for j in range(30):
    l1_coef = j * 2
    ans = []  # per-fold accuracy for this coefficient
    for i in range(5):
        # Fold i holds out rows [i*20, (i+1)*20); all other rows train.
        X1 = X[0:i*20]
        X2 = X[(i+1)*20:]
        # pd.concat replaces DataFrame.append / Series.append, which were
        # removed in pandas 2.0.
        X_train = pd.concat([X1, X2])
        X_test = X[i*20:(i+1)*20]
        y1 = y[0:i*20]
        y2 = y[(i+1)*20:]
        y_train = pd.concat([y1, y2])
        y_test = y[i*20:(i+1)*20]
        clf = LogisticRegression(l1_coef=l1_coef).l1_fit(X_train, y_train)
        y_hat = clf.predict(X_test)
        y_t = list(y_test)
        # Count matching predictions; a separate index name keeps the fold
        # counter ``i`` from being shadowed.
        answer = sum(int(y_hat[k]) == truth for k, truth in enumerate(y_t))
        ans.append(answer / len(y_t))
    result.append(sum(ans) / len(ans))
    # Record the coefficient that was actually passed to the model so the
    # x-axis of the plot matches the fitted models.
    x.append(l1_coef)
    print(ans)
print(result)
plt.plot(x, result)
plt.xlabel("Panelty Coefficient")
plt.ylabel("Accuracy")
plt.show()
Beispiel #3
0
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
# Synthetic classification data with a 70/30 train/test split.
X, y = make_classification(n_samples=1000)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

# Fit and score the L1 and L2 variants in turn. Each variant is exposed by
# the project's LogisticRegression as its own fit/score method pair.
for fit_name, score_name, banner in (
    ("l1_fit", "score1", "For L1 regularised Logistic Regression "),
    ("l2_fit", "score2", "For L2 regularised Logistic Regression "),
):
    clf = getattr(LogisticRegression(), fit_name)(X_train, y_train)
    y_hat = clf.predict(X_test)
    ans = getattr(clf, score_name)(y_hat, y_test)
    print(banner)
    print(ans)
Beispiel #4
0
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
import pandas as pd
import numpy as np
# Random toy problem: N samples, P standard-normal features, labels in {0, 1}.
N = 50
P = 8
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randint(0, 2, N))

# Train once with the plain fit and once with the autograd-based fit, then
# score each model on the training data with its matching score method.
for fit_name, score_name, banner in (
    ("fit", "score1", "Accuracy with Gradient_Descent Normally"),
    ("fit_autograd", "score2", "Accuracy with Autograd Implementation"),
):
    clf = getattr(LogisticRegression(), fit_name)(X, y)
    y_hat = clf.predict(X)
    ans = getattr(clf, score_name)(y_hat, y)
    print(banner)
    print(ans)
Beispiel #5
0
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.model_selection import KFold
import numpy as np

# Evaluate logistic regression on the breast-cancer dataset with 3-fold
# cross-validation and report the mean score over the folds.
dataset = load_breast_cancer()  # load once and reuse for features and labels
data = np.array(dataset.data)
y = np.array(dataset.target)
kf = KFold(n_splits=3)

# Fit and score inside the loop so that every fold contributes; fitting after
# the loop would only ever evaluate the final split.
fold_scores = []
for train_index, test_index in kf.split(data):
    X_train = pd.DataFrame(data[train_index])
    X_test = pd.DataFrame(data[test_index])
    y_train = pd.Series(y[train_index])
    y_test = pd.Series(y[test_index])
    clf = LogisticRegression().fit(X_train, y_train)
    y_hat = list(clf.predict(X_test))
    y_t = list(y_test)
    # NOTE(review): assumes score1 returns a numeric accuracy -- confirm.
    fold_scores.append(clf.score1(y_hat, y_t))

print("Overall Accuracy with K = 3 Folds")
print(sum(fold_scores) / len(fold_scores))
Beispiel #6
0
# Hold out 25% of the samples for testing, with a fixed seed.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

# Standardise the features: the scaler is fitted on the training split only
# and then reused unchanged on the test split.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train the logistic-regression classifier on the scaled training data.
classifier = LogisticRegression(lr=0.001)
classifier.fit(X_train, y_train)

# Predict the held-out samples.
y_pred = np.array(classifier.predict(X_test))

# Confusion matrix of the true vs predicted test labels.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Decision regions over the training data: classify every point of a dense
# grid spanning the first two features (padded by 1 on each side).
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
axis0 = np.arange(start=X_set[:, 0].min() - 1,
                  stop=X_set[:, 0].max() + 1,
                  step=0.01)
axis1 = np.arange(start=X_set[:, 1].min() - 1,
                  stop=X_set[:, 1].max() + 1,
                  step=0.01)
X1, X2 = np.meshgrid(axis0, axis1)
grid_points = np.array([X1.ravel(), X2.ravel()]).T
grid_labels = np.array(classifier.predict(grid_points)).reshape(X1.shape)
plt.contourf(X1, X2, grid_labels,
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
Beispiel #7
0
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns
from sklearn import datasets
from Logistic_Regression import LogisticRegression

# Binary task on the first two iris features: target 0 versus all others.
iris = datasets.load_iris()
X = iris.data[:, :2]
y = (iris.target != 0) * 1

clf = LogisticRegression()
clf.fit(X, y)

pred = clf.predict(X)

# Scatter the two classes in different colours.
plt.figure(figsize=(10, 6))
for label, colour in ((0, 'b'), (1, 'r')):
    members = X[y == label]
    plt.scatter(members[:, 0], members[:, 1], color=colour, label=str(label))
plt.legend()

# Evaluate the model's probability output on a grid covering the data range
# and draw the contour where that output equals 0.5.
x1_min, x1_max = X[:, 0].min(), X[:, 0].max()
x2_min, x2_max = X[:, 1].min(), X[:, 1].max()
xx1, xx2 = np.meshgrid(
    np.linspace(x1_min, x1_max),
    np.linspace(x2_min, x2_max),
)
grid = np.c_[xx1.ravel(), xx2.ravel()]
probs = clf.predict_prob(grid).reshape(xx1.shape)
plt.contour(xx1, xx2, probs, [0.5], linewidths=1, colors='black')
plt.show()
Beispiel #8
0
def _weighted_error(predictions, labels, weights, n_samples):
    """Return, as a float, the summed weight of the misclassified samples."""
    error = 0.0
    for j in range(n_samples):
        if predictions[j] != labels[j]:
            error += float(np.ravel(weights[j])[0])
    return error


def adaboost(x_ada, y_ada, x_test_in, y_test_in):
    """Boost three pre-trained classifiers and print the test accuracy.

    A LogisticRegression, a LinearSVM and a CartDecisionTree are fitted once
    on ``(x_ada, y_ada)``. For ``Adaboost_EPOCH`` rounds the candidate with
    the smallest weighted training error is selected, given a coefficient
    ``a = 0.5 * log((1 - e) / e)``, and the sample weights are updated and
    renormalised. The test prediction is the sign of the coefficient-weighted
    sum of the selected classifiers' votes.

    NOTE(review): the weight update maps labels and predictions through
    ``(v - 0.5) * 2``, so both are presumably 0/1 valued; ``y_ada`` and the
    predictions are presumably column vectors matching the ``(n, 1)`` weight
    array -- confirm against the callers.

    The accuracy is printed as a percentage. The function always returns 0.
    """
    # Start from uniform sample weights.
    n_train = np.size(x_ada, axis=0)
    weight = np.ones((n_train, 1)) / n_train
    weight_list = []
    classifier_list = []

    # Train the base classifiers once and keep their training predictions.
    clf1 = LogisticRegression().fit(x_ada, y_ada)
    predict1 = clf1.predict(x_ada, y_ada)
    clf2 = LinearSVM().fit(x_ada, y_ada)
    predict2 = clf2.predict(x_ada, y_ada)
    clf3 = CartDecisionTree().fit(x_ada, y_ada)
    predict3 = clf3.predict(x_ada, y_ada)
    candidates = [(clf1, predict1), (clf2, predict2), (clf3, predict3)]

    # Keeps the error strictly inside (0, 1): a classifier with no training
    # mistakes would otherwise divide by zero inside the log below.
    tiny = 1e-10

    for _ in range(Adaboost_EPOCH):
        # Weighted training error of every candidate under current weights.
        errors = [
            _weighted_error(train_pred, y_ada, weight, n_train)
            for _, train_pred in candidates
        ]

        # Pick the candidate with the smallest error (first one on ties).
        best = int(np.argmin(errors))
        clf, predict = candidates[best]
        err = min(max(errors[best], tiny), 1 - tiny)
        a = 1 / 2 * np.log((1 - err) / err)

        # +1 where prediction and label agree, -1 where they differ.
        agreement = (y_ada - 0.5) * (predict - 0.5) * 4
        # Normalise by the sum of the *weighted* factors so that the sample
        # weights keep summing to one after every round.
        reweighted = weight * np.exp(-a * agreement)
        weight = reweighted / np.sum(reweighted, axis=0)
        weight_list.append(a)
        classifier_list.append(clf)

    # Evaluate: coefficient-weighted vote of the selected classifiers.
    predict_sum = 0
    for alpha, model in zip(weight_list, classifier_list):
        predict_sum += alpha * (model.predict(x_test_in, y_test_in) - 0.5) * 2

    predict_get = np.zeros_like(y_test_in)
    acc_count = 0
    n_test = np.size(y_test_in, axis=0)
    for k in range(n_test):
        predict_get[k] = 1 if predict_sum[k] / Adaboost_EPOCH >= 0 else 0
        if predict_get[k] == y_test_in[k]:
            acc_count += 1
    acc = acc_count / n_test * 100
    print('Adaboost ACC: %.2f%%' % acc)
    return 0
# Fit the PCA on the training features and apply the same projection to the
# test features.
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
f_measure_test = []
f_measure_train = []
Lambda = []

# Sweep the L2-penalised logistic regression over the values produced by
# float_range, recording one test and one train metric per setting.
for i in float_range(-2, -0.2, 0.2):

    C_ = 1 / i
    LR = LogisticRegression(
        learningRate=0.1,
        numEpochs=10,
        penalty='L2',
        C=i,
    )
    LR.train(X_train_pca, y_train, tol=10**-3)
    # LR.plotCost()

    # Evaluate on the test split with a 0.5 probability cutoff.
    predictions, probs = LR.predict(X_test_pca, 0.5)
    performance = LR.performanceEval(predictions, y_test)

    # Same evaluation on the training split, for comparison.
    predictions_train, probs_train = LR.predict(X_train_pca, 0.5)
    performance_train = LR.performanceEval(predictions_train, y_train)
    # LR.plotDecisionRegions(X_test_pca, y_test)
    # LR.predictionPlot(X_test_pca, y_test)

    # Report every test metric for this setting.
    for key, value in performance.items():
        print('%s : %.2f' % (key, value))
    print("\n")

    # Keep the last reported metric of each evaluation (presumably the
    # F-measure, going by the list names).
    f_measure_test.append(list(performance.values())[-1])
    f_measure_train.append(list(performance_train.values())[-1])
    Lambda.append(i)