Example 1
def bagging(x_in, y_in, x_test_in, y_test_in):
    """改变权重,训练多个分类器,投票"""

    # 先预测
    clf1 = LogisticRegression().fit(x_in, y_in)
    predict1 = clf1.predict(x_test_in, y_test_in)
    clf2 = LinearSVM().fit(x_in, y_in)
    predict2 = clf2.predict(x_test_in, y_test_in)
    clf3 = CartDecisionTree().fit(x_in, y_in)
    predict3 = clf3.predict(x_test_in, y_test_in)

    # Collect the votes (for binary labels, at least two of the three predictions always agree)
    predict = np.zeros_like(predict1)
    count = 0
    for i in range(np.size(y_test_in, axis=0)):
        if predict1[i] == predict2[i]:
            predict[i] = predict2[i]
        elif predict1[i] == predict3[i]:
            predict[i] = predict1[i]
        else:
            predict[i] = predict2[i]
        if predict[i] == y_test_in[i]:
            count += 1
    acc = count / np.size(y_test_in, axis=0) * 100
    print("Bagging ACC: %.2f%%" % acc)
    return 0
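The hand-written vote above works for binary labels because at least two of the three predictions always agree. A minimal vectorised sketch of the same majority vote, assuming predict1/predict2/predict3 are 0/1 arrays of equal length (the helper name is hypothetical):

import numpy as np

def majority_vote(p1, p2, p3):
    """Element-wise majority of three 0/1 prediction arrays."""
    votes = np.stack([p1, p2, p3])                 # shape (3, n_samples)
    return (votes.sum(axis=0) >= 2).astype(int)    # class 1 wins when at least two classifiers say 1

# e.g. acc = np.mean(majority_vote(predict1, predict2, predict3) == y_test_in) * 100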
Example 2
X = pd.DataFrame(X)
y = pd.Series(y)
result = []
x = []
for j in range(30):
    ans = []
    for i in range(5):
        X1 = X[0:i*20]
        X2 = X[(i+1)*20:]
        X_train = X1.append(X2)
        X_test = X[i*20:(i+1)*20]
        y1 = y[0:i*20]
        y2 = y[(i+1)*20:]
        y_train = y1.append(y2)
        y_test = y[i*20:(i+1)*20]
        clf = LogisticRegression(l1_coef= j*2).l1_fit(X_train,y_train)
        y_hat = clf.predict(X_test)
        y_t = list(y_test)
        answer = 0
        for k in range(len(y_t)):
            if int(y_hat[k]) == y_t[k]:
                answer += 1
        ans.append(answer / len(y_t))
    result.append(sum(ans) / len(ans))
    x.append(j * 2)  # use the same coefficient that was passed as l1_coef above
    print(ans)
print(result)
plt.plot(x,result)
plt.xlabel("Panelty Coefficient")
plt.ylabel("Accuracy")
plt.show()
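The slicing above implements 5-fold cross-validation by hand (folds of 20 rows). For reference, a sketch of the same loop using scikit-learn's KFold, assuming the same X, y and the custom l1_fit/predict API; the l1_coef value here is arbitrary:

from sklearn.model_selection import KFold

kf = KFold(n_splits=5)
fold_acc = []
for train_idx, test_idx in kf.split(X):
    X_tr, X_te = X.iloc[train_idx], X.iloc[test_idx]
    y_tr, y_te = y.iloc[train_idx], list(y.iloc[test_idx])
    clf = LogisticRegression(l1_coef=2).l1_fit(X_tr, y_tr)   # arbitrary penalty, for illustration only
    y_hat = list(clf.predict(X_te))
    fold_acc.append(sum(int(y_hat[k]) == y_te[k] for k in range(len(y_te))) / len(y_te))
print(sum(fold_acc) / len(fold_acc))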
Example 3
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.model_selection import train_test_split

data = pd.DataFrame(load_breast_cancer().data)
df = data.sample(n=2, axis='columns')  # pick two random feature columns so the boundary can be drawn in 2-D
y = pd.Series(load_breast_cancer().target)
X_train, X_test, y_train, y_test = train_test_split(df,
                                                    y,
                                                    test_size=0.30,
                                                    random_state=42)
LogisticRegression().plot_decision_boundary(X_train, y_train)
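plot_decision_boundary is a method of the custom Logistic_Regression class, so its exact behaviour is not shown here. A minimal sketch of what such a helper typically does for two features (predict on a mesh and shade the regions); note the custom predict may expect a DataFrame rather than a plain array:

import numpy as np
import matplotlib.pyplot as plt

def plot_boundary_sketch(clf, X, y):
    """Hypothetical helper: evaluate the classifier on a 2-D mesh and shade the decision regions."""
    X = np.asarray(X)
    xx1, xx2 = np.meshgrid(
        np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 200),
        np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 200))
    grid = np.c_[xx1.ravel(), xx2.ravel()]            # every mesh point as an (n, 2) array
    zz = np.asarray(clf.predict(grid)).reshape(xx1.shape)
    plt.contourf(xx1, xx2, zz, alpha=0.3)             # shaded 0/1 regions
    plt.scatter(X[:, 0], X[:, 1], c=np.asarray(y), edgecolors='k')
    plt.show()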
Example 4
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 corruption_levels=[0.1, 0.1]):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels
        # end-snippet-1

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoder.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pre-training we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During fine-tuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        # start-snippet-2
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
        # end-snippet-2
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)

        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
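The comments above describe each denoising autoencoder sharing its weights with the corresponding sigmoid layer. A tiny NumPy sketch of that weight-tying idea for one layer, independent of Theano (names and shapes are illustrative only):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def tied_dA_forward(x, W, b_hid, b_vis, corruption_level=0.1, rng=np.random):
    """One forward pass of a denoising autoencoder whose encoder weights W are shared with the MLP layer."""
    mask = rng.binomial(n=1, p=1 - corruption_level, size=x.shape)  # randomly zero out a fraction of the inputs
    x_tilde = x * mask                                              # corrupted input
    h = sigmoid(x_tilde @ W + b_hid)                                # encode with the shared weights W
    x_rec = sigmoid(h @ W.T + b_vis)                                # decode with the tied (transposed) weights
    return x_rec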
Example 5
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
X, y = make_classification(n_samples=1000)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
clf = LogisticRegression().l1_fit(X_train, y_train)
y_hat = (clf.predict(X_test))
ans = clf.score1(y_hat,y_test)
print("For L1 regularised Logistic Regression ")
print(ans)

clf = LogisticRegression().l2_fit(X_train, y_train)
y_hat = (clf.predict(X_test))
ans = clf.score2(y_hat,y_test)
print("For L2 regularised Logistic Regression ")
print(ans)
Example 6
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.model_selection import KFold
import numpy as np

data = np.array(load_breast_cancer().data)
y = np.array(load_breast_cancer().target)
kf = KFold(n_splits=3)
scores = []
for train_index, test_index in kf.split(data):
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = y[train_index], y[test_index]

    X_train = pd.DataFrame(X_train)
    X_test = pd.DataFrame(X_test)
    y_train = pd.Series(y_train)
    y_test = pd.Series(y_test)
    clf = LogisticRegression().fit(X_train, y_train)
    y_hat = list(clf.predict(X_test))
    y_t = list(y_test)
    scores.append(clf.score1(y_hat, y_t))

print("Overall Accuracy with K = 3 Folds")
print(sum(scores) / len(scores))
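For a quick sanity check, the same 3-fold evaluation can be done with scikit-learn's own estimator and cross_val_score (this uses sklearn's LogisticRegression, not the custom class above):

from sklearn.linear_model import LogisticRegression as SkLogisticRegression
from sklearn.model_selection import cross_val_score

sk_scores = cross_val_score(SkLogisticRegression(max_iter=5000), data, y, cv=3)
print(sk_scores.mean())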
Example 7
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
import pandas as pd
import numpy as np
N = 50
P = 8
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randint(0,2,N))
clf = LogisticRegression().fit(X, y)
y_hat = (clf.predict(X))
ans = clf.score1(y_hat,y)
print("Accuracy with Gradient_Descent Normally")
print(ans)

clf = LogisticRegression().fit_autograd(X, y)
y_hat = (clf.predict(X))
ans = clf.score2(y_hat,y)
print("Accuracy with Autograd Implementation")
print(ans)
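fit_autograd presumably lets an automatic-differentiation package derive the gradient instead of using the hand-coded one. A minimal sketch of that idea with the autograd package; the loss and variable names here are illustrative, not the repo's actual implementation:

import autograd.numpy as anp
from autograd import grad

def neg_log_likelihood(w, Xb, yb):
    """Binary cross-entropy for logistic regression; Xb already contains a bias column."""
    p = 1.0 / (1.0 + anp.exp(-anp.dot(Xb, w)))
    return -anp.mean(yb * anp.log(p + 1e-12) + (1 - yb) * anp.log(1 - p + 1e-12))

grad_fn = grad(neg_log_likelihood)                     # gradient w.r.t. w, derived automatically

Xb = anp.hstack([anp.ones((len(X), 1)), anp.asarray(X)])
w = anp.zeros(Xb.shape[1])
for _ in range(500):                                   # plain gradient descent
    w = w - 0.1 * grad_fn(w, Xb, anp.asarray(y))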
Example 8
        classSetDict[CN].add(line)

# Sample the chosen proportion of each class as training data and write it to a file
with open('TempTrainingData.txt', 'w') as wf_Train:
    for key in classSetDict.keys():  # '1', '2', '3'
        for line in random.sample(
                classSetDict[key],
                int(totalDataCountDict[int(key)] * float(sys.argv[1]))):
            wf_Train.write(line)
            classSetDict[key].remove(line)
            # whatever remains in classSetDict is the test data

# used to accumulate the total number of correct predictions
totalCorrectNumber = 0
# classify the held-out data with logistic regression
LR_Obj = LogisticRegression('TempTrainingData.txt')
for classNum in classSetDict.keys():  # '1', '2', '3'
    for line in classSetDict[classNum]:
        if LR_Obj.get_classification(line) == (int(classNum) - 1):
            correctnessDict['Class' + classNum] += 1
            totalCorrectNumber += 1

# compute the overall accuracy over the held-out data
number_of_data_in_TempTraining = 0
for key in classSetDict.keys():
    number_of_data_in_TempTraining += len(classSetDict[key])

totalCorrectnessRatio = float(totalCorrectNumber) / float(
    number_of_data_in_TempTraining)
print('*** Overall accuracy: ' + str(totalCorrectnessRatio))
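correctnessDict accumulates per-class hits; a small sketch of turning those counts into per-class accuracies, assuming (as above) that the lines remaining in classSetDict are the per-class test sets:

for classNum in classSetDict.keys():
    n_test = len(classSetDict[classNum])
    if n_test:  # guard against an empty class
        ratio = float(correctnessDict['Class' + classNum]) / float(n_test)
        print('Class ' + classNum + ' accuracy: ' + str(ratio))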
Example 9
X = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting Logistic Regression to the Training set

classifier = LogisticRegression(lr=0.001)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = np.array(classifier.predict(X_test))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, np.array(classifier.predict(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))  # trailing arguments assumed; the original snippet is cut off here
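The snippet stops partway through the standard decision-region plot. The usual continuation of this template (a sketch, assuming the same X_set/y_set variables) scatters the training points over the shaded regions and labels the axes:

plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for idx, label in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == label, 0], X_set[y_set == label, 1],
                color=ListedColormap(('red', 'green'))(idx), label=label)
plt.title('Logistic Regression (Training set)')
plt.xlabel('Feature 1')   # placeholder axis names
plt.ylabel('Feature 2')
plt.legend()
plt.show()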
Example 10
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns
from sklearn import datasets
from Logistic_Regression import LogisticRegression

iris = datasets.load_iris()

X = iris.data[:, :2]
y = (iris.target != 0) * 1

clf = LogisticRegression()
clf.fit(X,y)

pred = clf.predict(X)


plt.figure(figsize=(10, 6))
plt.scatter(X[y == 0][:, 0], X[y == 0][:, 1], color='b', label='0')
plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1], color='r', label='1')
plt.legend()
x1_min, x1_max = X[:, 0].min(), X[:, 0].max()
x2_min, x2_max = X[:, 1].min(), X[:, 1].max()
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
grid = np.c_[xx1.ravel(), xx2.ravel()]
probs = clf.predict_prob(grid).reshape(xx1.shape)
plt.contour(xx1, xx2, probs, [0.5], linewidths=1, colors='black')
plt.show()
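predict_prob comes from the custom Logistic_Regression class. A minimal sketch of what such a method usually computes (the sigmoid of the linear score); the attribute names w and b are assumptions, not the class's actual fields:

import numpy as np

def predict_prob_sketch(X, w, b):
    """P(y = 1 | x) for logistic regression with fitted weights w and bias b."""
    return 1.0 / (1.0 + np.exp(-(X @ w + b)))

# the 0.5 level set of these probabilities is exactly the contour drawn above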
Example 11
def adaboost(x_ada, y_ada, x_test_in, y_test_in):
    # Initialise the sample weights uniformly
    weight = np.ones((np.size(x_ada, axis=0), 1))
    weight /= np.size(x_ada, axis=0)
    weight_list = []
    classifier_list = []

    # Train the base classifiers
    clf1 = LogisticRegression().fit(x_ada, y_ada)
    predict1 = clf1.predict(x_ada, y_ada)
    clf2 = LinearSVM().fit(x_ada, y_ada)
    predict2 = clf2.predict(x_ada, y_ada)
    clf3 = CartDecisionTree().fit(x_ada, y_ada)
    predict3 = clf3.predict(x_ada, y_ada)

    # Combine the classifiers
    for i in range(Adaboost_EPOCH):
        e1 = 0
        e2 = 0
        e3 = 0

        # Compute each classifier's weighted error
        for j in range(np.size(x_ada, axis=0)):
            if predict1[j] != y_ada[j]:
                e1 += weight[j]
            if predict2[j] != y_ada[j]:
                e2 += weight[j]
            if predict3[j] != y_ada[j]:
                e3 += weight[j]

        # Pick the model with the smallest weighted error
        if e1[0] <= e2[0] and e1[0] <= e3[0]:
            clf = clf1
            a = 1 / 2 * np.log((1 - e1[0]) / e1[0])
            predict = predict1
        elif e2[0] <= e1[0] and e2[0] <= e3[0]:
            clf = clf2
            a = 1 / 2 * np.log((1 - e2[0]) / e2[0])
            predict = predict2
        else:
            clf = clf3
            a = 1 / 2 * np.log((1 - e3[0]) / e3[0])
            predict = predict3

        # Update the sample weights
        z = np.sum(np.exp(-a * (y_ada - 0.5) * (predict - 0.5) * 4),
                   axis=0)  # (label - 0.5) * 2 maps the {0, 1} labels to {-1, +1}
        weight = weight * np.exp(-a * (y_ada - 0.5) * (predict - 0.5) * 4) / z
        weight_list.append(a)
        classifier_list.append(clf)

    # Evaluate accuracy on the test set
    predict_sum = 0
    predict_get = np.zeros_like(y_test_in)
    acc_count = 0
    for l in range(Adaboost_EPOCH):
        predict_sum += weight_list[l] * (
            classifier_list[l].predict(x_test_in, y_test_in) - 0.5) * 2
    for k in range(np.size(y_test_in, axis=0)):
        if predict_sum[k] / Adaboost_EPOCH >= 0:
            predict_get[k] = 1
        else:
            predict_get[k] = 0
        if predict_get[k] == y_test_in[k]:
            acc_count += 1
    acc = acc_count / np.size(y_test_in, axis=0) * 100
    print('Adaboost ACC: %.2f%%' % acc)
    return 0
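The update above is the usual AdaBoost rule with {0, 1} labels mapped to {-1, +1} via (y - 0.5) * 2. A compact sketch of one such round in isolation, assuming 1-D 0/1 arrays y and h for true labels and predictions:

import numpy as np

def adaboost_round(weights, y, h):
    """One AdaBoost round: returns (alpha, new_weights) for 0/1 labels y and predictions h."""
    err = np.sum(weights[h != y])               # weighted error of this classifier
    alpha = 0.5 * np.log((1 - err) / err)       # classifier weight
    margin = (y - 0.5) * (h - 0.5) * 4          # +1 where the prediction is correct, -1 where it is wrong
    new_w = weights * np.exp(-alpha * margin)
    return alpha, new_w / new_w.sum()           # renormalise so the weights sum to 1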
Example 12

# X_train = sc.fit_transform(X_train)
# X_test = sc.transform(X_test)

# Principal component analysis (dimensionality reduction)
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
f_measure_test = []
f_measure_train = []
Lambda = []

# Training logistic regression classifier with L2 penalty
for i in float_range(-2, -0.2, 0.2):

    C_ = 1 / i  # inverse of the penalty value (computed but not used below)
    LR = LogisticRegression(learningRate=0.1, numEpochs=10, penalty='L2',
                            C=i)  # range from 0.01 - 0.03
    LR.train(X_train_pca, y_train, tol=10**-3)
    # LR.plotCost()
    # Testing fitted model on test data with cutoff probability 50%
    predictions, probs = LR.predict(X_test_pca, 0.5)
    performance = LR.performanceEval(predictions, y_test)
    # added
    predictions_train, probs_train = LR.predict(X_train_pca, 0.5)
    performance_train = LR.performanceEval(predictions_train, y_train)
    # LR.plotDecisionRegions(X_test_pca, y_test)
    # LR.predictionPlot(X_test_pca, y_test)

    # Print out performance values
    for key, value in performance.items():
        print('%s : %.2f' % (key, value))
    print("\n")
Example 13
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
        # of [int] labels
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well) During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # its arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
Example 14
    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """

        # Since we are dealing with a one hidden layer MLP, this will translate
        # into a HiddenLayer with a tanh activation function connected to the
        # Logistic_Regression layer; the activation function can be replaced by
        # sigmoid or any other nonlinear function
        self.hiddenLayer = HiddenLayer(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out
        )
        # end-snippet-2 start-snippet-3
        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.logRegressionLayer.W).sum()
        )

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = (
            (self.hiddenLayer.W ** 2).sum()
            + (self.logRegressionLayer.W ** 2).sum()
        )

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood
        )
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors

        # the parameters of the model are the parameters of the two layers it is
        # made out of
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params
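In the original MLP tutorial these L1 and L2_sqr terms are added to the training objective with small coefficients, roughly as below (the coefficient values are the tutorial's defaults and only illustrative here):

# assuming `classifier` is an instance of this MLP and `y` its symbolic target vector
L1_reg, L2_reg = 0.00, 0.0001
cost = (classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr)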