def bagging(x_in, y_in, x_test_in, y_test_in):
    """Train several base classifiers and combine their test predictions by voting."""
    # Train each base classifier and predict on the test set
    clf1 = LogisticRegression().fit(x_in, y_in)
    predict1 = clf1.predict(x_test_in, y_test_in)
    clf2 = LinearSVM().fit(x_in, y_in)
    predict2 = clf2.predict(x_test_in, y_test_in)
    clf3 = CartDecisionTree().fit(x_in, y_in)
    predict3 = clf3.predict(x_test_in, y_test_in)
    # Collect the votes (binary labels: if clf1 disagrees with both others,
    # clf2 and clf3 must agree, so taking predict2 is still the majority)
    predict = np.zeros_like(predict1)
    count = 0
    for i in range(np.size(y_test_in, axis=0)):
        if predict1[i] == predict2[i]:
            predict[i] = predict2[i]
        elif predict1[i] == predict3[i]:
            predict[i] = predict1[i]
        else:
            predict[i] = predict2[i]
        if predict[i] == y_test_in[i]:
            count += 1
    acc = count / np.size(y_test_in, axis=0) * 100
    print("Bagging ACC: %.2f%%" % acc)
    return 0
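# For comparison, a minimal self-contained sketch of the same pairwise-vote
# logic vectorised with numpy; majority_vote is a hypothetical helper, not
# part of the code above, and it assumes 0/1 label arrays like those the
# classifiers above return.
import numpy as np

def majority_vote(*predictions):
    """Element-wise majority label over several 0/1 prediction arrays."""
    stacked = np.stack(predictions)            # shape (n_classifiers, n_samples)
    return (stacked.mean(axis=0) >= 0.5).astype(int)

p1 = np.array([0, 1, 1, 0])
p2 = np.array([0, 1, 0, 0])
p3 = np.array([1, 1, 1, 0])
print(majority_vote(p1, p2, p3))  # -> [0 1 1 0]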
# X, y are assumed to have been loaded earlier (100 samples, 5 folds of 20)
X = pd.DataFrame(X)
y = pd.Series(y)
result = []
x = []
for j in range(30):
    ans = []
    for i in range(5):
        # Hold out rows [i*20, (i+1)*20) as the validation fold
        X_train = pd.concat([X[0:i * 20], X[(i + 1) * 20:]])
        X_test = X[i * 20:(i + 1) * 20]
        y_train = pd.concat([y[0:i * 20], y[(i + 1) * 20:]])
        y_test = y[i * 20:(i + 1) * 20]
        clf = LogisticRegression(l1_coef=j * 2).l1_fit(X_train, y_train)
        y_hat = clf.predict(X_test)
        y_t = list(y_test)
        answer = 0
        for k in range(len(y_t)):  # k, not i: do not shadow the fold index
            if int(y_hat[k]) == y_t[k]:
                answer += 1
        ans.append(answer / len(y_t))
    result.append(sum(ans) / len(ans))
    x.append(j * 2)  # match the l1_coef actually used above
    print(ans)
print(result)
plt.plot(x, result)
plt.xlabel("Penalty coefficient")
plt.ylabel("Accuracy")
plt.show()
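# The manual 5-fold loop above can be expressed more compactly with sklearn's
# KFold and cross_val_score; this is illustrative only, with sklearn's own
# L1-penalised LogisticRegression standing in for the custom l1_fit API.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression as SkLogisticRegression
from sklearn.model_selection import KFold, cross_val_score

X_demo, y_demo = make_classification(n_samples=100, random_state=0)
clf = SkLogisticRegression(penalty='l1', C=1.0, solver='liblinear')
scores = cross_val_score(clf, X_demo, y_demo, cv=KFold(n_splits=5))
print("Mean 5-fold accuracy: %.3f" % scores.mean())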
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.model_selection import train_test_split

data = pd.DataFrame(load_breast_cancer().data)
df = data.sample(n=2, axis='columns')  # pick two random features for a 2-D plot
y = pd.Series(load_breast_cancer().target)
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.30,
                                                    random_state=42)
LogisticRegression().plot_decision_boundary(X_train, y_train)
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1]):
    """This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial
                      weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the sdA

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers size, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type corruption_levels: list of float
    :param corruption_levels: amount of corruption to use for each layer
    """

    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    # allocate symbolic variables for the data
    self.x = T.matrix('x')  # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as 1D vector of
                             # [int] labels
    # end-snippet-1

    # The SdA is an MLP, for which all weights of intermediate layers
    # are shared with a different denoising autoencoder.
    # We will first construct the SdA as a deep multilayer perceptron,
    # and when constructing each sigmoidal layer we also construct a
    # denoising autoencoder that shares weights with that layer.
    # During pre-training we will train these autoencoders (which will
    # lead to changing the weights of the MLP as well).
    # During fine-tuning we will finish training the SdA by doing
    # stochastic gradient descent on the MLP.
    # start-snippet-2
    for i in xrange(self.n_layers):
        # construct the sigmoidal layer

        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        # the input to this layer is either the activation of the hidden
        # layer below or the input of the SdA if you are on the first
        # layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)
        # it's arguably a philosophical question...
        # but we are going to only declare that the parameters of the
        # sigmoid_layers are parameters of the StackedDAA; the visible
        # biases in the dA are parameters of those dAs, but not the SdA
        self.params.extend(sigmoid_layer.params)

        # Construct a denoising autoencoder that shares weights with this
        # layer
        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)
    # end-snippet-2
    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)
    # construct a function that implements one step of finetuning

    # compute the cost for the second phase of training,
    # defined as the negative log likelihood
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    # compute the gradients with respect to the model parameters
    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
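# For reference, the "corruption" each dA layer applies to its input is a
# random binary mask over the visible units; a small numpy sketch of the idea
# (corrupt is a hypothetical helper mirroring the dA's corruption step, with
# corruption_level matching the corruption_levels argument above):
import numpy as np

def corrupt(x, corruption_level, rng=None):
    """Zero out a random fraction of the inputs, as a denoising autoencoder does."""
    rng = rng or np.random.RandomState(0)
    mask = rng.binomial(n=1, p=1 - corruption_level, size=x.shape)
    return x * mask

print(corrupt(np.ones((2, 6)), corruption_level=0.3))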
from Logistic_Regression import LogisticRegression
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30,
                                                    random_state=42)

clf = LogisticRegression().l1_fit(X_train, y_train)
y_hat = clf.predict(X_test)
ans = clf.score1(y_hat, y_test)
print("For L1 regularised Logistic Regression")
print(ans)

clf = LogisticRegression().l2_fit(X_train, y_train)
y_hat = clf.predict(X_test)
ans = clf.score2(y_hat, y_test)
print("For L2 regularised Logistic Regression")
print(ans)
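# For reference, a minimal numpy sketch of what one gradient step with an L1
# or L2 penalty typically looks like for logistic regression; grad_step and
# lam are illustrative names, not the custom l1_fit/l2_fit API above.
import numpy as np

def grad_step(w, X, y, lr=0.1, lam=0.01, penalty='l2'):
    """One gradient-descent step on the regularised logistic loss."""
    p = 1.0 / (1.0 + np.exp(-X @ w))   # sigmoid predictions
    grad = X.T @ (p - y) / len(y)      # gradient of the unregularised log loss
    if penalty == 'l2':
        grad += lam * w                # d/dw of (lam/2) * ||w||^2
    else:
        grad += lam * np.sign(w)       # subgradient of lam * ||w||_1
    return w - lr * grad

rng = np.random.RandomState(0)
X_toy, y_toy = rng.randn(20, 3), rng.randint(0, 2, 20)
w = np.zeros(3)
for _ in range(100):
    w = grad_step(w, X_toy, y_toy)
print(w)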
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.model_selection import KFold
import numpy as np

data = np.array(load_breast_cancer().data)
y = np.array(load_breast_cancer().target)

kf = KFold(n_splits=3)
scores = []
for train_index, test_index in kf.split(data):
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = y[train_index], y[test_index]
    X_train = pd.DataFrame(X_train)
    X_test = pd.DataFrame(X_test)
    y_train = pd.Series(y_train)
    y_test = pd.Series(y_test)
    clf = LogisticRegression().fit(X_train, y_train)
    y_hat = list(clf.predict(X_test))
    y_t = list(y_test)
    scores.append(clf.score1(y_hat, y_t))

# average the per-fold accuracies rather than printing inside the loop
print("Overall accuracy with K = 3 folds")
print(sum(scores) / len(scores))
from Logistic_Regression import LogisticRegression
import pandas as pd
import numpy as np

N = 50
P = 8
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randint(0, 2, N))

clf = LogisticRegression().fit(X, y)
y_hat = clf.predict(X)
ans = clf.score1(y_hat, y)
print("Accuracy with plain gradient descent")
print(ans)

clf = LogisticRegression().fit_autograd(X, y)
y_hat = clf.predict(X)
ans = clf.score2(y_hat, y)
print("Accuracy with autograd implementation")
print(ans)
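# A sketch of what fit_autograd presumably builds on: the autograd package
# differentiating the logistic loss automatically. The loss below is
# illustrative; only autograd's public grad API is assumed.
import autograd.numpy as anp
from autograd import grad

def log_loss(w, X, y):
    p = 1.0 / (1.0 + anp.exp(-anp.dot(X, w)))
    return -anp.mean(y * anp.log(p) + (1 - y) * anp.log(1 - p))

grad_loss = grad(log_loss)  # gradient with respect to the first argument w
w = anp.zeros(3)
X_toy = anp.array([[0.5, -1.2, 0.3], [1.0, 0.4, -0.7]])
y_toy = anp.array([1.0, 0.0])
for _ in range(50):
    w = w - 0.1 * grad_loss(w, X_toy, y_toy)
print(w)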
classSetDict[CN].add(line)

# Sample the requested fraction of the data as training data and write it out
with open('TempTrainingData.txt', 'w') as wf_Train:
    for key in classSetDict.keys():  # '1', '2', '3'
        for line in random.sample(
                classSetDict[key],
                int(totalDataCountDict[int(key)] * float(sys.argv[1]))):
            wf_Train.write(line)
            classSetDict[key].remove(line)

# What remains in classSetDict is the test data

# Used to compute the overall accuracy
totalCorrectNumber = 0

# Classify with logistic regression
LR_Obj = LogisticRegression('TempTrainingData.txt')
for classNum in classSetDict.keys():  # '1', '2', '3'
    for line in classSetDict[classNum]:
        if LR_Obj.get_classification(line) == (int(classNum) - 1):
            correctnessDict['Class' + classNum] += 1
            totalCorrectNumber += 1

# Compute the overall accuracy
number_of_data_in_TempTraining = 0
for key in classSetDict.keys():
    number_of_data_in_TempTraining += len(classSetDict[key])
totalCorrectnessRatio = float(totalCorrectNumber) / float(
    number_of_data_in_TempTraining)
print('*** Overall accuracy: ' + str(totalCorrectnessRatio))
X = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting Logistic Regression to the Training set
classifier = LogisticRegression(lr=0.001)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = np.array(classifier.predict(X_test))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1,
                               stop=X_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=X_set[:, 1].min() - 1,
                               stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2,
             np.array(classifier.predict(
                 np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape))
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from Logistic_Regression import LogisticRegression

iris = datasets.load_iris()
X = iris.data[:, :2]
y = (iris.target != 0) * 1

clf = LogisticRegression()
clf.fit(X, y)
pred = clf.predict(X)

plt.figure(figsize=(10, 6))
plt.scatter(X[y == 0][:, 0], X[y == 0][:, 1], color='b', label='0')
plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1], color='r', label='1')
plt.legend()

x1_min, x1_max = X[:, 0].min(), X[:, 0].max()
x2_min, x2_max = X[:, 1].min(), X[:, 1].max()
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max),
                       np.linspace(x2_min, x2_max))
grid = np.c_[xx1.ravel(), xx2.ravel()]
probs = clf.predict_prob(grid).reshape(xx1.shape)
plt.contour(xx1, xx2, probs, [0.5], linewidths=1, colors='black')
plt.show()
def adaboost(x_ada, y_ada, x_test_in, y_test_in):
    # Initialise the sample weights uniformly
    weight = np.ones((np.size(x_ada, axis=0), 1))
    weight /= np.size(x_ada, axis=0)
    weight_list = []
    classifier_list = []
    # Train the base classifiers
    clf1 = LogisticRegression().fit(x_ada, y_ada)
    predict1 = clf1.predict(x_ada, y_ada)
    clf2 = LinearSVM().fit(x_ada, y_ada)
    predict2 = clf2.predict(x_ada, y_ada)
    clf3 = CartDecisionTree().fit(x_ada, y_ada)
    predict3 = clf3.predict(x_ada, y_ada)
    # Build the ensemble
    for i in range(Adaboost_EPOCH):
        e1 = 0
        e2 = 0
        e3 = 0
        # Compute each classifier's weighted error
        for j in range(np.size(x_ada, axis=0)):
            if predict1[j] != y_ada[j]:
                e1 += weight[j]
            if predict2[j] != y_ada[j]:
                e2 += weight[j]
            if predict3[j] != y_ada[j]:
                e3 += weight[j]
        # Pick the model with the smallest weighted error
        if e1[0] <= e2[0] and e1[0] <= e3[0]:
            clf = clf1
            a = 1 / 2 * np.log((1 - e1[0]) / e1[0])
            predict = predict1
        elif e2[0] <= e1[0] and e2[0] <= e3[0]:
            clf = clf2
            a = 1 / 2 * np.log((1 - e2[0]) / e2[0])
            predict = predict2
        else:
            clf = clf3
            a = 1 / 2 * np.log((1 - e3[0]) / e3[0])
            predict = predict3
        # Update the sample weights; (v - 0.5) * 2 maps 0/1 labels to -1/+1
        z = np.sum(np.exp(-a * (y_ada - 0.5) * (predict - 0.5) * 4), axis=0)
        weight = weight * np.exp(-a * (y_ada - 0.5) * (predict - 0.5) * 4) / z
        weight_list.append(a)
        classifier_list.append(clf)
    # Evaluate accuracy on the test set
    predict_sum = 0
    predict_get = np.zeros_like(y_test_in)
    acc_count = 0
    for l in range(Adaboost_EPOCH):
        predict_sum += weight_list[l] * (
            classifier_list[l].predict(x_test_in, y_test_in) - 0.5) * 2
    for k in range(np.size(y_test_in, axis=0)):
        if predict_sum[k] / Adaboost_EPOCH >= 0:
            predict_get[k] = 1
        else:
            predict_get[k] = 0
        if predict_get[k] == y_test_in[k]:
            acc_count += 1
    acc = acc_count / np.size(y_test_in, axis=0) * 100
    print('Adaboost ACC: %.2f%%' % acc)
    return 0
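# The update above maps 0/1 labels to -1/+1 via (v - 0.5) * 2. A minimal numpy
# sketch of the same AdaBoost update written with -1/+1 labels directly
# (y, h, w, eps, alpha are stand-ins for the quantities computed above):
import numpy as np

y = np.array([1, -1, 1, 1])    # true labels in {-1, +1}
h = np.array([1, 1, 1, 1])     # weak-learner predictions (one mistake)
w = np.full(4, 0.25)           # current sample weights

eps = w[h != y].sum()                    # weighted error, here 0.25
alpha = 0.5 * np.log((1 - eps) / eps)    # classifier weight, here ~0.55
w = w * np.exp(-alpha * y * h)           # up-weight mistakes, down-weight hits
w /= w.sum()                             # renormalise (the z term above)
print(alpha, w)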
# X_train = sc.fit_transform(X_train)
# X_test = sc.transform(X_test)

# Principal component analysis (dimensionality reduction)
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

f_measure_test = []
f_measure_train = []
Lambda = []

# Training logistic regression classifier with L2 penalty
for i in float_range(-2, -0.2, 0.2):
    C_ = 1 / i
    LR = LogisticRegression(learningRate=0.1, numEpochs=10,
                            penalty='L2', C=i)  # range from 0.01 - 0.03
    LR.train(X_train_pca, y_train, tol=10**-3)
    # LR.plotCost()

    # Testing fitted model on test data with cutoff probability 50%
    predictions, probs = LR.predict(X_test_pca, 0.5)
    performance = LR.performanceEval(predictions, y_test)

    # added
    predictions_train, probs_train = LR.predict(X_train_pca, 0.5)
    performance_train = LR.performanceEval(predictions_train, y_train)

    # LR.plotDecisionRegions(X_test_pca, y_test)
    # LR.predictionPlot(X_test_pca, y_test)

    # Print out performance values
    for key, value in performance.items():
        print('%s : %.2f' % (key, value))
    print("\n")
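# The scaling + PCA + logistic-regression chain above can be sketched as a
# sklearn Pipeline; illustrative only, with sklearn's own estimators standing
# in for the custom LR class and the float_range helper.
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression as SkLogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X_demo, y_demo = make_classification(n_samples=200, random_state=0)
Xtr, Xte, ytr, yte = train_test_split(X_demo, y_demo, random_state=0)
pipe = make_pipeline(StandardScaler(), PCA(n_components=2),
                     SkLogisticRegression(penalty='l2', C=1.0))
pipe.fit(Xtr, ytr)
print("Test accuracy: %.3f" % pipe.score(Xte, yte))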
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10):
    """This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial
                      weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the DBN

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers size, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network
    """

    self.sigmoid_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')  # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as 1D vector
                             # of [int] labels
    # end-snippet-1
    # The DBN is an MLP, for which all weights of intermediate
    # layers are shared with a different RBM. We will first
    # construct the DBN as a deep multilayer perceptron, and when
    # constructing each sigmoidal layer we also construct an RBM
    # that shares weights with that layer. During pretraining we
    # will train these RBMs (which will lead to changing the
    # weights of the MLP as well). During finetuning we will finish
    # training the DBN by doing stochastic gradient descent on the
    # MLP.
    for i in xrange(self.n_layers):
        # construct the sigmoidal layer

        # the size of the input is either the number of hidden
        # units of the layer below or the input size if we are on
        # the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        # the input to this layer is either the activation of the
        # hidden layer below or the input of the DBN if you are on
        # the first layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)

        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)

        # it's arguably a philosophical question... but we are
        # going to only declare that the parameters of the
        # sigmoid_layers are parameters of the DBN. The visible
        # biases in the RBM are parameters of those RBMs, but not
        # of the DBN.
        self.params.extend(sigmoid_layer.params)

        # Construct an RBM that shares weights with this layer
        rbm_layer = RBM(numpy_rng=numpy_rng,
                        theano_rng=theano_rng,
                        input=layer_input,
                        n_visible=input_size,
                        n_hidden=hidden_layers_sizes[i],
                        W=sigmoid_layer.W,
                        hbias=sigmoid_layer.b)
        self.rbm_layers.append(rbm_layer)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)

    # compute the cost for second phase of training, defined as the
    # negative log likelihood of the logistic regression (output) layer
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

    # compute the gradients with respect to the model parameters
    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Initialize the parameters for the multilayer perceptron

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie
    """

    # Since we are dealing with a one hidden layer MLP, this will translate
    # into a HiddenLayer with a tanh activation function connected to the
    # Logistic_Regression layer; the activation function can be replaced by
    # sigmoid or any other nonlinear function
    self.hiddenLayer = HiddenLayer(
        rng=rng,
        input=input,
        n_in=n_in,
        n_out=n_hidden,
        activation=T.tanh
    )

    # The logistic regression layer gets as input the hidden units
    # of the hidden layer
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayer.output,
        n_in=n_hidden,
        n_out=n_out
    )
    # end-snippet-2 start-snippet-3
    # L1 norm; one regularization option is to enforce the L1 norm to
    # be small
    self.L1 = (
        abs(self.hiddenLayer.W).sum()
        + abs(self.logRegressionLayer.W).sum()
    )

    # square of L2 norm; one regularization option is to enforce the
    # square of the L2 norm to be small
    self.L2_sqr = (
        (self.hiddenLayer.W ** 2).sum()
        + (self.logRegressionLayer.W ** 2).sum()
    )

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = (
        self.logRegressionLayer.negative_log_likelihood
    )
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors

    # the parameters of the model are the parameters of the two layers
    # it is made out of
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params
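# A quick numpy illustration of the two penalty terms defined above; W1 and
# W2 stand in for hiddenLayer.W and logRegressionLayer.W, and the l1_reg /
# l2_reg coefficients are the usual tutorial-style hyperparameters, not
# attributes of the class.
import numpy as np

rng = np.random.RandomState(0)
W1, W2 = rng.randn(4, 3), rng.randn(3, 2)
L1 = np.abs(W1).sum() + np.abs(W2).sum()    # L1 norm of all weights
L2_sqr = (W1 ** 2).sum() + (W2 ** 2).sum()  # squared L2 norm of all weights
l1_reg, l2_reg = 0.00, 0.0001
# the fine-tuning cost would then be assembled as:
#   cost = negative_log_likelihood + l1_reg * L1 + l2_reg * L2_sqr
print(L1, L2_sqr)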