def get_loss(self, X_valid, Y_valid):
    """Compute the precision score on the given validation set."""
    Y_valid = Y_valid.reshape(-1)
    y_pred = self.predict(X_valid)
    logger.debug('y_pred : shape {}'.format(y_pred.shape))
    logger.debug('Y_valid : shape {}'.format(Y_valid.shape))
    # precision_score takes (y_true, y_pred), matching the unit test below
    return precision_score(Y_valid, y_pred)
def get_train_and_valid_result(self):
    """
    Returns
    -------
    train_loss : float
    valid_loss : float
    """
    Y_train_pred = np.round(
        self.forward(X_test=self.X_train, predict=True)).reshape(-1)
    logger.debug('Y_train_pred : \n{}'.format(Y_train_pred))
    logger.debug('self.Y_train : \n{}'.format(self.Y_train))
    train_loss = precision_score(self.Y_train.reshape(-1), Y_train_pred)
    if self.X_valid is not None:
        Y_valid_pred = np.round(
            self.forward(X_test=self.X_valid, predict=True))
        # reshape returns a new array; the original code dropped the result
        Y_valid_pred = Y_valid_pred.reshape(-1)
        logger.debug('Y_valid_pred : \n{}'.format(Y_valid_pred))
        valid_loss = precision_score(self.Y_valid.reshape(-1), Y_valid_pred)
    else:
        valid_loss = 0
    return train_loss, valid_loss
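# The bug fixed above is easy to hit: numpy's reshape returns a new array and
# never modifies its argument in place, so a bare `Y_valid_pred.reshape((-1))`
# is a no-op. A quick standalone demonstration:
import numpy as np

a = np.array([[1, 2], [3, 4]])
a.reshape(-1)       # returns a flattened view; `a` itself is unchanged
print(a.shape)      # (2, 2)
b = a.reshape(-1)   # the result must be assigned to take effect
print(b.shape)      # (4,)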
train_Y = train_ori_Y
train_X.shape, train_Y.shape

# Cross-validation over k
k_range = range(2, 21)
n_splits = 2
ms = ShuffleSplit(n_splits=n_splits)
k_scores = np.zeros(len(k_range))
for train_indices, test_indices in ms.split(train_X):
    for i, k in enumerate(k_range):
        clf = KNeighborsClassifier(k=k)
        clf.fit(train_X[train_indices], train_Y[train_indices])
        y_pred = clf.predict(train_X[test_indices])
        score = precision_score(train_Y[test_indices], y_pred)
        print('k : {} score : {}'.format(k, score))
        k_scores[i] += score
avg_k_scores = k_scores / n_splits
print(avg_k_scores)
# k_range starts at 2, so map the argmax index back into k_range
# (the original `np.argmax(avg_k_scores) + 1` was off by one)
print("best k ", k_range[np.argmax(avg_k_scores)])

for k in [13, 14, 15, 18, 19, 20]:
    clf = KNeighborsClassifier(k=k)
    clf.fit(train_X, train_Y)
    y_pred = clf.predict(test_X)
    sub = pd.DataFrame(y_pred)
    sub.to_csv('../results/KNN-' + str(k) + '-' +
               datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + ".csv",
               index=False)
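# For comparison, the same k sweep can be written against scikit-learn's
# built-in estimators. This is a sketch, not the repo's own KNN: it assumes
# train_X / train_Y as above and uses accuracy scoring, which matches what
# this project's precision_score computes (the fraction of exact matches).
import numpy as np
from sklearn.model_selection import ShuffleSplit, cross_val_score
from sklearn.neighbors import KNeighborsClassifier

k_range = range(2, 21)
cv = ShuffleSplit(n_splits=2, random_state=0)
scores = [cross_val_score(KNeighborsClassifier(n_neighbors=k),
                          train_X, train_Y, cv=cv, scoring='accuracy').mean()
          for k in k_range]
print('best k :', k_range[int(np.argmax(scores))])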
def test_precision_score(self):
    y_pred = np.array([1, 2, 3, 4, 5, 6, 3, 1])
    y_true = np.array([1, 2, 3, 4, 5, 6, 4, 1])
    # by default, assertAlmostEqual compares to 7 decimal places
    self.assertAlmostEqual(precision_score(y_true, y_pred), 7 / 8)
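# The test above pins down the metric's behaviour: 7 of the 8 labels match
# and the expected score is 7/8, i.e. the fraction of exact matches (what
# scikit-learn calls accuracy). A minimal sketch consistent with this test,
# offered as an assumption rather than the repo's actual implementation:
import numpy as np

def precision_score(y_true, y_pred):
    # fraction of positions where the prediction equals the true label
    y_true = np.asarray(y_true).reshape(-1)
    y_pred = np.asarray(y_pred).reshape(-1)
    return float(np.mean(y_true == y_pred))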
train_set_x = train_set_x_flatten / 255.
test_set_x = test_set_x_flatten / 255.

# reshape x to (n_samples, n_features)
train_x = train_set_x.T
test_x = test_set_x.T
# reshape y to (n_samples,)
train_y = train_set_y.reshape(-1)
test_y = test_set_y.reshape(-1)

lc = LogisticClassifier(learning_rate=0.005)
lc.fit(train_x, train_y, watch=True)
y_pred = lc.predict(test_x)
print(precision_score(test_y, y_pred))

"""
Cost after iteration 0: 0.693147
Cost after iteration 100: 0.584508
Cost after iteration 200: 0.466949
Cost after iteration 300: 0.376007
Cost after iteration 400: 0.331463
Cost after iteration 500: 0.303273
Cost after iteration 600: 0.279880
Cost after iteration 700: 0.260042
Cost after iteration 800: 0.242941
Cost after iteration 900: 0.228004
Cost after iteration 1000: 0.214820
Cost after iteration 1100: 0.203078
Cost after iteration 1200: 0.192544
"""
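# Sanity check on the trace above: the cost at iteration 0 is exactly ln 2,
# which suggests zero-initialized weights (an assumption about this
# LogisticClassifier): the sigmoid then outputs 0.5 for every sample, and the
# binary cross-entropy of a constant 0.5 prediction is -ln(0.5) = 0.693147...
import numpy as np
print(-np.log(0.5))  # 0.6931471805599453, matching "Cost after iteration 0"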