Example #1
    def get_loss(self, X_valid, Y_valid):
        """
        Compute the precision score on this validation set.
        """
        Y_valid = Y_valid.reshape(-1)
        y_pred = self.predict(X_valid)
        logger.debug('y_pred : shape{}'.format(y_pred.shape))
        logger.debug('Y_valid : shape{}'.format(Y_valid.shape))
        return precision_score(Y_valid, y_pred)
Example #2
    def get_train_and_valid_result(self):
        """
        Returns
        -------------
        train_loss : float
        valid_loss : float
        """
        Y_train_pred = np.round(self.forward(X_test=self.X_train,
                                             predict=True)).reshape(-1)
        logger.debug('Y_train_pred : \n{}'.format(Y_train_pred))
        logger.debug('self.Y_train : \n{}'.format(self.Y_train))
        train_loss = precision_score(self.Y_train.reshape(-1), Y_train_pred)

        if self.X_valid is not None:
            Y_valid_pred = np.round(
                self.forward(X_test=self.X_valid, predict=True)).reshape(-1)
            logger.debug('Y_valid_pred : \n{}'.format(Y_valid_pred))
            valid_loss = precision_score(self.Y_valid.reshape(-1),
                                         Y_valid_pred)
        else:
            valid_loss = 0

        return train_loss, valid_loss
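
A hedged usage sketch, assuming model is an instance of the class that defines this method (the variable name is hypothetical):

# Hypothetical: `model` is an instance of the class defining the method above.
train_score, valid_score = model.get_train_and_valid_result()
print('train precision: {:.4f}, valid precision: {:.4f}'.format(
    train_score, valid_score))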
Example #3
train_Y = train_ori_Y

train_X.shape, train_Y.shape

# # Cross-validation

k_range = range(2, 21)
n_splits = 2
ms = ShuffleSplit(n_splits=n_splits)
k_scores = np.zeros((len(k_range)))
for train_indices, test_indices in ms.split(train_X):
    for i, k in enumerate(k_range):
        clf = KNeighborsClassifier(k=k)
        clf.fit(train_X[train_indices], train_Y[train_indices])
        y_pred = clf.predict(train_X[test_indices])
        score = precision_score(train_Y[test_indices], y_pred)
        print('k : {} score: {}'.format(k, score))
        k_scores[i] += score
avg_k_scores = k_scores / n_splits
print(avg_k_scores)
print("best k ", np.argmax(avg_k_scores) + 1)

for k in [13, 14, 15, 18, 19, 20]:
    clf = KNeighborsClassifier(k=k)
    clf.fit(train_X, train_Y)
    y_pred = clf.predict(test_X)
    sub = pd.DataFrame(y_pred)
    sub.to_csv('../results/' + 'KNN-' + str(k) + '-' +
               str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")) +
               ".csv",
               index=False)
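
For comparison, a minimal sketch of the same k sweep using scikit-learn's own KNeighborsClassifier and cross_val_score in place of the project's custom classes (an assumption: sklearn's KNN takes n_neighbors rather than k, and 'accuracy' scoring stands in for the custom precision_score):

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

# Mean cross-validated accuracy for each candidate k.
k_range = range(2, 21)
avg_scores = [
    cross_val_score(KNeighborsClassifier(n_neighbors=k),
                    train_X, train_Y, cv=2, scoring='accuracy').mean()
    for k in k_range
]
print("best k ", k_range[int(np.argmax(avg_scores))])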
Example #4
    def test_precision_score(self):
        y_pred = np.array([1, 2, 3, 4, 5, 6, 3, 1])
        y_true = np.array([1, 2, 3, 4, 5, 6, 4, 1])
        # By default, assertAlmostEqual requires agreement to 7 decimal places.
        self.assertAlmostEqual(precision_score(y_true, y_pred), 7 / 8)
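
This test pins down the metric's behavior: one of the eight predictions disagrees with the truth, and the expected score is 7/8. A minimal sketch consistent with that, assuming the project's precision_score is simply the fraction of matching labels:

import numpy as np

def precision_score(y_true, y_pred):
    # Fraction of positions where prediction equals truth;
    # yields 7/8 on the arrays in the test above.
    y_true = np.asarray(y_true).reshape(-1)
    y_pred = np.asarray(y_pred).reshape(-1)
    return np.mean(y_true == y_pred)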
Example #5
train_set_x = train_set_x_flatten/255.
test_set_x = test_set_x_flatten/255.

# reshape X to (n_samples, n_features)
train_x = train_set_x.T
test_x = test_set_x.T

# reshape y to (n_samples,)
train_y = train_set_y.reshape(-1)
test_y = test_set_y.reshape(-1)

lc = LogisticClassifier(learning_rate=0.005)
lc.fit(train_x, train_y, watch=True)
y_pred = lc.predict(test_x)

print(precision_score(test_y, y_pred))

"""
Cost after iteration 0: 0.693147
Cost after iteration 100: 0.584508
Cost after iteration 200: 0.466949
Cost after iteration 300: 0.376007
Cost after iteration 400: 0.331463
Cost after iteration 500: 0.303273
Cost after iteration 600: 0.279880
Cost after iteration 700: 0.260042
Cost after iteration 800: 0.242941
Cost after iteration 900: 0.228004
Cost after iteration 1000: 0.214820
Cost after iteration 1100: 0.203078
Cost after iteration 1200: 0.192544
"""
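
Note that the cost at iteration 0 is 0.693147, which is ln 2: exactly what binary cross-entropy gives when the weights start at zero and every predicted probability is 0.5. A hypothetical sketch of that cost, assuming LogisticClassifier minimizes standard binary cross-entropy (the formula is standard; the function name below is ours):

import numpy as np

def cross_entropy_cost(y_true, y_prob):
    # Binary cross-entropy, clipped to avoid log(0).
    eps = 1e-12
    y_prob = np.clip(y_prob, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_prob) +
                    (1 - y_true) * np.log(1 - y_prob))

# All-0.5 predictions give ln 2 ~= 0.693147, matching iteration 0 in the log above.
print(cross_entropy_cost(np.array([0, 1, 1, 0]), np.full(4, 0.5)))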