def test_roc_auc_score():
    y_true = np.array([0, 0, 1, 1])
    y_pred = np.array([0.1, 0.4, 0.35, 0.8])
    assert_almost_equal(roc_auc_score(y_true, y_pred),
                        sklearn_roc_auc_score(y_true, y_pred))

    y_true = np.array([0, 0, 1, 1, 0])
    y_pred = np.array([0.8, 0.4, 0.4, 0.8, 0.8])
    assert_almost_equal(roc_auc_score(y_true, y_pred),
                        sklearn_roc_auc_score(y_true, y_pred))
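
# A minimal reference sketch (not the library implementation) of the identity
# these tests rely on: binary ROC AUC equals the Mann-Whitney U statistic
# normalized by n_pos * n_neg. Average ranks resolve ties, which is why the
# second case above (tied scores) still matches sklearn.
from scipy.stats import rankdata

def reference_roc_auc(y_true, y_score):
    ranks = rankdata(y_score)  # tie-averaged ranks, 1-based
    n_pos = (y_true == 1).sum()
    n_neg = (y_true == 0).sum()
    u = ranks[y_true == 1].sum() - n_pos * (n_pos + 1) / 2
    return u / (n_pos * n_neg)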
def get_loss(self, y, y_pred):
    if self.metric == 'error':
        return 1 - accuracy_score(y, y_pred)
    elif self.metric == 'auc':
        # TODO: warn when y_pred contains only hard 0/1 labels; AUC expects probabilities.
        return 1 - roc_auc_score(y, y_pred)
    else:
        raise NotImplementedError(f"metric '{self.metric}' is not implemented yet.")
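
# Worked check of the "1 - score" convention above (a standalone sketch using
# sklearn metrics; the `self.metric` dispatch is inlined): a perfect ranking
# yields loss 0.0, so lower loss is better for both metrics.
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score

y = np.array([0, 0, 1, 1])
proba = np.array([0.1, 0.2, 0.8, 0.9])                  # perfect ranking
print(1 - roc_auc_score(y, proba))                      # 0.0
print(1 - accuracy_score(y, (proba > 0.5).astype(int))) # 0.0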
def mlp(path):
    X, y, Xt, yt, cols = get_data(fillna=0, norm=True, dropids=True)
    X = cp.asarray(X)
    y = cp.asarray(y)
    Xt = cp.asarray(Xt)
    yt = cp.asarray(yt)
    params = {
        'learning_rate_init': 0.005,
        'verbose': True,
        'hidden_layer_sizes': (64, 64, 64),
        'max_iter': 100,
        'batch_size': 4096,
        'shuffle': True,
        'alpha': 0.000,
        'model_path': f'{path}/cache/mlp.pth',
    }
    clf = MLPClassifier(**params)
    clf.fit(X, y, Xt, yt)
    yp = clf.predict_proba(Xt)[:, 1]
    score = roc_auc_score(yt, yp)
    print('AUC: %.4f' % score)
    cp.save('mlp.npy', yp)
    # return yp
    print(cols)
    return score
def xgb(path):
    X, y, Xt, yt, cols = get_data()
    params = {
        'n_estimators': 100,
        'eta': 0.1,
        'early_stopping_rounds': 10,
        'max_depth': 7,
        'colsample_bytree': 1.0,
        'subsample': 0.5,
        'verbosity': 1,
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'validation_fraction': 0,
    }
    clf = XGBClassifier(**params)
    clf.fit(X, y, Xt, yt)
    clf.clf.save_model(f'{path}/cache/xgb.json')
    yp = clf.predict_proba(Xt)
    # return yp
    print(cols)
    cp.save('xgb_va.npy', cp.asarray(yp))
    yx = clf.predict_proba(X)
    cp.save('xgb_tr.npy', cp.asarray(yx))
    return roc_auc_score(yt, yp)
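
# The XGBClassifier used above appears to be a thin custom wrapper rather than
# xgboost's own class (note `clf.clf.save_model`, the fit(X, y, Xt, yt)
# signature, and the non-xgboost 'validation_fraction' key). A minimal sketch
# consistent with those call sites, assuming xgboost underneath; the real
# wrapper may differ:
import xgboost

class XGBClassifier:
    def __init__(self, validation_fraction=0, **params):
        # 'validation_fraction' is consumed here; the rest goes to xgboost
        self.clf = xgboost.XGBClassifier(**params)

    def fit(self, X, y, Xt, yt):
        # (Xt, yt) serves as the eval set for early stopping
        self.clf.fit(X, y, eval_set=[(Xt, yt)])
        return self

    def predict_proba(self, X):
        # return the positive-class column, matching roc_auc_score(yt, yp) above
        return self.clf.predict_proba(X)[:, 1]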
def test_roc_auc_score_at_limits():
    # np.float was removed in NumPy 1.24; use np.float64 instead
    y_true = np.array([0., 0., 0.], dtype=np.float64)
    y_pred = np.array([0., 0.5, 1.], dtype=np.float64)
    err_msg = ("roc_auc_score cannot be used when "
               "only one class present in y_true. ROC AUC score "
               "is not defined in that case.")
    with pytest.raises(ValueError, match=err_msg):
        roc_auc_score(y_true, y_pred)

    y_true = np.array([0., 0.5, 1.0], dtype=np.float64)
    y_pred = np.array([0., 0.5, 1.], dtype=np.float64)
    err_msg = ("Continuous format of y_true "
               "is not supported.")
    with pytest.raises(ValueError, match=err_msg):
        roc_auc_score(y_true, y_pred)
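
# A common guard for the single-class failure mode exercised above (a sketch,
# not part of the library; assumes numpy-compatible inputs and works with
# either the cuML or sklearn scorer in scope):
import numpy as np

def safe_roc_auc(y_true, y_pred, default=float('nan')):
    # AUC is undefined when only one class is present, so short-circuit
    # instead of letting roc_auc_score raise a ValueError.
    if len(np.unique(np.asarray(y_true))) < 2:
        return default
    return roc_auc_score(y_true, y_pred)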
def test_roc_auc_score_random(n_samples, dtype):
    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 2, n_samples).astype(dtype))

    y_pred, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    auc = roc_auc_score(y_true, y_pred)
    skl_auc = sklearn_roc_auc_score(y_true, y_pred)
    assert_almost_equal(auc, skl_auc)
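
# A plausible sketch of the `generate_random_labels` test helper, inferred
# from the 4-tuple unpacking above (assumption: it draws two arrays from a
# seeded RNG and returns host and device copies of each):
import numpy as np
import cupy as cp

def generate_random_labels(random_generation_lambda, seed=1234):
    rng = np.random.RandomState(seed)
    a = random_generation_lambda(rng)
    b = random_generation_lambda(rng)
    return a, b, cp.asarray(a), cp.asarray(b)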
def _calc_score_cuml(y_true, y_preds, y_proba=None, metrics=('accuracy',),
                     task=const.TASK_BINARY, pos_label=1, classes=None, average=None):
    if y_proba is None:
        y_proba = y_preds
    if len(y_proba.shape) == 2 and y_proba.shape[-1] == 1:
        y_proba = y_proba.reshape(-1)
    if len(y_preds.shape) == 2 and y_preds.shape[-1] == 1:
        y_preds = y_preds.reshape(-1)

    y_true = _to_dtype(y_true, 'float64')
    y_preds = _to_dtype(y_preds, 'float64')
    y_proba = _to_dtype(y_proba, 'float64')

    if task == const.TASK_REGRESSION:
        if isinstance(y_true, cudf.Series):
            y_true = y_true.values
        if isinstance(y_preds, cudf.Series):
            y_preds = y_preds.values
        if isinstance(y_proba, cudf.Series):
            y_proba = y_proba.values

    scores = {}
    for metric in metrics:
        if callable(metric):
            scores[metric.__name__] = metric(y_true, y_preds)
        else:
            metric_lower = metric.lower()
            if metric_lower == 'auc':
                if len(y_proba.shape) == 2:
                    # if task == const.TASK_MULTICLASS:
                    #     s = cu_metrics.roc_auc_score(y_true, y_proba, multi_class='ovo', labels=classes)
                    # else:
                    #     s = cu_metrics.roc_auc_score(y_true, y_proba[:, 1])
                    s = cu_metrics.roc_auc_score(y_true, y_proba[:, 1])
                else:
                    s = cu_metrics.roc_auc_score(y_true, y_proba)
            elif metric_lower == 'accuracy':
                if y_preds is None:
                    s = 0
                else:
                    s = cu_metrics.accuracy_score(y_true, y_preds)
            # elif metric_lower == 'recall':
            #     s = cu_metrics.recall_score(y_true, y_preds, **recall_options)
            # elif metric_lower == 'precision':
            #     s = cu_metrics.precision_score(y_true, y_preds, **recall_options)
            # elif metric_lower == 'f1':
            #     s = cu_metrics.f1_score(y_true, y_preds, **recall_options)
            elif metric_lower == 'mse':
                s = cu_metrics.mean_squared_error(y_true, y_preds)
            elif metric_lower == 'mae':
                s = cu_metrics.mean_absolute_error(y_true, y_preds)
            elif metric_lower == 'msle':
                s = cu_metrics.mean_squared_log_error(y_true, y_preds)
            elif metric_lower in {'rmse', 'rootmeansquarederror', 'root_mean_squared_error'}:
                s = cu_metrics.mean_squared_error(y_true, y_preds, squared=False)
            elif metric_lower == 'r2':
                s = cu_metrics.r2_score(y_true, y_preds)
            elif metric_lower in {'logloss', 'log_loss'}:
                # s = cu_metrics.log_loss(y_true, y_proba, labels=classes)
                s = cu_metrics.log_loss(y_true, y_proba)
            else:
                logger.warning(f'unknown metric: {metric}')
                continue

            if isinstance(s, cp.ndarray):
                s = float(cp.asnumpy(s))
            scores[metric] = s

    return scores
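
# Hypothetical usage of _calc_score_cuml for a binary task (values made up;
# assumes the module-level imports used above: cupy as cp, cu_metrics, const):
y_true = cp.asarray([0., 1., 1., 0.])
y_proba = cp.asarray([0.2, 0.7, 0.6, 0.4])
y_pred = (y_proba > 0.5).astype('float64')
scores = _calc_score_cuml(y_true, y_pred, y_proba=y_proba,
                          metrics=('auc', 'accuracy', 'logloss'))
print(scores)  # e.g. {'auc': 1.0, 'accuracy': 1.0, 'logloss': ...}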
def metrics(y_true, y_score):
    auc = roc_auc_score(y_true=y_true, y_score=y_score)
    ap = average_precision_score(y_true, y_score)
    return [auc, ap]
import cupy as cp
from cuml.metrics import roc_auc_score

y1 = cp.load('backup/2021-01-03-20-58-05-DGX-S/mlp.npy')
y2 = cp.load('backup/2021-01-03-21-39-45-DGX-S/mlp.npy')
y3 = cp.load('backup/2021-01-03-21-56-52-DGX-S/mlp.npy')
yt = cp.load('yt.npy')

# Average the three MLP runs, then blend with the XGBoost predictions.
y1 = (y1 + y2 + y3) / 3
y2 = cp.load('xgb_va.npy')
print('AUC: %.4f' % roc_auc_score(yt, y1))

# Rank-normalize both prediction vectors so the blend is scale-free.
y1 = y1.argsort().argsort() / y1.shape[0]
y2 = y2.argsort().argsort() / y2.shape[0]

# Sweep the blend weight from 0.0 to 1.0 in steps of 0.1.
for i in range(11):
    w = i * 0.1
    yp = y1 * w + y2 * (1 - w)
    print(i, 'AUC: %.4f' % roc_auc_score(yt, yp))
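
# Why the double argsort above works (a standalone sketch, not part of the
# pipeline): argsort() of an argsort() yields each element's rank, so dividing
# by the length maps predictions onto [0, 1) while preserving their ordering.
# AUC is rank-based, so each model's individual AUC is unchanged, but the
# blend becomes insensitive to differences in the models' score scales.
import cupy as cp

def rank_normalize(y):
    # ranks in [0, 1); ties are broken by position
    return y.argsort().argsort() / y.shape[0]

y = cp.array([0.9, 0.1, 0.5])
print(rank_normalize(y))  # [0.66666667 0.         0.33333333]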
def fit(self, train_dl, valid_dl=None):
    lr = self.params['learning_rate_init']
    epochs = self.params['max_iter']
    opt_type = self.params['solver']
    wd = self.params['alpha']
    momentum = self.params['momentum']
    beta1, beta2 = self.params['beta_1'], self.params['beta_2']
    eps = self.params['epsilon']

    if opt_type == 'sgd':
        opt = torch.optim.SGD(self.model.parameters(), lr=lr, weight_decay=wd,
                              momentum=momentum,
                              nesterov=self.params['nesterovs_momentum'])
    elif opt_type == 'adam':
        opt = torch.optim.Adam(self.model.parameters(), lr=lr, weight_decay=wd,
                               betas=(beta1, beta2), eps=eps)
    else:
        opt = torch.optim.LBFGS(self.model.parameters(), lr=lr)

    best_score = 1e9  # best validation loss; lower is better
    not_improved_iter = 0
    tol = self.params['tol']
    n_iter_no_change = self.params['n_iter_no_change']
    best = -1  # best validation AUC
    best_iter = 0

    for epoch in range(epochs):
        train_loss = 0
        start = time.time()
        ys, yps = [], []
        for batch in train_dl:
            xb, yb = batch
            xb, yb = xb.cuda(), yb.cuda()
            pred = self.model(xb)
            loss = self.loss_func(pred, yb)
            ys.append(yb.detach().cpu().numpy())
            yps.append(pred.detach().cpu().numpy())

            def closure():
                # LBFGS may re-evaluate the model several times per step,
                # so the closure must recompute the loss and its gradients.
                opt.zero_grad()
                out = self.model(xb)
                l = self.loss_func(out, yb)
                l.backward()
                return l

            train_loss += loss.cpu().detach().numpy()
            loss.backward()
            if opt_type != 'lbfgs':
                opt.step()
            else:
                opt.step(closure)
            opt.zero_grad()

        ys = np.concatenate(ys)
        yps = np.vstack(yps)
        yps = cp.asarray(yps)
        yps = softmax(yps)[:, 1].ravel()
        auc = roc_auc_score(ys, yps)

        if valid_dl is None:
            duration = time.time() - start
            print('Epoch %d Training Loss:%.4f Time: %.2f seconds' % (
                epoch, train_loss / train_dl.total_batches, duration))
            continue

        valid_loss = 0
        ys, yps = [], []
        with torch.no_grad():  # no gradient tracking needed for validation
            for batch in valid_dl:
                xb, yb = batch
                xb, yb = xb.cuda(), yb.cuda()
                pred = self.model(xb)
                loss = self.loss_func(pred, yb)
                valid_loss += loss.cpu().detach().numpy()
                ys.append(yb.detach().cpu().numpy())
                yps.append(pred.detach().cpu().numpy())
        ys = np.concatenate(ys)
        yps = np.vstack(yps)
        yps = cp.asarray(yps)
        yps = softmax(yps)[:, 1].ravel()
        auc_va = roc_auc_score(ys, yps)

        if best < auc_va:
            best = auc_va
            best_iter = epoch
            torch.save(self.model.state_dict(), self.params['model_path'])
            torch.save(self.model.state_dict(), './mlp.pth')

        valid_loss /= valid_dl.total_batches
        duration = time.time() - start
        if self.params['verbose']:
            print('Epoch %d Training Loss:%.4f AUC:%.4f Valid Loss:%.4f AUC:%.4f '
                  'Best:%.4f Best Iter: %d Time: %.2f seconds' % (
                      epoch, train_loss / train_dl.total_batches, auc,
                      valid_loss, auc_va, best, best_iter, duration))

        if valid_loss < best_score - tol:
            best_score = valid_loss
            not_improved_iter = 0  # reset the counter when the loss improves
        else:
            not_improved_iter += 1
        if self.params['early_stopping'] and not_improved_iter > n_iter_no_change:
            break
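
# `softmax` above is assumed to be a small CuPy helper not shown in this file;
# a numerically stable sketch consistent with how it is called, i.e. row-wise
# over the (n_samples, 2) logit matrix:
import cupy as cp

def softmax(x):
    # subtract the row max before exponentiating to avoid overflow
    e = cp.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)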