def staged_auc(self, X, y):
    """
    Calculate the AUC after each of the stages.

    Every ``(n, p)`` pair produced by ``self.staged_predict(X)`` is scored
    against ``y`` with ``util.auc(y, p)``.

    arguments:
        X -- inputs handed unchanged to ``self.staged_predict``
        y -- true labels; converted to a numpy array once, up front

    returns:
        ns -- list of iteration numbers
        aucs -- list of corresponding areas under the curve.

    Both lists are empty when ``staged_predict`` yields nothing.
    """
    y = np.array(y)
    ns = []
    aucs = []
    for n, p in self.staged_predict(X):
        ns.append(n)
        aucs.append(util.auc(y, p))
    # Return two real lists rather than ``zip(*results)``: on Python 3 that
    # is a lazy zip object, and with zero stages it is empty, so a caller
    # unpacking ``ns, aucs = ...`` would fail.
    return ns, aucs
def tune_one_fold(options, i, train_i, test_i):
    """
    Tune one fold of the data.

    arguments:
        options -- passed to ``make_pipeline`` to build the classifier
        i -- fold number; used in the log messages and in the name of the
             result column
        train_i -- selector applied to the module-level ``train`` data to
                   pick the rows used for fitting
        test_i -- selector applied to ``train`` to pick the held-out rows
        NOTE(review): the selectors are used as ``train[...]``; presumably
        they are boolean row masks -- confirm against the caller.

    returns:
        A pandas.DataFrame with one column named ``'auc<i>'`` holding the
        held-out AUC after each training stage, indexed by the iteration
        numbers reported by ``clf.staged_auc``.
    """
    # ``train`` is module-level data that is only read here, so no
    # ``global`` declaration is required.
    clf = make_pipeline(options)
    ftrain = train[train_i]
    logging.info('fold %d', i)
    clf.fit(ftrain.Comment, ftrain.Insult)

    # Score on the training rows themselves, purely for the log.
    ypred = clf.predict(ftrain.Comment)
    logging.info("%d train auc=%f", i, auc(np.array(ftrain.Insult), ypred))

    # Slice the held-out rows once and reuse them.
    ftest = train[test_i]

    # record information about the auc at each stage of training.
    xs, ys = clf.staged_auc(ftest.Comment, ftest.Insult)
    xs = np.array(xs)
    ys = np.array(ys)
    return pandas.DataFrame({('auc%d' % i): ys}, index=xs)
def auc(self, X, y):
    """
    Score this model's predictions for X against the true labels y.

    arguments:
        X -- inputs handed unchanged to ``self.predict``
        y -- true labels; converted to a numpy array before scoring

    returns:
        Whatever ``util.auc`` returns for (labels, predictions).
    """
    predictions = self.predict(X)
    labels = np.array(y)
    return util.auc(labels, predictions)