def predict_validation(self, predictor, tag):
    """Predict on the validation split, persist predictions, and log metrics.

    Writes ``meta_features/<tag>_validate.csv`` and appends one row of
    (tag, accuracy, AUC, repr(predictor)) to ``self.stats_file``. AUC is
    only computed when the predictor produced probabilities; otherwise it
    is recorded as NaN.

    Parameters
    ----------
    predictor : fitted estimator, forwarded to ``common.predict_proba_or_label``
    tag : str
        Identifier used in file names, the probability column name and logging.
    """
    valid_pred_result = common.predict_proba_or_label(
        predictor, self.Xvalid, self.yvalid.index, tag)
    valid_pred_result.to_csv(
        "meta_features/{}_validate.csv".format(tag), index_label="id")

    colname_proba = "{}_proba".format(tag)
    if colname_proba in valid_pred_result:
        # Probabilistic predictor: AUC on raw scores, accuracy at the 0.5 cut.
        yvalid_pred_proba = valid_pred_result.loc[:, colname_proba]
        valid_auc = roc_auc_score(self.yvalid, yvalid_pred_proba)
        valid_accuracy = accuracy_score(self.yvalid, yvalid_pred_proba > 0.5)
    else:
        # Label-only predictor: AUC is undefined, record NaN.
        # Explicit check instead of `assert` so it still fires under `python -O`.
        if valid_pred_result.shape[1] != 1:
            raise ValueError(
                "expected exactly one label column, got {}".format(
                    valid_pred_result.shape[1]))
        yvalid_pred_label = valid_pred_result.iloc[:, 0]
        valid_accuracy = accuracy_score(self.yvalid, yvalid_pred_label)
        # np.nan: the np.NaN alias was removed in NumPy 2.0.
        valid_auc = np.nan

    self.stats_file.log(tag, valid_accuracy, valid_auc, str(predictor))
def run_once(self, predictor, tag):
    """Run the full pipeline for one predictor: fit, dump, predict, stack.

    Side effects (all keyed on `tag` under ``meta_features/``):
      * ``<tag>.pkl``          -- pickled fitted model
      * ``<tag>_validate.csv`` -- validation predictions (via predict_validation)
      * ``<tag>_test.csv``     -- test-set predictions
      * ``<tag>_train.csv``    -- out-of-fold meta-features for stacking
    Progress is echoed to stdout after each stage.

    Note: `print` is written as a single-argument call, which prints the
    same text under Python 2 and is valid Python 3, instead of the
    Python-2-only statement form.
    """
    # ---------- fit on all training dataset to get the model
    predictor.fit(self.Xtrain, self.ytrain)
    common.simple_dump("meta_features/{}.pkl".format(tag), predictor)
    print("\tModel[{}] learnt and saved".format(tag))

    # ---------- predict on validation
    self.predict_validation(predictor, tag)
    print("\tModel[{}] predicted on validation".format(tag))

    # ---------- predict on test
    test_pred_result = common.predict_proba_or_label(
        predictor, self.Xtest, self.ytest.index, tag)
    test_pred_result.to_csv(
        "meta_features/{}_test.csv".format(tag), index_label="id")
    print("\tModel[{}] predicted on test".format(tag))

    # ---------- generate meta features
    metafeatures = crossval_predict(predictor, self.Xtrain, self.ytrain, tag)
    metafeatures.to_csv(
        "meta_features/{}_train.csv".format(tag), index_label="id")
    print("\tMeta-features generated from Model[{}]".format(tag))
def predict_validation(self, predictor, tag):
    """Score `predictor` on the held-out validation split.

    Saves the raw predictions to ``meta_features/<tag>_validate.csv`` and
    records accuracy plus AUC (NaN when only hard labels are available)
    through ``self.stats_file``.
    """
    pred_df = common.predict_proba_or_label(
        predictor, self.Xvalid, self.yvalid.index, tag)
    pred_df.to_csv("meta_features/{}_validate.csv".format(tag),
                   index_label='id')

    proba_col = '{}_proba'.format(tag)
    if proba_col in pred_df:
        # Probability output available: AUC on scores, accuracy at 0.5.
        scores = pred_df.loc[:, proba_col]
        auc = roc_auc_score(self.yvalid, scores)
        acc = accuracy_score(self.yvalid, scores > 0.5)
    else:
        # Hard labels only -- a single column is expected; AUC undefined.
        assert pred_df.shape[1] == 1, 'only one label column'
        labels = pred_df.iloc[:, 0]
        acc = accuracy_score(self.yvalid, labels)
        auc = np.NaN

    self.stats_file.log(tag, acc, auc, str(predictor))
def run_once(self, predictor, tag): # ---------- fit on all training dataset to get the model predictor.fit(self.Xtrain, self.ytrain) common.simple_dump("meta_features/{}.pkl".format(tag), predictor) print "\tModel[{}] learnt and saved".format(tag) # ---------- predict on validation self.predict_validation(predictor, tag) print "\tModel[{}] predicted on validation".format(tag) # ---------- predict on test test_pred_result = common.predict_proba_or_label( predictor, self.Xtest, self.ytest.index, tag) test_pred_result.to_csv("meta_features/{}_test.csv".format(tag), index_label='id') print "\tModel[{}] predicted on test".format(tag) # ---------- generate meta features metafeatures = crossval_predict(predictor, self.Xtrain, self.ytrain, tag) metafeatures.to_csv("meta_features/{}_train.csv".format(tag), index_label='id') print "\tMeta-features generated from Model[{}]".format(tag)