def predict_validation(self, predictor, tag):
        """Score ``predictor`` on the validation split and log the metrics.

        The table returned by ``common.predict_proba_or_label`` is written
        to ``meta_features/<tag>_validate.csv``.  If it has a
        ``<tag>_proba`` column, AUC is computed from that column and
        accuracy from the same column thresholded at 0.5.  Otherwise the
        table must hold exactly one column, which is scored as labels, and
        the AUC is recorded as NaN.

        predictor -- model handed to ``common.predict_proba_or_label``; its
                     ``str()`` is stored in the stats file.
        tag       -- name used for the CSV file, the probability column
                     and the stats-file entry.

        Raises AssertionError when there is no probability column and the
        result does not have exactly one column.
        """
        # NOTE(review): a method with this same name is defined again further
        # down in this file; if both live in the same class the later
        # definition replaces this one -- confirm and drop the duplicate.
        predictions = common.predict_proba_or_label(
            predictor, self.Xvalid, self.yvalid.index, tag)
        predictions.to_csv("meta_features/{}_validate.csv".format(tag), index_label="id")

        proba_column = "{}_proba".format(tag)
        if proba_column in predictions:
            proba = predictions.loc[:, proba_column]
            valid_auc = roc_auc_score(self.yvalid, proba)
            valid_accuracy = accuracy_score(self.yvalid, proba > 0.5)
        else:
            assert predictions.shape[1] == 1, "only one label column"
            labels = predictions.iloc[:, 0]
            valid_accuracy = accuracy_score(self.yvalid, labels)
            # AUC needs scores, not hard labels.  Use np.nan: the np.NaN
            # alias was removed in NumPy 2.0.
            valid_auc = np.nan

        self.stats_file.log(tag, valid_accuracy, valid_auc, str(predictor))
    def run_once(self, predictor, tag):
        """Fit ``predictor`` and write every artefact derived from it.

        Steps, in order:
          1. fit on ``self.Xtrain`` / ``self.ytrain`` and pass the model to
             ``common.simple_dump`` with ``meta_features/<tag>.pkl``;
          2. score on the validation split via ``predict_validation``;
          3. predict on the test split and save
             ``meta_features/<tag>_test.csv``;
          4. call ``crossval_predict`` on the training data and save its
             result to ``meta_features/<tag>_train.csv``.

        predictor -- object exposing ``fit(X, y)``.
        tag       -- name embedded in every output file and progress line.
        """
        # NOTE(review): a method with this same name is defined again further
        # down in this file; if both live in the same class the later
        # definition replaces this one -- confirm and drop the duplicate.

        # Parenthesised single-argument print behaves exactly like the
        # Python 2 print statement and is also valid Python 3.

        # ---------- fit on all training dataset to get the model
        predictor.fit(self.Xtrain, self.ytrain)
        common.simple_dump("meta_features/{}.pkl".format(tag), predictor)
        print("\tModel[{}] learnt and saved".format(tag))

        # ---------- predict on validation
        self.predict_validation(predictor, tag)
        print("\tModel[{}] predicted on validation".format(tag))

        # ---------- predict on test
        test_predictions = common.predict_proba_or_label(
            predictor, self.Xtest, self.ytest.index, tag)
        test_predictions.to_csv("meta_features/{}_test.csv".format(tag), index_label="id")
        print("\tModel[{}] predicted on test".format(tag))

        # ---------- generate meta features
        train_metafeatures = crossval_predict(predictor, self.Xtrain, self.ytrain, tag)
        train_metafeatures.to_csv("meta_features/{}_train.csv".format(tag), index_label="id")
        print("\tMeta-features generated from Model[{}]".format(tag))
    def predict_validation(self, predictor, tag):
        """Evaluate ``predictor`` on the validation data and record it.

        Predictions come from ``common.predict_proba_or_label`` and are
        saved as ``meta_features/<tag>_validate.csv``.  With a
        ``<tag>_proba`` column present, AUC is computed on that column and
        accuracy on the column compared against 0.5.  Without it, the
        result is required to have a single column that is scored as
        labels, and NaN is logged in place of the AUC.

        predictor -- model passed through to
                     ``common.predict_proba_or_label``; ``str(predictor)``
                     goes into the stats file.
        tag       -- identifier used in the file name, the probability
                     column name and the log entry.

        Raises AssertionError if neither a probability column nor exactly
        one label column is available.
        """
        result = common.predict_proba_or_label(
            predictor, self.Xvalid, self.yvalid.index, tag)
        result.to_csv("meta_features/{}_validate.csv".format(tag),
                      index_label='id')

        proba_name = '{}_proba'.format(tag)
        if proba_name in result:
            scores = result.loc[:, proba_name]
            valid_auc = roc_auc_score(self.yvalid, scores)
            valid_accuracy = accuracy_score(self.yvalid, scores > 0.5)
        else:
            assert result.shape[1] == 1, 'only one label column'
            predicted_labels = result.iloc[:, 0]
            valid_accuracy = accuracy_score(self.yvalid, predicted_labels)
            # No probabilities, so no AUC.  np.nan replaces the np.NaN
            # alias, which no longer exists in NumPy 2.0.
            valid_auc = np.nan

        self.stats_file.log(tag, valid_accuracy, valid_auc, str(predictor))
    def run_once(self, predictor, tag):
        """Train ``predictor`` once and emit all per-model outputs.

        In sequence this:
          * fits on ``self.Xtrain`` / ``self.ytrain`` and hands the model
            to ``common.simple_dump`` with ``meta_features/<tag>.pkl``;
          * runs ``predict_validation`` for the validation split;
          * predicts on the test split into
            ``meta_features/<tag>_test.csv``;
          * stores the output of ``crossval_predict`` on the training data
            in ``meta_features/<tag>_train.csv``.

        predictor -- object exposing ``fit(X, y)``.
        tag       -- label embedded in output paths and progress messages.
        """
        # print(...) with a single argument prints the same text under
        # Python 2 as the old print statement did, and also parses on
        # Python 3.

        # ---------- fit on all training dataset to get the model
        predictor.fit(self.Xtrain, self.ytrain)
        common.simple_dump("meta_features/{}.pkl".format(tag), predictor)
        print("\tModel[{}] learnt and saved".format(tag))

        # ---------- predict on validation
        self.predict_validation(predictor, tag)
        print("\tModel[{}] predicted on validation".format(tag))

        # ---------- predict on test
        test_result = common.predict_proba_or_label(
            predictor, self.Xtest, self.ytest.index, tag)
        test_result.to_csv("meta_features/{}_test.csv".format(tag),
                           index_label='id')
        print("\tModel[{}] predicted on test".format(tag))

        # ---------- generate meta features
        meta = crossval_predict(predictor, self.Xtrain, self.ytrain, tag)
        meta.to_csv("meta_features/{}_train.csv".format(tag),
                    index_label='id')
        print("\tMeta-features generated from Model[{}]".format(tag))