Example #1
0
    def save_oos_pred(self):
        """
        Compute and save the out-of-sample (OOS) predictions used as
        LV1 train features.

        For every entry of self.d_model whose "train" flag is truthy:
          - load the train data selected by the entry's "feat_choice",
          - get the OOS predictions and the column names from
            self._get_oos_preds,
          - write one csv file (rows sorted by Id, floats with 3 decimals)
            to ./Data/Level1_model_files/Train/.

        The column names returned by self._get_oos_preds must contain
        "Id" and "Response"; the data is laid out as Id first, then the
        predictions, then the original target.

        The output directory is created when it does not exist yet.
        """
        # Local import so the method does not rely on a module-level one
        import os

        out_dir = "./Data/Level1_model_files/Train/"
        # to_csv does not create missing folders, so make sure it is there.
        # try/except (rather than exists + makedirs) also works when another
        # process creates the folder in between, on Python 2 and 3 alike.
        try:
            os.makedirs(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise

        # Loop over estimators
        for model_name, model_conf in self.d_model.items():
            # Only train if desired
            if not model_conf["train"]:
                continue

            # Get data
            feat_choice = model_conf["feat_choice"]
            X, y, Id = prep.prepare_lv1_data(feat_choice, "train")
            # Keep a copy of y to deal with classifiers which may modify y
            y_keep = y.copy()
            # Single-argument form: valid on Python 2 and Python 3, and
            # prints the same text as the former print statement
            print("Compute OOS pred for model:  %s" % (model_name,))

            # Compute OOS preds
            y_pred_oos, list_col = self._get_oos_preds(X, y, Id, model_name)

            # Turn everything into 2D columns so they can be stacked.
            # list_col also names the Id and Response columns, hence the -2.
            Id = np.reshape(Id, (Id.shape[0], 1))
            y_keep = np.reshape(y_keep, (y_keep.shape[0], 1))
            y_pred_oos = np.reshape(
                y_pred_oos, (y_pred_oos.shape[0], len(list_col) - 2))

            # Save predictions to csv file with pandas
            data = np.hstack((Id, y_pred_oos, y_keep))
            df = pd.DataFrame(data, columns=list_col)
            # hstack yields a single dtype; restore the integer columns
            df[["Id", "Response"]] = df[["Id", "Response"]].astype(int)
            df = df.sort_values("Id")

            # Specify output file name
            if "knn" in model_name:
                # Special case for KNN : specify n_neighbors in out_name
                out_name = out_dir + "%s_%s_%s_train.csv" % (
                    model_name, feat_choice, model_conf["n_neighbors"])
            else:
                out_name = out_dir + "%s_%s_train.csv" % (
                    model_name, feat_choice)
            df.to_csv(out_name, index=False, float_format='%.3f')
    def save_test_pred(self):
        """
        Compute and save the test predictions used as LV1 test features.

        For every entry of self.d_model whose "train" flag is truthy:
          - load the train and test data selected by the entry's
            "feat_choice",
          - get the test predictions and the column names from
            self._get_test_preds,
          - write one csv file (rows sorted by Id, floats with 3 decimals)
            to ./Data/Level1_model_files/Test/.

        The column names returned by self._get_test_preds must contain
        "Id"; the data is laid out as Id first, then the predictions.

        The output directory is created when it does not exist yet.
        """

        out_dir = "./Data/Level1_model_files/Test/"
        # try/except (rather than exists + makedirs) also works when another
        # process creates the folder in between, on Python 2 and 3 alike.
        try:
            os.makedirs(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise

        # Loop over estimators
        for model_name, model_conf in self.d_model.items():
            # Only train if desired
            if not model_conf["train"]:
                continue

            # Get data
            feat_choice = model_conf["feat_choice"]
            X_train, y_train, Id_train = prep.prepare_lv1_data(
                feat_choice, "train")
            # The second item returned for the test set is not used here
            X_test, _, Id_test = prep.prepare_lv1_data(feat_choice, "test")
            # Single-argument form: valid on Python 2 and Python 3, and
            # prints the same text as the former print statement
            print("Compute test pred for model:  %s" % (model_name,))

            # Compute test preds
            y_pred_test, list_col = self._get_test_preds(
                X_train, y_train, Id_train, X_test, Id_test, model_name)

            # Turn everything into 2D columns so they can be stacked.
            # list_col also names the Id column, hence the -1.
            Id_test = np.reshape(Id_test, (Id_test.shape[0], 1))
            y_pred_test = np.reshape(
                y_pred_test, (y_pred_test.shape[0], len(list_col) - 1))

            # Save predictions to csv file with pandas
            data = np.hstack((Id_test, y_pred_test))
            df = pd.DataFrame(data, columns=list_col)
            # hstack yields a single dtype; restore the integer Id column
            df["Id"] = df["Id"].astype(int)
            df = df.sort_values("Id")

            # Specify output file name
            if "knn" in model_name:
                # Special case for KNN : specify n_neighbors in out_name
                out_name = out_dir + "%s_%s_%s_test.csv" % (
                    model_name, feat_choice, model_conf["n_neighbors"])
            else:
                out_name = out_dir + "%s_%s_test.csv" % (
                    model_name, feat_choice)
            df.to_csv(out_name, index=False, float_format='%.3f')
Example #3
0
    def save_test_pred(self):
        """
        Compute and save the test predictions used as LV1 test features.

        For every entry of self.d_model whose "train" flag is truthy:
          - load the train and test data selected by the entry's
            "feat_choice",
          - get the test predictions and the column names from
            self._get_test_preds,
          - write one csv file (rows sorted by Id, floats with 3 decimals)
            to ./Data/Level1_model_files/Test/.

        The column names returned by self._get_test_preds must contain
        "Id"; the data is laid out as Id first, then the predictions.

        The output directory is created when it does not exist yet.
        """

        out_dir = "./Data/Level1_model_files/Test/"
        # try/except (rather than exists + makedirs) also works when another
        # process creates the folder in between, on Python 2 and 3 alike.
        try:
            os.makedirs(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise

        # Loop over estimators
        for model_name, model_conf in self.d_model.items():
            # Only train if desired
            if not model_conf["train"]:
                continue

            # Get data
            feat_choice = model_conf["feat_choice"]
            X_train, y_train, Id_train = prep.prepare_lv1_data(
                feat_choice, "train")
            # The second item returned for the test set is not used here
            X_test, _, Id_test = prep.prepare_lv1_data(feat_choice, "test")
            # Single-argument form: valid on Python 2 and Python 3, and
            # prints the same text as the former print statement
            print("Compute test pred for model:  %s" % (model_name,))

            # Compute test preds
            y_pred_test, list_col = self._get_test_preds(
                X_train, y_train, Id_train, X_test, Id_test, model_name)

            # Turn everything into 2D columns so they can be stacked.
            # list_col also names the Id column, hence the -1.
            Id_test = np.reshape(Id_test, (Id_test.shape[0], 1))
            y_pred_test = np.reshape(
                y_pred_test, (y_pred_test.shape[0], len(list_col) - 1))

            # Save predictions to csv file with pandas
            data = np.hstack((Id_test, y_pred_test))
            df = pd.DataFrame(data, columns=list_col)
            # hstack yields a single dtype; restore the integer Id column
            df["Id"] = df["Id"].astype(int)
            df = df.sort_values("Id")

            # Specify output file name
            if "knn" in model_name:
                # Special case for KNN : specify n_neighbors in out_name
                out_name = out_dir + "%s_%s_%s_test.csv" % (
                    model_name, feat_choice, model_conf["n_neighbors"])
            else:
                out_name = out_dir + "%s_%s_test.csv" % (
                    model_name, feat_choice)
            df.to_csv(out_name, index=False, float_format='%.3f')
    def save_oos_pred(self):
        """
        Compute and save the out-of-sample (OOS) predictions used as
        LV1 train features.

        For every entry of self.d_model whose "train" flag is truthy:
          - load the train data selected by the entry's "feat_choice",
          - get the OOS predictions and the column names from
            self._get_oos_preds,
          - write one csv file (rows sorted by Id, floats with 3 decimals)
            to ./Data/Level1_model_files/Train/.

        The column names returned by self._get_oos_preds must contain
        "Id" and "Response"; the data is laid out as Id first, then the
        predictions, then the original target.

        The output directory is created when it does not exist yet.
        """
        # Local import so the method does not rely on a module-level one
        import os

        out_dir = "./Data/Level1_model_files/Train/"
        # to_csv does not create missing folders, so make sure it is there.
        # try/except (rather than exists + makedirs) also works when another
        # process creates the folder in between, on Python 2 and 3 alike.
        try:
            os.makedirs(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise

        # Loop over estimators
        for model_name, model_conf in self.d_model.items():
            # Only train if desired
            if not model_conf["train"]:
                continue

            # Get data
            feat_choice = model_conf["feat_choice"]
            X, y, Id = prep.prepare_lv1_data(feat_choice, "train")
            # Keep a copy of y to deal with classifiers which may modify y
            y_keep = y.copy()
            # Single-argument form: valid on Python 2 and Python 3, and
            # prints the same text as the former print statement
            print("Compute OOS pred for model:  %s" % (model_name,))

            # Compute OOS preds
            y_pred_oos, list_col = self._get_oos_preds(X, y, Id, model_name)

            # Turn everything into 2D columns so they can be stacked.
            # list_col also names the Id and Response columns, hence the -2.
            Id = np.reshape(Id, (Id.shape[0], 1))
            y_keep = np.reshape(y_keep, (y_keep.shape[0], 1))
            y_pred_oos = np.reshape(
                y_pred_oos, (y_pred_oos.shape[0], len(list_col) - 2))

            # Save predictions to csv file with pandas
            data = np.hstack((Id, y_pred_oos, y_keep))
            df = pd.DataFrame(data, columns=list_col)
            # hstack yields a single dtype; restore the integer columns
            df[["Id", "Response"]] = df[["Id", "Response"]].astype(int)
            df = df.sort_values("Id")

            # Specify output file name
            if "knn" in model_name:
                # Special case for KNN : specify n_neighbors in out_name
                out_name = out_dir + "%s_%s_%s_train.csv" % (
                    model_name, feat_choice, model_conf["n_neighbors"])
            else:
                out_name = out_dir + "%s_%s_train.csv" % (
                    model_name, feat_choice)
            df.to_csv(out_name, index=False, float_format='%.3f')