Example No. 1
def main_preprocessing_mmc(conf, conf_model, dataset="train"):
    context_size = conf_model["context_size"]
    v = conf_model["verbose"]
    path_model_df_prep = Path(conf_model["path"] + "df_%s_preprocessed.csv" % dataset)
    if conf_model["preprocessing_%s" % dataset][0]:
        path_dataprep = Path(conf["paths"]["dataprep"] + "df_%s.csv" % dataset)
        df = u.load_file(path_dataprep)
        df = preprocess_text_data(df, verbose=v)
        u.record_file(df, path_model_df_prep)
    else:
        df = u.load_file(path_model_df_prep)
        df["txt"] = df["txt"].map(eval)
    # Building learning matrix
    path_model_df_learn = Path(conf_model["path"] + "df_%s_learning_cs%d.csv" % (dataset, context_size))
    if conf_model["preprocessing_%s" % dataset][1]:
        df = create_dataframe_one_line_per_date_with_context(df, context_size, verbose=v)
        u.vprint("Recording data", v)
        u.record_file(df, path_model_df_learn)
    else:
        u.vprint("Loading data in one line per date", v)
        df = u.load_file(path_model_df_learn)
    df[["pos_moy", "part_moy", "nb_app"]] = df[["pos_moy", "part_moy", "nb_app"]].astype(float)
    X = df[["txt_id", "date", "context_date", "pos_moy", "part_moy", "nb_app"]].values
    if "target" in df.columns:
        y = df["target"].astype(int).values
        u.vprint("X shape : %s" % str(X.shape), v)
        u.vprint("y shape : %s" % str(y.shape), v)
        return X, y
    else:
        u.vprint("X shape : %s" % str(X.shape), v)
        return X
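
All of these examples rely on a small utility module imported as u. Its real implementation is not shown in the snippets; a minimal sketch of what u.vprint, u.load_file and u.record_file could look like, assuming the two I/O helpers simply dispatch on the file extension, is the following (hypothetical, for illustration only):

import json
import pickle
from pathlib import Path

import pandas as pd


def vprint(msg, verbose):
    # Print msg only when verbose mode is enabled.
    if verbose:
        print(msg)


def load_file(path):
    # Assumption: .csv files are read as DataFrames, .pkl as pickles, anything else as text.
    path = Path(path)
    if path.suffix == ".csv":
        return pd.read_csv(path)
    if path.suffix == ".pkl":
        with open(path, "rb") as f:
            return pickle.load(f)
    return path.read_text()


def record_file(obj, path, mode="w", **kwargs):
    # Assumption: the target extension decides how obj is persisted.
    path = Path(path)
    if path.suffix == ".csv":
        obj.to_csv(path, **kwargs)
    elif path.suffix == ".pkl":
        with open(path, "wb") as f:
            pickle.dump(obj, f)
    elif path.suffix == ".json":
        with open(path, mode) as f:
            json.dump(obj, f)
    else:
        with open(path, mode) as f:
            f.write(str(obj))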
Example No. 2
 def save_info_dataset(self, info_datasets):
     # Record a small sample and the shape/path of each (dataset, path) pair.
     for dataset, path in info_datasets:
         name = Path(path).name
         if isinstance(dataset, pd.DataFrame):
             sample = dataset.head(min(5, len(dataset)))
         else:
             sample = pd.DataFrame(dataset[0:5])
         shape = dataset.shape
         u.record_file(sample,
                       self.exp_dir / Path("dataset_%s_sample.csv" % name))
         info = "shape : %s\n" % str(shape) + "path : %s" % str(path)
         u.record_file(
             info, self.exp_dir / Path("dataset_%s_info.txt" % str(name)))
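
A hypothetical call to this method, assuming tracker is an instance of the (unshown) experiment class that owns exp_dir, might look like:

# Hypothetical usage; tracker and df_train are illustrative, not taken from the project.
import pandas as pd

df_train = pd.DataFrame({"txt": ["foo", "bar"], "target": [0, 1]})
tracker.save_info_dataset([(df_train, "data/dataprep/df_train.csv")])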
Example No. 3
def main_ml_multi_classes_search_best_model(conf):
    conf_model = conf["models"]["ml_multi_classes"]
    v = conf_model["verbose"]
    summary = "\n******************************************* NEW SESSION *******************************************\n"
    summary += str(datetime.now()) + "\n"
    summary += str(conf_model) + "\n"

    ### Preprocessing
    X, y = main_preprocessing_mmc(conf, conf_model, dataset="train")

    ### Split train, test
    u.vprint("Splitting data in train and test", v)
    X_train, X_test, y_train, y_test = split_train_test(X, y)

    ### Learning
    # Get the estimator
    u.vprint("Initializing estimator", v)
    estimator = get_estimator(conf_model)

    # Grid search
    exp_dir = get_experiment_directory(conf_model)
    if conf_model["search_best_params"]:
        u.vprint("Performing best params search", v)
        best_params = search_best_params(conf_model, estimator, X_train, y_train)
        u.record_file(best_params, exp_dir / "best_params.json")

    # Set params
    estimator = set_estimator_params(estimator, conf_model, exp_dir)
    summary += str(estimator.get_params()) + "\n"

    # Learning
    u.vprint("Learning phase", v)
    estimator.fit(X_train, y_train)

    # Assessing
    u.vprint("Assessing phase", v)
    assess_summary = main_assessing(conf, conf_model, estimator, X_train, X_test, y_train, y_test)

    summary += assess_summary
    u.record_file(summary, exp_dir / "summary_results.txt", mode="a")
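
The conf_model dictionary drives every step above. Judging only from the keys accessed in Examples 1 and 3, a configuration entry for this model might look roughly as follows (the values are illustrative, not taken from the project):

# Illustrative configuration; only the key names are inferred from the code above.
conf = {
    "paths": {"dataprep": "data/dataprep/"},
    "models": {
        "ml_multi_classes": {
            "verbose": True,
            "path": "models/ml_multi_classes/",
            "context_size": 3,
            # flags: [run text preprocessing?, rebuild the one-line-per-date matrix?]
            "preprocessing_train": [True, True],
            "search_best_params": True,
        },
    },
}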
Example No. 4
def main_na_nc_classifier(conf):
    conf_model = conf["models"]["na_nc_classifier"]
    v = conf_model["verbose"]

    # Preprocessing
    path_model_df_prep = Path(conf_model["path"] + "df_train_preprocessed.csv")
    if conf_model["preprocessing"]:
        path_dataprep = Path(conf["paths"]["dataprep"] + "df_train.csv")
        df_train = u.load_file(path_dataprep)
        df_train = preprocessing_data(df_train, verbose=v)
        u.record_file(df_train, path_model_df_prep)
    else:
        df_train = u.load_file(path_model_df_prep)
    X = df_train["txt"].values
    y = df_train["date_consolidation"].values

    ### Split train, test
    u.vprint("Splitting data in train and test", v)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    ### Learning
    # Get the estimator
    u.vprint("Initializing estimator", v)
    estimator = get_estimator(conf_model)

    # Grid search
    exp_dir = get_experiment_directory(conf_model)
    if conf_model["search_best_params"]:
        u.vprint("Performing best params search", v)
        best_params = search_best_params(conf_model, estimator, X_train,
                                         y_train)
        u.record_file(best_params, exp_dir / "best_params.json")

    # Set params
    estimator = set_estimator_params(estimator, conf_model, exp_dir)

    # Learning
    path_pickle_model = exp_dir / "fitted_model.pkl"
    if conf_model["learning"]:
        estimator.fit(X_train, y_train)
        u.record_file(estimator, path_pickle_model)

    # Assessing
    res1_train, res1_val = cross_validate_model(conf_model, estimator, X_train,
                                                y_train)
    u.vprint("Cross validation results : ", v)
    print(res1_train)
    print(res1_val)
    res_test = eval_model(estimator, X_train, y_train, X_test, y_test)
    u.vprint("Test results : ", v)
    print(res_test)
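
eval_model and cross_validate_model are not defined in these snippets. A minimal sketch of what eval_model could do, assuming a scikit-learn style estimator that is fitted on the training split and scored on the held-out split, is:

# Hypothetical sketch of eval_model; the real implementation may differ.
from sklearn.metrics import classification_report


def eval_model(estimator, X_train, y_train, X_test, y_test):
    # Fit on the training split and report per-class metrics on the test split.
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_test)
    return classification_report(y_test, y_pred)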
Example No. 5
 def save_grid_search_results(self, grid_search_results):
     u.record_file(grid_search_results,
                   self.exp_dir / "grid_search_results.pkl")
Example No. 6
 def save_prediction(self, predictions):
     u.record_file(predictions, self.exp_dir / "prediction.csv")
Example No. 7
 def save_evaluation_results(self, results):
     u.record_file(results, self.exp_dir / Path("results.csv"), index=True)
Example No. 8
 def save_model(self, model):
     u.record_file(
         model, self.exp_dir / Path("model_%s.pkl" % str(self.model_name)))
Example No. 9
 def save_exp_context(self):
     # Note: this records the repr of the instance attributes, not strictly valid JSON.
     u.record_file(str(self.__dict__), self.exp_dir / "context.json")
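
Examples 2 and 5 through 9 all look like methods of the same experiment-tracking class, since they share self.exp_dir and self.model_name. A plausible skeleton for that class (its real name and constructor are not shown) is:

# Hypothetical wrapper class; only the attributes used by the methods above are assumed.
from pathlib import Path


class ExperimentRecorder:
    def __init__(self, exp_dir, model_name):
        self.exp_dir = Path(exp_dir)    # directory receiving all experiment artifacts
        self.model_name = model_name    # used to name the pickled model file

    # save_exp_context, save_model, save_prediction, save_evaluation_results,
    # save_grid_search_results and save_info_dataset would be attached here.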