コード例 #1
0
ファイル: data_ingestion.py プロジェクト: ArtMana/Books
def generate_model_text_features(raw_df_path, save_path=None):
    """
    Build the feature set used by model 2 and optionally persist it.
    Computing these features takes multiple minutes.
    :param raw_df_path: path to raw DataFrame (generated from parse_xml_to_csv)
    :param save_path: path to save processed DataFrame to; when falsy,
        nothing is written to disk
    :return: processed DataFrame
    """
    formatted = format_raw_df(pd.read_csv(raw_df_path).copy())

    # Keep only the rows flagged as questions.
    questions = formatted.loc[formatted["is_question"]].copy()

    # Join title and body into one text column; missing values become "".
    questions["full_text"] = questions["Title"].str.cat(
        questions["body_text"], sep=" ", na_rep=""
    )

    # Run the feature steps in order, handing each one its own copy.
    featured = questions
    for add_features in (add_v1_features, add_v2_text_features):
        featured = add_features(featured.copy())

    if not save_path:
        return featured

    featured.to_csv(save_path)
    return featured
コード例 #2
0
def generate_model_text_features(raw_df_path, save_path=None):
    """
    Generate the features for model 2 and save them to disk.
    These features take a few minutes to compute.
    :param raw_df_path: path to the raw DataFrame (generated from parse_xml_to_csv)
    :param save_path: path to save the processed DataFrame to; when falsy,
        nothing is written to disk
    :return: processed DataFrame
    """
    formatted = format_raw_df(pd.read_csv(raw_df_path).copy())

    # Keep only the rows flagged as questions.
    questions = formatted.loc[formatted["is_question"]].copy()

    # Join title and body into one text column; missing values become "".
    questions["full_text"] = questions["Title"].str.cat(
        questions["body_text"], sep=" ", na_rep=""
    )

    # Run the feature steps in order, handing each one its own copy.
    featured = questions
    for add_features in (add_v1_features, add_v2_text_features):
        featured = add_features(featured.copy())

    if not save_path:
        return featured

    featured.to_csv(save_path)
    return featured
コード例 #3
0
ファイル: test_model.py プロジェクト: ArtMana/Books
def df_with_features():
    """
    Load the CSV at CURR_PATH / CSV_PATH, format it, and add text features.
    :return: the result of add_text_features_to_df on the formatted data
    """
    csv_file = CURR_PATH / CSV_PATH
    raw = pd.read_csv(csv_file)
    # Each helper receives its own copy, as in the rest of the pipeline.
    formatted = format_raw_df(raw.copy())
    return add_text_features_to_df(formatted.copy())