Exemple #1
0
def transform_subs_data():
    """Run the transform step for the raw Reddit subs files.

    Loads the "reddit" ``.txt`` files from ``SUBS_RAW_DIR`` using
    ``SUBS_HEADERS``, passes the result through ``transform_data``, writes it
    with ``df_to_csv`` under the name ``"reddit_subs"`` and finally calls
    ``prefix_files`` on the raw directory with the ``"transformed"`` prefix.

    Does nothing when ``load_dataframes`` returns ``None``.
    """
    raw_subs = load_dataframes(SUBS_RAW_DIR, "reddit", ".txt", SUBS_HEADERS)

    if raw_subs is not None:
        transformed_subs = transform_data(raw_subs)
        df_to_csv(transformed_subs, SUBS_TRANSFORMED_DIR, "reddit_subs")
        # Presumably marks the raw inputs as already processed so they are
        # not picked up again -- confirm against prefix_files.
        prefix_files(SUBS_RAW_DIR, "transformed")
Exemple #2
0
def transform_posts_data():
    """Run the transform step for the raw Reddit posts files.

    Pipeline: ``load_dataframes`` -> ``transform_data`` -> ``filter_data`` ->
    ``df_to_csv`` (as ``"reddit_posts"`` in ``POSTS_TRANSFORMED_DIR``), then
    ``prefix_files`` on ``POSTS_RAW_DIR`` with the ``"transformed"`` prefix.

    Returns immediately, without writing anything, when ``load_dataframes``
    returns ``None``.
    """
    raw_posts = load_dataframes(
        POSTS_RAW_DIR, "reddit", ".txt", POSTS_HEADERS
    )
    if raw_posts is None:
        # Nothing was loaded, so there is nothing to transform or rename.
        return

    transformed_posts = transform_data(raw_posts)
    kept_posts = filter_data(transformed_posts)
    df_to_csv(kept_posts, POSTS_TRANSFORMED_DIR, "reddit_posts")

    prefix_files(POSTS_RAW_DIR, "transformed")
Exemple #3
0
def transform_users_data():
    """Run the transform step for the raw Reddit user files.

    Loads the "reddit" ``.txt`` files from ``USERS_RAW_DIR`` using
    ``USERS_HEADERS``, applies ``transform_data`` and ``filter_users_stats``,
    writes the result with ``df_to_csv`` as ``"reddit_user_stats"``, calls
    ``prefix_files`` on the raw directory with the ``"transformed"`` prefix
    and then passes ``ERRORS`` to ``log_errors`` under the
    ``"reddit.users_transform"`` label.

    When ``load_dataframes`` returns ``None`` the function returns without
    doing any of the above, including the error logging.
    """
    raw_users = load_dataframes(USERS_RAW_DIR, "reddit", ".txt", USERS_HEADERS)

    if raw_users is not None:
        transformed_users = transform_data(raw_users)
        user_stats = filter_users_stats(transformed_users)

        df_to_csv(user_stats, USERS_TRANSFORMED_DIR, "reddit_user_stats")
        prefix_files(USERS_RAW_DIR, "transformed")

        # ERRORS is presumably filled by the steps above -- confirm which
        # helper appends to it.
        log_errors(ERROR_LOG_PATH, "reddit.users_transform", ERRORS)
Exemple #4
0
def parse_tweets_data():
    """Parse every tweet found in the raw "stf" ``.txt`` files.

    Each file path returned by ``get_data_filepaths`` is opened through
    ``load_tweets`` and every tweet it yields is passed to ``parse_tweet``.
    Once all files are consumed, ``separate_users_tweets``,
    ``save_parsed_tweets`` and ``save_chosen_for_scoring`` are called in that
    order, and ``prefix_files`` is called on ``RAW_DIR`` with the ``"parsed"``
    prefix.
    """
    raw_paths = get_data_filepaths(RAW_DIR, "stf", ".txt")

    # Lazily flatten "files -> tweets" so tweets are still parsed one by one,
    # in file order, exactly as a nested loop would.
    every_tweet = (
        tweet
        for path in raw_paths
        for tweet in load_tweets(path)
    )
    for tweet in every_tweet:
        parse_tweet(tweet)

    separate_users_tweets()
    save_parsed_tweets()
    save_chosen_for_scoring()

    prefix_files(RAW_DIR, "parsed")
Exemple #5
0
def transform_g1_data():
    """Run the transform step for the raw "g1" files.

    Loads the files from ``RAW_DIR`` with ``HEADERS`` and filters them with
    ``filter_dataframe``. If the filtered frame is not empty it is passed
    through ``transform_data`` and written with ``df_to_csv`` as ``"g1"``.
    ``prefix_files`` is called on ``RAW_DIR`` with the ``"transformed"``
    prefix whether or not anything was written.

    Does nothing at all when ``load_dataframes`` returns ``None``.
    """
    # NOTE(review): the extension is given as "txt" without a leading dot;
    # confirm load_dataframes accepts that form.
    loaded = load_dataframes(RAW_DIR, "g1", "txt", HEADERS)

    if loaded is not None:
        kept = filter_dataframe(loaded)

        if not kept.empty:
            transformed = transform_data(kept)
            df_to_csv(transformed, TRANSFORMED_DIR, "g1")

        # Runs even when every row was filtered out.
        prefix_files(RAW_DIR, "transformed")
Exemple #6
0
def load_subs_data():
    """Load the transformed Reddit subs CSVs through ``sqlalch_load``.

    Reads the "reddit" ``.csv`` files from ``SUBS_TRANSFORMED_DIR`` and hands
    the result to ``sqlalch_load`` with the arguments ``"reddit"`` and
    ``"sub_counts"``. ``prefix_files`` is called with the ``"loaded"`` prefix
    only when that call did not raise ``SQLAlchError``. ``ERRORS`` is passed
    to ``log_errors`` under ``"reddit.subs_load"`` in both cases.

    Returns early, without logging, when ``load_dataframes`` returns ``None``.
    """
    subs_frame = load_dataframes(SUBS_TRANSFORMED_DIR, "reddit", ".csv")
    if subs_frame is None:
        return

    load_succeeded = True
    try:
        sqlalch_load(subs_frame, "reddit", "sub_counts", ERRORS)
    except SQLAlchError:
        # Not re-raised on purpose: the failure details are presumably
        # already collected in ERRORS (confirm in sqlalch_load) and are
        # written out by log_errors below.
        load_succeeded = False

    if load_succeeded:
        prefix_files(SUBS_TRANSFORMED_DIR, "loaded")

    log_errors(ERROR_LOG_PATH, "reddit.subs_load", ERRORS)
Exemple #7
0
def load_g1_data():
    """Load the transformed "g1" CSVs through ``sqlalch_load``.

    Reads the "g1" ``.csv`` files from ``TRANSFORMED_DIR`` and hands the
    result to ``sqlalch_load`` with the arguments ``"noticias"`` and
    ``"noticias"``. On success ``prefix_files`` is called with the
    ``"loaded"`` prefix; a ``SQLAlchError`` is swallowed. Either way
    ``ERRORS`` is passed to ``log_errors`` under ``"g1.load"``.

    Nothing happens, including logging, when ``load_dataframes`` returns
    ``None``.
    """
    news_frame = load_dataframes(TRANSFORMED_DIR, "g1", ".csv")

    if news_frame is not None:
        try:
            sqlalch_load(news_frame, "noticias", "noticias", ERRORS)
        except SQLAlchError:
            # Best-effort load: leave the files unprefixed and fall through
            # to the error logging below.
            pass
        else:
            prefix_files(TRANSFORMED_DIR, "loaded")

        log_errors(ERROR_LOG_PATH, "g1.load", ERRORS)
Exemple #8
0
def load_users_data():
    """Load the Twitter users CSVs through ``sqlalch_load``.

    Reads the "users" ``.csv`` files from ``USER_CSV_DIR`` and hands the
    result to ``sqlalch_load`` with the arguments ``"twitter"`` and
    ``"users"``. ``prefix_files`` is called with the ``"loaded"`` prefix
    unless ``sqlalch_load`` raised ``SQLAlchError``. ``ERRORS`` is then
    passed to ``log_errors`` under ``"twitter.users_load"``.

    Returns early, without logging, when ``load_dataframes`` returns ``None``.
    """
    users_frame = load_dataframes(USER_CSV_DIR, "users", ".csv")
    if users_frame is None:
        return

    load_failed = False
    try:
        sqlalch_load(users_frame, "twitter", "users", ERRORS)
    except SQLAlchError:
        # Swallowed deliberately so the error log is still written below.
        load_failed = True

    if not load_failed:
        prefix_files(USER_CSV_DIR, "loaded")

    log_errors(ERROR_LOG_PATH, "twitter.users_load", ERRORS)