Python update_table 예제들, db_functions.update_table Python 예제들

예제 #1

0

파일 보기

파일: sn_scrape.py 프로젝트: ChristianZX/TwitterAnalytics

def SN_db_operations(hashtag: str, since: str, until: str) -> tuple:
    """
    Downloads the tweet IDs of a hashtag and stores them in a fresh staging table.

    1. Drops and re-creates an empty staging table for the hashtag
    2. Calls the tweet ID downloader (SN_get_tweet_ids_for_hashtag)
    3. Writes the IDs to the temp table "update_temp" and copies them into the staging table

    Terminates the process via sys.exit() if no tweet IDs were fetched.
    :param hashtag: hashtag, to be downloaded
    :param since: start date for tweet download (string)
    :param until: until date for tweet download (string)
    :return: table_name, hashtag, number of downloaded tweet IDs
    """
    # adds prefix (s_h_) and suffix (date and time) to the table name
    table_name = db_functions.get_staging_table_name(hashtag)
    db_functions.drop_table(table_name)
    db_functions.create_empty_staging_table(table_name)

    tweet_ids = SN_get_tweet_ids_for_hashtag(since, until, hashtag)  # downloads tweet IDs
    if len(tweet_ids) == 0:
        print("####################################################")
        print("####Warning! No Tweets fetched! Process aborted!####")
        print("####################################################")
        # NOTE(review): exits with status 0 and leaves the empty staging table behind
        sys.exit()

    df = pd.DataFrame(tweet_ids)
    write_to_table = "update_temp"

    # Write tweet IDs to the temp table
    db_functions.df_to_sql(df, write_to_table, drop="replace")
    try:
        # Insert temp table content into the staging table; "0" is the temp table
        # column the statement reads the IDs from
        db_functions.update_table(
            f'insert into {table_name} (id) select cast ("0" as bigint) from {write_to_table}'
        )
    finally:
        # Always remove the temp table, even when the insert fails
        db_functions.drop_table(write_to_table)

    print(f"Table {table_name} created with {len(df)} tweets.")
    return table_name, hashtag, len(df)

예제 #2

0

파일 보기

파일: sn_scrape.py 프로젝트: ChristianZX/TwitterAnalytics

def hashtag_download_launcher(hashtag, since: str, until: str,
                              download_parent_tweets: bool):
    """
    Manages whole hashtag download process.
    Step1: Calls procedures for SQL table creation, Tweet ID download
    Step2: Calls Tweet Details download launcher to add tweet details. Does this in iterations to minimize data loss risk
    Step3: Inserts user IDs from the staging table that are not yet in n_users into n_users
    Result is stored in staging table s_h_HASHTAG_TIMESTAMP
    :param hashtag: hashtag, to be downloaded
    :param since: start date for tweet download (string)
    :param until: until date for tweet download (string)
    :param download_parent_tweets: passed through unchanged to TwitterAPI.tweet_details_download_launcher
    :return: table_name
    """
    table_name, hashtag, len_df = SN_db_operations(hashtag, since, until)
    print("Hashtag Twitter ID download complete. Starting detail download.")
    bulk_size = 1000
    # +1 to avoid 0 iterations, +5 is a good estimate for number of iterations to get all parent tweets
    estimated_iterations = len_df // bulk_size + 1 + 5

    new_tweets_fetched = 1
    loop_counter = 1
    # Keep downloading until an iteration reports 0 newly fetched tweets
    while new_tweets_fetched != 0:
        start_time = time.time()
        print(
            f"Iteration {loop_counter} running. Estimated Iterations {estimated_iterations}"
        )
        new_tweets_fetched = TwitterAPI.tweet_details_download_launcher(
            table_name, hashtag, bulk_size, download_parent_tweets)
        # Report the runtime before incrementing so the iteration number matches the line above
        print(f"Iteration {loop_counter} runtime: {time.time() - start_time} ")
        loop_counter += 1
    print("Hashtag downloaded successfully.")

    # insert new users into table n_users
    new_users = f"""insert into n_users (id)
    select f.user_id from {table_name} f  left join n_users u on f.user_id = u.id
    where u.id is null and f.user_id is not null"""
    db_functions.update_table(new_users)
    return table_name

예제 #3

0

파일 보기

파일: BERT_friends_ML.py 프로젝트: ChristianZX/TwitterAnalytics

def inference_bert_friends(classifier, column_list: list, sql: str,
                           min_matches: int) -> int:
    """
    Performs inference based on follower relationships and stores the result in n_users.

    The rows returned by `sql` are grouped by their 'follows_ids' value. For each group a
    0/1 feature vector over `column_list` is built (1 where the column entry appears among
    the group's 'user_id' values) and passed to classifier.predict_proba.
    Groups with fewer than `min_matches` entries in `column_list` get no prediction; they
    only receive a new bert_friends_ml_last_seen value so they can be ignored during the next loop.

    :param classifier: Classifier offering predict_proba
    :param column_list: Column list to be used. The 'user_id' values of each group are matched against it
    :param sql: Sql whose result provides the columns 'follows_ids' and 'user_id'
    :param min_matches: Minimum number of matches with column_list required for a prediction.
    More connections = more accurate prediction result
    :return: number of rows for which a prediction was written
    """
    start = time.time()
    friends = db_functions.select_from_db(sql)
    input_dataset_length = len(friends)
    print(f"SQL fetching time: {time.time() - start}")

    if input_dataset_length == 0:
        return 0

    friend_list = friends['follows_ids'].values.tolist()
    user_list = friends['user_id'].values.tolist()
    del friends  # the query result is no longer needed once both columns are extracted

    # Group the user_id values by the follows_ids value of the same row
    users_by_friend = defaultdict(set)
    for friend_id, user_id in zip(friend_list, user_list):
        users_by_friend[friend_id].add(user_id)
    del friend_list
    del user_list

    column_set = set(column_list)  # built once instead of once per iteration
    # Ids in this list will still get a last seen date in DB to ignore them during next loop
    conditions_not_met_list = []
    result_dict = {}
    # Iteration through all follows_ids values and prediction of faction
    for element, linked_users in tqdm(users_by_friend.items()):
        number_of_common_friends = len(linked_users & column_set)
        if number_of_common_friends < min_matches:
            conditions_not_met_list.append(element)
            continue

        # Single-row feature matrix in column_list order: 1 if the column entry is linked, else 0
        feature_row = [1 if column in linked_users else 0 for column in column_list]
        prediction_proba = classifier.predict_proba([feature_row])  # pure predict

        text, conf = helper_functions.conf_value("LR",
                                                 prediction_proba,
                                                 min_boundary=0.5,
                                                 max_boundary=1)
        result_dict[element] = [text, conf, number_of_common_friends]

    del users_by_friend

    timestamp = db_functions.staging_timestamp()
    # One row per predicted element; columns 0, 1, 2 hold text, conf and match count
    result_df = pd.DataFrame(result_dict).T
    result_df['last_seen'] = timestamp
    rows_processed = len(result_df)
    if rows_processed > 0:  # checks if data has been written to DF
        db_functions.df_to_sql(result_df, "temp_table", "replace")
        # presumably df_to_sql stores the DataFrame index as column "index" - verify against db_functions
        update_sql = """update n_users
        set bert_friends_ml_result = "0",
        bert_friends_ml_conf = cast("1" as numeric),
        bert_friends_ml_count = cast ("2" as integer),
        bert_friends_ml_last_seen = temp_table.last_seen
        from temp_table where
        cast (id as text) = temp_table."index"
        """
        start = time.time()
        try:
            db_functions.update_table(update_sql)
        finally:
            # Always remove the temp table, even when the update fails
            db_functions.drop_table("temp_table")
        print(f"Update Time: {time.time() - start}")
    else:
        print(
            f"WARNING: 0 new ratings generated despite an input dataset of {input_dataset_length} rows."
        )

    if conditions_not_met_list:
        # Column 0 = id without prediction, column 1 = timestamp of this run
        skipped_df = pd.DataFrame(
            [(element, timestamp) for element in conditions_not_met_list])
        db_functions.df_to_sql(skipped_df, "temp_table", drop='replace')
        last_seen_sql = 'update n_users set bert_friends_ml_last_seen = temp_table."1" from temp_table where n_users.id::text = temp_table."0"'
        try:
            db_functions.update_table(last_seen_sql)
        finally:
            # Previously this temp table was left behind; drop it like the one above
            db_functions.drop_table("temp_table")

    return rows_processed

예제 #4

0

파일 보기

    lr_pol_last_analysed text COLLATE pg_catalog."default",
    result_bert_friends text COLLATE pg_catalog."default",
    bert_friends_conf numeric,
    bert_friends_last_seen text COLLATE pg_catalog."default",
    bf_left_number integer,
    bf_right_number integer,
    combined_rating text COLLATE pg_catalog."default",
    combined_conf numeric,
    private_profile boolean,
    batch integer,
    bert_friends_ml_result text COLLATE pg_catalog."default",
    bert_friends_ml_conf numeric,
    bert_friends_ml_count integer,
    bert_friends_ml_last_seen text COLLATE pg_catalog."default"
)"""
db_functions.update_table(sql_table)

# Create table n_followers
# Builds the CREATE TABLE statement for public.n_followers and executes it through
# db_functions.update_table. The DDL uses PostgreSQL syntax (nextval(...)::regclass,
# COLLATE pg_catalog."default").
# NOTE(review): the "index" default draws from the sequence 'n_likes_index_seq' rather than
# an n_followers-specific one - presumably carried over from the n_likes DDL; confirm this is
# intended and that the sequence exists before this statement runs.
table_name = "n_followers"
sql_table = f"""
CREATE TABLE public.{table_name}
(
    index integer NOT NULL DEFAULT nextval('n_likes_index_seq'::regclass),
    username text COLLATE pg_catalog."default",
    user_id bigint,
    follows_users text COLLATE pg_catalog."default",
    follows_ids text COLLATE pg_catalog."default",
    retrieve_date text COLLATE pg_catalog."default"
)
"""
db_functions.update_table(sql_table)

예제 #5

0

파일 보기