def insert_df(df, table, stats_dict):
    """Insert the rows of ``df`` whose ``id`` is not yet present in ``table``.

    The ids already stored in ``table`` are fetched first; only rows of
    ``df`` with an unseen id are handed to ``insert_values``. When nothing
    is left to insert, an info message is logged instead.

    Args:
        df: Frame with an ``id`` column. NOTE: the column is cast to ``int``
            in place, so the caller's frame is modified.
        table: Name of the target table, interpolated directly into the
            query text.
        stats_dict: Passed through untouched to ``insert_values``.
    """
    existing = get_df_from_query('select id from {}'.format(table))
    known_ids = [] if existing.empty else existing['id'].tolist()

    # Normalise the id dtype before the membership test against stored ids.
    df['id'] = df['id'].astype(int)
    pending = df[~df['id'].isin(known_ids)]

    if pending.empty:
        logger.info("All values in {} are already inserted".format(table))
        return
    insert_values(pending, table, stats_dict)
def get_dino_follows(client_id, media_id, int_name):
    """Return the usernames recorded in ``interactions`` for one client.

    Rows are filtered by ``client_id``, ``media_id`` and by the interaction
    whose name in ``interaction_ids`` equals ``int_name``.

    Args:
        client_id: Value compared against ``interactions.client_id``.
        media_id: Value compared against ``interactions.media_id``.
        int_name: Interaction name looked up in ``interaction_ids``.

    Returns:
        list: The ``username`` column of the query result as a plain list.
    """
    # Values are interpolated straight into the SQL text (no binding).
    template = """select username from interactions where client_id = {cl_id} and media_id= {m_id} and interaction_id in ( select id from interaction_ids where name = '{int_name}' ) """
    query = template.format(cl_id=client_id, m_id=media_id, int_name=int_name)
    result = get_df_from_query(query)
    return result['username'].tolist()
def get_interaction_id(interaction):
    """Return the id stored in ``interaction_ids`` for the given name.

    Args:
        interaction: Interaction name, interpolated into the query text.

    Returns:
        The ``id`` value of the result row labelled ``0``.
        NOTE(review): ``.loc[0]`` presumably raises when the query returns
        no rows -- confirm how callers handle unknown names.
    """
    query = "select id from interaction_ids where name = '{}'".format(interaction)
    result = get_df_from_query(query)
    return result['id'].loc[0]
def get_user_id(username):
    """Return the id of the client whose ``ig_username`` equals ``username``.

    The query template previously contained no replacement field, so
    ``.format(username)`` left it unchanged and the argument was ignored.
    The ``'{}'`` placeholder restores the lookup by username, in the same
    form ``get_interaction_id`` uses.

    Args:
        username: Instagram username, interpolated into the query text.

    Returns:
        The ``id`` value of the result row labelled ``0``.
        NOTE(review): ``.loc[0]`` presumably raises when no client matches
        -- confirm how callers handle unknown usernames.
    """
    user_id = get_df_from_query(
        "select id from clients where ig_username= '{}'".format(username)
    )['id'].loc[0]
    return user_id
def get_media_id():
    """Return the id stored in ``media_ids`` for the ``'instagram'`` entry.

    Returns:
        The ``id`` value of the result row labelled ``0``.
    """
    query = "select id from media_ids where name = 'instagram'"
    return get_df_from_query(query)['id'].loc[0]
def get_all_clients():
    """Fetch ``id`` and ``ig_username`` for every client with a username set.

    Returns:
        Whatever ``get_df_from_query`` yields for the query (one row per
        client whose ``ig_username`` is not null).
    """
    return get_df_from_query(
        "select id, ig_username from clients where ig_username is not null"
    )
start_from_zero = input( "This will restart the pred_stats table. Are you sure you want to run it? Insert y or n: " ) if start_from_zero == 'y': create_tables(PROJECT_DIR, ['pred_stats']) else: sys.exit() ordered_df = df.sort_values(['game_id', 'sequence_number']) processed_df = process_game(ordered_df) predicted_df = get_model(processed_df) insert_predictive_stats(predicted_df) return None if __name__ == '__main__': logging.config.dictConfig(config['logger']) from datetime import datetime sql_path = os.path.join(PROJECT_DIR, 'sql', 'get_game_events.sql') query = open(sql_path, 'r').read().format(sd=START_DATE, ed=END_DATE) df = get_df_from_query(query) t0 = datetime.now() setup = True main_xg(df, setup) t1 = datetime.now() logger.info("The process took {}".format(t1 - t0))
def get_processed_game(game_id):
    """Run the ``processed_game.sql`` query for a single game.

    The SQL template is read from ``<UD_DIR>/sql/processed_game.sql`` and
    its ``{game_id}`` field is filled in with ``game_id``.

    Args:
        game_id: Value substituted for ``{game_id}`` in the SQL template.

    Returns:
        The result of ``get_df_from_query`` for the rendered query.
    """
    sql_path = os.path.join(UD_DIR, 'sql', 'processed_game.sql')
    # Context manager closes the file promptly; the bare open().read()
    # used before left the handle to be reclaimed by the garbage collector.
    with open(sql_path, 'r') as sql_file:
        query = sql_file.read().format(game_id=game_id)
    return get_df_from_query(query)