import pandas as pd


def parse_cols(df, renamings, int8s, int32s, dates, timeformat, strs, bools):
    # apply the column renamings and the per-type formatting helpers in one pass
    rename_cols(df, renamings)
    format_int8_col(df, int8s)
    format_int32_col(df, int32s)
    format_date_cols(df, dates, timeformat)
    format_str_col(df, strs)
    format_bool_col(df, bools)
def append_new_table_mapd(connection, table_name, csv_file, ts_cols, ts_format, ts_units, int_cols):
    df = pd.read_csv(csv_file)
    format_int_col(df, int_cols)
    if ts_format == 'None':
        format_date_cols(df, ts_cols, un=ts_units)
    elif ts_units == 'None':
        format_date_cols(df, ts_cols, tf=ts_format)
    if df.empty:
        print("no results to upload")
    else:
        # append the new rows into the existing table in OmniSci
        df.reset_index(drop=True, inplace=True)
        print("loading table " + table_name)
        connection.load_table(table_name, df, preserve_index=False, create=False)
        # dedupe all of the rows
        command = "select CAST(view_timestamp as DATE) view_timestamp, MAX(view_unique) as view_unique, repo from oss_git_views where repo = 'mapd-core' group by view_timestamp, repo order by view_timestamp ASC"
        df_deduped = pd.read_sql_query(command, connection)
        # rebuild the table in OmniSci from the deduplicated rows
        print("reloading table " + table_name)
        print(df_deduped)
        drop_table_mapd(connection, table_name)
        connection.load_table(table_name, df_deduped, preserve_index=False, create=True)
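# The `connection` argument used throughout these helpers is assumed to be a pymapd
# Connection, since its load_table / create_table interface matches the calls above.
# connect_to_mapd is a hypothetical helper added here only for illustration; the
# credentials and port are placeholders, not values from this repo.
def connect_to_mapd(mapd_user, mapd_password, mapd_host, mapd_dbname):
    import pymapd
    # open a binary-protocol connection to the OmniSci server
    return pymapd.connect(user=mapd_user, password=mapd_password,
                          host=mapd_host, dbname=mapd_dbname, port=6274)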
def load_new_table_mapd(connection, table_name, csv_file, dtcol, tfrmt, mapd_host, mapd_user):
    df = pd.read_csv(csv_file)
    df.reset_index(drop=True, inplace=True)
    # force the column containing datetime values to be recast from strings to datetimes
    format_date_cols(df, dtcol, tfrmt)
    # drop the old table, then recreate it and load the new data into OmniSci
    drop_table_mapd(connection, table_name)
    connection.create_table(table_name, df, preserve_index=False)
    print("loading table " + table_name)
    connection.load_table(table_name, df)
def load_new_table_mapd(connection, table_name, csv_file, dtcol, tfrmt, drop_cols, mapd_host, mapd_user):
    df = pd.read_csv(csv_file)
    df.reset_index(drop=True, inplace=True)
    # force the column containing datetime values to be recast from strings to datetimes
    format_date_cols(df, dtcol, tfrmt)
    # drop the big columns of text we don't need for metrics
    df = df.drop(columns=drop_cols)
    # drop the old table, then recreate it and load the new data into OmniSci
    drop_table_mapd(connection, table_name)
    print("creating table " + table_name)
    print("with columns")
    print(list(df.columns.values))
    connection.create_table(table_name, df, preserve_index=False)
    print("loading table " + table_name)
    connection.load_table(table_name, df)
def parse_data(csvfile, dtcols, intcols, floatcols, strcols, renamings, tfrmt):
    df = pd.read_csv(csvfile)
    df.reset_index(drop=True, inplace=True)
    # force the column containing datetime values to be recast from strings to timestamps
    format_date_cols(df, dtcols, tfrmt)
    format_int_col(df, intcols)
    format_str_col(df, strcols)
    format_flt_col(df, floatcols)
    df = parse_geo_data(df)
    # drop the raw geo lookup columns once the parsed values are in place
    df = df.drop(columns=['geo_city_code', 'city_parent_code', 'city_target_type', 'city_status'])
    return df
def load_new_table_mapd(connection, table_name, csv_file, ts_cols, ts_format, ts_units, int_cols):
    df = pd.read_csv(csv_file)
    format_int_col(df, int_cols)
    if ts_format == 'None':
        format_date_cols(df, ts_cols, un=ts_units)
    elif ts_units == 'None':
        format_date_cols(df, ts_cols, tf=ts_format)
    if df.empty:
        print("no results to upload")
    else:
        df.reset_index(drop=True, inplace=True)
        print("loading table " + table_name)
        # append the data into the existing table in OmniSci
        connection.load_table(table_name, df, preserve_index=False, create=False)
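# A minimal sketch of driving the append-only loader defined just above, assuming the
# hypothetical connect_to_mapd() helper sketched earlier; the table name, CSV path, and
# column names are placeholders, not values taken from this repo.
if __name__ == '__main__':
    con = connect_to_mapd('mapd', 'changeme', 'localhost', 'mapd')
    load_new_table_mapd(con, 'example_table', 'example.csv',
                        ts_cols=['timestamp'], ts_format='%Y-%m-%d %H:%M:%S',
                        ts_units='None', int_cols=['count'])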