Example #1
0
def parse_cols(df, renamings, int8s, int32s, dates, timeformat, strs, bools):
    """Rename the columns of ``df`` and run every column formatter over it.

    The helpers are called in a fixed order: rename, int8, int32, date,
    string, bool. Nothing is returned, so any effect on ``df`` comes from
    the helpers themselves (presumably they mutate ``df`` in place --
    confirm against their definitions).

    Args:
        df: frame handed to every helper.
        renamings: forwarded to ``rename_cols``.
        int8s: forwarded to ``format_int8_col``.
        int32s: forwarded to ``format_int32_col``.
        dates: forwarded to ``format_date_cols`` together with ``timeformat``.
        timeformat: second argument given to ``format_date_cols``.
        strs: forwarded to ``format_str_col``.
        bools: forwarded to ``format_bool_col``.
    """
    # Renaming runs before any formatter is applied.
    rename_cols(df, renamings)

    # (formatter, extra positional arguments) pairs, in execution order.
    pipeline = (
        (format_int8_col, (int8s,)),
        (format_int32_col, (int32s,)),
        (format_date_cols, (dates, timeformat)),
        (format_str_col, (strs,)),
        (format_bool_col, (bools,)),
    )
    for formatter, extra_args in pipeline:
        formatter(df, *extra_args)
def append_new_table_mapd(connection, table_name, csv_file, ts_cols, ts_format,
                          ts_units, int_cols):
    """Append a CSV file to ``table_name`` and then rebuild the table deduplicated.

    Steps:
      1. Read ``csv_file`` and pass it through ``format_int_col`` and
         (conditionally) ``format_date_cols``.
      2. If the frame is empty, print a message and stop.
      3. Otherwise append the rows to the existing table.
      4. Run a fixed dedupe query, drop ``table_name`` and recreate it from
         the query result.

    Args:
        connection: object exposing ``load_table``; also handed to
            ``pd.read_sql_query`` and ``drop_table_mapd``.
        table_name: name of the table to append to and rebuild (must be a
            ``str``; it is concatenated into log messages).
        csv_file: path or buffer accepted by ``pd.read_csv``.
        ts_cols: forwarded to ``format_date_cols``.
        ts_format: time format, or the literal string ``'None'``.
        ts_units: time units, or the literal string ``'None'``.
        int_cols: forwarded to ``format_int_col``.
    """
    frame = pd.read_csv(csv_file)
    format_int_col(frame, int_cols)

    # The *string* 'None' (not the None object) marks a missing setting.
    # If neither value is 'None', no date formatting happens at all.
    if ts_format == 'None':
        format_date_cols(frame, ts_cols, un=ts_units)
    elif ts_units == 'None':
        format_date_cols(frame, ts_cols, tf=ts_format)

    if frame.empty:
        print("no results to upload")
        return

    # Load the new rows on top of the existing table.
    frame = frame.reset_index(drop=True)
    print("loading table " + table_name)
    connection.load_table(table_name, frame, preserve_index=False, create=False)

    # Dedupe all of the rows.
    # NOTE(review): this query always reads from oss_git_views filtered to
    # repo 'mapd-core', regardless of table_name -- confirm that callers only
    # ever pass that table, otherwise the rebuild below replaces table_name
    # with unrelated data.
    dedupe_query = (
        "select CAST(view_timestamp as DATE) view_timestamp, "
        "MAX(view_unique) as view_unique, repo "
        "from oss_git_views "
        "where repo = 'mapd-core' "
        "group by view_timestamp, repo "
        "order by view_timestamp ASC"
    )
    deduped = pd.read_sql_query(dedupe_query, connection)

    print("reloading table " + table_name)
    print(deduped)

    # Drop the table and recreate it (create=True) from the deduped rows.
    drop_table_mapd(connection, table_name)
    connection.load_table(table_name, deduped, preserve_index=False, create=True)
Example #3
0
def load_new_table_mapd(connection, table_name, csv_file, dtcol, tfrmt, mapd_host, mapd_user):
    """Replace ``table_name`` with the contents of a CSV file.

    The CSV is read, passed through ``format_date_cols``, the old table is
    dropped via ``drop_table_mapd``, and a new table is created and loaded
    from the frame.

    Args:
        connection: object exposing ``create_table`` and ``load_table``.
        table_name: table to drop and recreate (must be a ``str``; it is
            concatenated into the log message).
        csv_file: path or buffer accepted by ``pd.read_csv``.
        dtcol: forwarded to ``format_date_cols``.
        tfrmt: forwarded to ``format_date_cols``.
        mapd_host: unused in this function.
        mapd_user: unused in this function.
    """
    frame = pd.read_csv(csv_file).reset_index(drop=True)

    # Per the original author's note, this recasts the datetime column(s)
    # from strings to datetimes (helper is defined elsewhere).
    format_date_cols(frame, dtcol, tfrmt)

    # Drop the old table, then create the new one from the frame.
    drop_table_mapd(connection, table_name)
    connection.create_table(table_name, frame, preserve_index=False)

    print("loading table " + table_name)
    # Load the frame's rows into the freshly created table.
    connection.load_table(table_name, frame)
def load_new_table_mapd(connection, table_name, csv_file, dtcol, tfrmt, drop_cols, mapd_host, mapd_user):
    """Replace ``table_name`` with the contents of a CSV file, minus ``drop_cols``.

    The CSV is read, passed through ``format_date_cols``, the columns in
    ``drop_cols`` are removed, the old table is dropped via
    ``drop_table_mapd``, and a new table is created and loaded from the
    remaining columns.

    Args:
        connection: object exposing ``create_table`` and ``load_table``.
        table_name: table to drop and recreate (must be a ``str``; it is
            concatenated into log messages).
        csv_file: path or buffer accepted by ``pd.read_csv``.
        dtcol: forwarded to ``format_date_cols``.
        tfrmt: forwarded to ``format_date_cols``.
        drop_cols: column label or list of labels to remove before loading.
        mapd_host: unused in this function.
        mapd_user: unused in this function.

    Raises:
        KeyError: if a label in ``drop_cols`` is not a column of the CSV
            (raised by ``DataFrame.drop``).
    """
    df = pd.read_csv(csv_file)
    df.reset_index(drop=True, inplace=True)
    # Per the original author's note, this recasts the datetime column(s)
    # from strings to datetimes (helper is defined elsewhere).
    format_date_cols(df, dtcol, tfrmt)
    # Drop the big columns of text we don't need for metrics.
    # DataFrame.drop returns a new frame; the result must be kept, otherwise
    # the unwanted columns are still created and uploaded.
    df = df.drop(columns=drop_cols)
    # Drop the old table.
    drop_table_mapd(connection, table_name)
    print("creating table " + table_name)
    print('with columns')
    print(list(df.columns.values))
    # Create the new table from the trimmed frame.
    connection.create_table(table_name, df, preserve_index=False)
    print("loading table " + table_name)
    # Load the trimmed frame into the new table.
    connection.load_table(table_name, df)
Example #5
0
def parse_data(csvfile, dtcols, intcols, floatcols, strcols, renamings, tfrmt):
    """Read a CSV file, format its columns, add geo data and drop helper columns.

    Args:
        csvfile: path or buffer accepted by ``pd.read_csv``.
        dtcols: forwarded to ``format_date_cols`` together with ``tfrmt``.
        intcols: forwarded to ``format_int_col``.
        floatcols: forwarded to ``format_flt_col``.
        strcols: forwarded to ``format_str_col``.
        renamings: unused in this function.
        tfrmt: forwarded to ``format_date_cols``.

    Returns:
        The frame returned by ``parse_geo_data`` with the columns
        ``geo_city_code``, ``city_parent_code``, ``city_target_type`` and
        ``city_status`` removed.

    Raises:
        KeyError: if any of those four columns is missing (raised by
            ``DataFrame.drop``).
    """
    df = pd.read_csv(csvfile)
    df.reset_index(drop=True, inplace=True)
    # Per the original author's note, this recasts the datetime column(s)
    # from strings to timestamps (helper is defined elsewhere).
    format_date_cols(df, dtcols, tfrmt)
    format_int_col(df, intcols)
    format_str_col(df, strcols)
    format_flt_col(df, floatcols)
    df = parse_geo_data(df)
    # Use the ``columns=`` keyword: passing the axis positionally
    # (``df.drop(label, 1)``) raises TypeError on pandas >= 2.0.
    return df.drop(columns=[
        'geo_city_code',
        'city_parent_code',
        'city_target_type',
        'city_status',
    ])
def load_new_table_mapd(connection, table_name, csv_file, ts_cols, ts_format,
                        ts_units, int_cols):
    """Read a CSV file, format its columns and append it to ``table_name``.

    Despite the name, the load uses ``create=False``, i.e. the rows are
    appended to an already existing table. An empty CSV results in a
    message and no upload.

    Args:
        connection: object exposing ``load_table``.
        table_name: target table (must be a ``str``; it is concatenated into
            the log message).
        csv_file: path or buffer accepted by ``pd.read_csv``.
        ts_cols: forwarded to ``format_date_cols``.
        ts_format: time format, or the literal string ``'None'``.
        ts_units: time units, or the literal string ``'None'``.
        int_cols: forwarded to ``format_int_col``.
    """
    frame = pd.read_csv(csv_file)
    format_int_col(frame, int_cols)

    # The *string* 'None' (not the None object) marks a missing setting.
    # If neither value is 'None', no date formatting happens at all.
    if ts_format == 'None':
        format_date_cols(frame, ts_cols, un=ts_units)
    elif ts_units == 'None':
        format_date_cols(frame, ts_cols, tf=ts_format)

    if frame.empty:
        print("no results to upload")
        return

    frame = frame.reset_index(drop=True)
    print("loading table " + table_name)
    # Append the rows to the existing table.
    connection.load_table(table_name, frame, preserve_index=False, create=False)