def gen_analyzed_data():
    """Write the analyzed subset of the raw prediction data to a CSV file.

    Reads the tab-separated file ``FLAGS.pred_data + "/pred1"``, keeps only
    the columns returned by ``get_analyzed_columns`` for the feature
    configuration, and saves them to ``FLAGS.analyzed_data`` with the schema
    column names as the header row and without an index column.

    Raises:
        KeyError: if the schema has no entry for id 1, or if an analyzed
            column name is not present in the schema.
    """
    # Schema maps column id -> column name, e.g. schema[1] == 'clk'.
    schema = Config().read_schema()
    # Id 1 is removed from the schema before any lookups are built.
    del schema[1]
    # `.items()` works on both Python 2 and 3 (`.iteritems()` is Python 2 only).
    col2id = {name: col_id for col_id, name in schema.items()}
    feature_conf_dic = CONF.read_feature_conf()

    # Load data. Schema ids identify columns, not rows, so they must not be
    # passed as `header=` (pandas would treat them as header row numbers and
    # swallow those rows). Columns are selected purely by position below.
    # NOTE(review): assumes the pred file has no header row - confirm.
    df = pd.read_table(FLAGS.pred_data + "/pred1", header=None)

    # Reformat the table: only the analyzed columns are kept, in id order.
    keep_columns_int = sorted(
        col2id[name] for name in get_analyzed_columns(feature_conf_dic)
    )
    # Dataframe columns start at 0 while the schema ids start at 2.
    df_keep_columns_int = [col_id - 2 for col_id in keep_columns_int]
    analyzed_table = df.iloc[:, df_keep_columns_int]

    # Save to csv, labelling each kept column with its schema name.
    analyzed_table.to_csv(
        FLAGS.analyzed_data,
        header=[schema[col_id] for col_id in keep_columns_int],
        index=False,
    )
    print("Analyzed data generation finished.")
def gen_pred_csv():
    """Save the raw prediction data as a CSV file with schema column names.

    Reads the tab-separated file ``FLAGS.pred_data + "/pred1"`` and writes it
    to ``../data/pred/pred1.csv`` with the schema column names as the header
    row and without an index column.

    Raises:
        KeyError: if the schema has no entry for id 1.
    """
    # Schema maps column id -> column name, e.g. schema[1] == 'clk'.
    schema = Config().read_schema()
    # Id 1 is removed so that it does not appear in the output header.
    del schema[1]
    # Build the header in ascending id order instead of relying on dict
    # iteration order, so each name lines up with its column.
    # NOTE(review): presumably schema ids follow the file's column order - verify.
    header = [schema[col_id] for col_id in sorted(schema)]

    # Load data. Column labels are replaced by `header` when writing, so the
    # file is read without header inference; otherwise its first row would be
    # consumed as labels and dropped from the output.
    # NOTE(review): assumes the pred file has no header row - confirm.
    df = pd.read_table(FLAGS.pred_data + "/pred1", header=None)

    # Save to csv.
    df.to_csv("../data/pred/pred1.csv", header=header, index=False)
    print("Csv generation finished.")