"An input_file is malformed: {}".format(args['input_files']) assert (args['output_file'].startswith('output/') and args['output_file'].endswith('.csv.gz')),\ "output_file is malformed: {}".format(args['output_file']) return setup.do_setup(script_path, args) cons, log = get_setup() data_df = pd.DataFrame() meta_df = pd.DataFrame() for input_file in cons.input_files: df, report_produced_date, FOIA_request = \ read_p046957_file(input_file, original_crid_col='Number') log.info(('Processing {0} file, of FOIA number {1}, produced on {2}' '').format(input_file, FOIA_request, report_produced_date)) cons.write_yamlvar("{}-Report_Produced_Date".format(input_file), report_produced_date) cons.write_yamlvar("{}-FOIA_Request".format(input_file), FOIA_request) df.columns = cons.column_names data_df = (data_df.append(df).reset_index(drop=True)) meta_df = (meta_df.append( collect_metadata(df, input_file, cons.output_file)).reset_index(drop=True)) data_df.to_csv(cons.output_file, **cons.csv_opts) meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
assert args['input_file'].startswith('input/'),\ "input_file is malformed: {}".format(args['input_file']) assert (args['output_file'].startswith('output/') and args['output_file'].endswith('.csv.gz')),\ "output_file is malformed: {}".format(args['output_file']) return setup.do_setup(script_path, args) cons, log = get_setup() data_df, report_produced_date, FOIA_request = \ read_p046957_file(cons.input_file, original_crid_col='Number', drop_col_val=('Race Desc', 'end of record')) log.info(('Processing {0} file, of FOIA number {1}, produced on {2}' '').format(cons.input_file, FOIA_request, report_produced_date)) cons.write_yamlvar("Report_Produced_Date", report_produced_date) cons.write_yamlvar("FOIA_Request", FOIA_request) data_df.columns = cons.column_names data_df.reset_index(drop=True, inplace=True) data_df.to_csv(cons.output_file, **cons.csv_opts) meta_df = (collect_metadata(data_df, cons.input_file, cons.output_file).reset_index(drop=True)) meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
# Tail of the setup helper (presumably get_setup(); its `def` line is not
# part of this excerpt): the output path must start with 'output/' and end
# with '.csv.gz' before control is handed to setup.do_setup.
    assert (args['output_file'].startswith('output/') and
            args['output_file'].endswith('.csv.gz')),\
        "output_file is malformed: {}".format(args['output_file'])

    return setup.do_setup(script_path, args)


# Script body: stack the rows of every input file into data_df and one
# metadata frame per file into meta_df.
cons, log = get_setup()

# Empty accumulators that each loop iteration extends.
data_df = pd.DataFrame()
meta_df = pd.DataFrame()

for input_file in cons.input_files:
    # 'Number:' is passed both as the original CRID column and as the
    # notnull column (how the reader uses them is defined elsewhere).
    df, report_produced_date, FOIA_request = \
        read_p046957_file(input_file,
                          original_crid_col='Number:',
                          notnull='Number:')
    log.info(('Processing {0} file, of FOIA number {1}, produced on {2}'
              '').format(input_file, FOIA_request, report_produced_date))

    # The yaml keys are prefixed with the input path, so each file gets its
    # own pair of entries.
    cons.write_yamlvar("{}-Report_Produced_Date".format(input_file),
                       report_produced_date)
    cons.write_yamlvar("{}-FOIA_Request".format(input_file),
                       FOIA_request)

    df.columns = cons.column_names

    # NOTE(review): DataFrame.append was deprecated in pandas 1.4 and removed
    # in 2.0; on newer pandas this needs pd.concat([data_df, df]) instead.
    data_df = (data_df
               .append(df)
               .reset_index(drop=True))
    # NOTE(review): the statement below is cut off in this excerpt; the rest
    # of the expression is not visible here.
    meta_df = (meta_df
assert args['input_file'].startswith('input/'),\ "input_file is malformed: {}".format(args['input_file']) assert (args['output_file'].startswith('output/') and args['output_file'].endswith('.csv.gz')),\ "output_file is malformed: {}".format(args['output_file']) return setup.do_setup(script_path, args) cons, log = get_setup() df, report_produced_date, FOIA_request = \ read_p046957_file(cons.input_file, original_crid_col='Gender', drop_col_val=('Race', 'end of record'), original_crid_mixed=True, add_skip=0) log.info(('Processing {0} file, of FOIA number {1}, produced on {2}' '').format(cons.input_file, FOIA_request, report_produced_date)) cons.write_yamlvar("Report_Produced_Date", report_produced_date) cons.write_yamlvar("FOIA_Request", FOIA_request) df.columns = cons.column_names df.reset_index(drop=True, inplace=True) df.insert(0, 'row_id', df.index + 1) df.to_csv(cons.output_file, **cons.csv_opts) meta_df = (collect_metadata(df, cons.input_file, cons.output_file).reset_index(drop=True))
args['output_file'].endswith('.csv.gz')),\ "output_file is malformed: {}".format(args['output_file']) return setup.do_setup(script_path, args) cons, log = get_setup() data_df = pd.DataFrame() meta_df = pd.DataFrame() for input_file in cons.input_files: df, report_produced_date, FOIA_request = \ read_p046957_file(input_file, original_crid_col='Number:', isnull='Number:', notnull='Location Code:', drop_col='Beat:') log.info(('Processing {0} file, of FOIA number {1}, produced on {2}' '').format(input_file, FOIA_request, report_produced_date)) cons.write_yamlvar("{}-Report_Produced_Date".format(input_file), report_produced_date) cons.write_yamlvar("{}-FOIA_Request".format(input_file), FOIA_request) df.columns = cons.column_names data_df = (data_df.append(df).reset_index(drop=True)) meta_df = (meta_df.append( collect_metadata(df, input_file, cons.output_file)).reset_index(drop=True))