including constants.write_yamlvar() logger is used to write logging messages ''' script_path = __main__.__file__ args = { 'input_file': sys.argv[1], 'output_file': sys.argv[2], 'metadata_file': create_metadata_filename(sys.argv[2]), 'sheet': 'WeaponDischarges', 'column_names_key': 'TRR-weapon-discharges_2004-2016_2016-09_p046360' } assert args['input_file'].startswith('input/'),\ "input_file is malformed: {}".format(args['input_file']) assert (args['output_file'].startswith('output/') and args['output_file'].endswith('.csv.gz')),\ "output_file is malformed: {}".format(args['output_file']) return setup.do_setup(script_path, args) cons, log = get_setup() df = pd.read_excel(cons.input_file, sheet_name=cons.sheet) df.columns = standardize_columns(df.columns, cons.column_names_key) df.to_csv(cons.output_file, **cons.csv_opts) meta_df = collect_metadata(df, cons.input_file, cons.output_file) meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
args['output_file'].endswith('.csv.gz')),\ "output_file is malformed: {}".format(args['output_file']) return setup.do_setup(script_path, args) cons, log = get_setup() data_df = pd.DataFrame() meta_df = pd.DataFrame() for input_file in cons.input_files: df, report_produced_date, FOIA_request = \ read_p046957_file(input_file, original_crid_col='Number') log.info(('Processing {0} file, of FOIA number {1}, produced on {2}' '').format(input_file, FOIA_request, report_produced_date)) cons.write_yamlvar("{}-Report_Produced_Date".format(input_file), report_produced_date) cons.write_yamlvar("{}-FOIA_Request".format(input_file), FOIA_request) df.columns = cons.column_names data_df = (data_df.append(df).reset_index(drop=True)) meta_df = (meta_df.append( collect_metadata(df, input_file, cons.output_file)).reset_index(drop=True)) data_df.to_csv(cons.output_file, **cons.csv_opts) meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
cons, log = get_setup()

# Collect the per-file frames in lists and concatenate once after the loop.
# DataFrame.append was removed in pandas 2.0, and calling it inside the loop
# re-copied the whole accumulated frame on every iteration.
data_frames = []
meta_frames = []

for input_file in cons.input_files:
    df, report_produced_date, FOIA_request = read_p046957_file(
        input_file, original_crid_col='Number:', notnull='Number:')
    log.info('Processing {0} file, of FOIA number {1}, produced on {2}'
             .format(input_file, FOIA_request, report_produced_date))

    # Hand the per-file report date and FOIA number to cons.write_yamlvar,
    # keyed by the input file name.
    cons.write_yamlvar("{}-Report_Produced_Date".format(input_file),
                       report_produced_date)
    cons.write_yamlvar("{}-FOIA_Request".format(input_file), FOIA_request)

    df.columns = cons.column_names
    data_frames.append(df)
    # NOTE(review): assumes collect_metadata returns a DataFrame -- confirm.
    meta_frames.append(collect_metadata(df, input_file, cons.output_file))

# pd.concat raises on an empty list, so fall back to the empty frame the
# original code produced when cons.input_files is empty.
data_df = (pd.concat(data_frames, ignore_index=True)
           if data_frames else pd.DataFrame())
meta_df = (pd.concat(meta_frames, ignore_index=True)
           if meta_frames else pd.DataFrame())

# 1-based row identifier over the combined, re-indexed data.
data_df.insert(0, 'row_id', data_df.index + 1)

data_df.to_csv(cons.output_file, **cons.csv_opts)
meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
assert args['input_file'].startswith('input/'),\ "input_file is malformed: {}".format(args['input_file']) assert (args['output_file'].startswith('output/') and args['output_file'].endswith('.csv.gz')),\ "output_file is malformed: {}".format(args['output_file']) return setup.do_setup(script_path, args) cons, log = get_setup() data_df, report_produced_date, FOIA_request = \ read_p046957_file(cons.input_file, original_crid_col='Number', drop_col_val=('Race Desc', 'end of record')) log.info(('Processing {0} file, of FOIA number {1}, produced on {2}' '').format(cons.input_file, FOIA_request, report_produced_date)) cons.write_yamlvar("Report_Produced_Date", report_produced_date) cons.write_yamlvar("FOIA_Request", FOIA_request) data_df.columns = cons.column_names data_df.reset_index(drop=True, inplace=True) data_df.to_csv(cons.output_file, **cons.csv_opts) meta_df = (collect_metadata(data_df, cons.input_file, cons.output_file).reset_index(drop=True)) meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
"output_file is malformed: {}".format(args['output_file']) return setup.do_setup(script_path, args) cons, log = get_setup() data_df = pd.DataFrame() meta_df = pd.DataFrame() for input_file in cons.input_files: xls_file = pd.ExcelFile(input_file) for sheet in xls_file.sheet_names: df = xls_file.parse(sheet) df.columns = standardize_columns(df.columns, cons.column_names_key) df.insert(0, 'year', int(sheet)) data_df = (data_df .append(df) .reset_index(drop=True)) meta_df = (meta_df .append(collect_metadata(df, '{0}-{1}'.format(input_file, sheet), cons.output_file)) .reset_index(drop=True)) data_df.insert(0, 'row_id', data_df.index+1) data_df.to_csv(cons.output_file, **cons.csv_opts) meta_df.to_csv(cons.metadata_file, **cons.csv_opts)