including constants.write_yamlvar()
        logger is used to write logging messages
    '''
    script_path = __main__.__file__
    args = {
        'input_file': sys.argv[1],
        'output_file': sys.argv[2],
        'metadata_file': create_metadata_filename(sys.argv[2]),
        'sheet': 'WeaponDischarges',
        'column_names_key': 'TRR-weapon-discharges_2004-2016_2016-09_p046360'
    }

    assert args['input_file'].startswith('input/'),\
        "input_file is malformed: {}".format(args['input_file'])
    assert (args['output_file'].startswith('output/') and
            args['output_file'].endswith('.csv.gz')),\
        "output_file is malformed: {}".format(args['output_file'])

    return setup.do_setup(script_path, args)


# Script constants (paths, sheet name, CSV options) and logger come from setup.
cons, log = get_setup()

# Load only the worksheet named in the setup constants.
df = pd.read_excel(cons.input_file, sheet_name=cons.sheet)

# Swap the raw spreadsheet headers for the standardized ones looked up
# under this file's column-names key.
standardized_columns = standardize_columns(df.columns, cons.column_names_key)
df.columns = standardized_columns

# Write the data first: collect_metadata is handed the output path, so the
# original write-then-collect order is preserved.
df.to_csv(cons.output_file, **cons.csv_opts)

meta_df = collect_metadata(df, cons.input_file, cons.output_file)
meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
            args['output_file'].endswith('.csv.gz')),\
        "output_file is malformed: {}".format(args['output_file'])

    return setup.do_setup(script_path, args)


# Script constants (input files, column names, CSV options) and logger.
cons, log = get_setup()

# Collect one frame per input file and concatenate once after the loop.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# re-copied the whole accumulated frame on every iteration.
data_frames = []
meta_frames = []

for input_file in cons.input_files:
    # The reader returns the table together with the report-produced date
    # and the FOIA request identifier that are logged and recorded below.
    df, report_produced_date, FOIA_request = read_p046957_file(
        input_file, original_crid_col='Number')
    log.info(('Processing {0} file, of FOIA number {1}, produced on {2}'
              '').format(input_file, FOIA_request, report_produced_date))
    cons.write_yamlvar("{}-Report_Produced_Date".format(input_file),
                       report_produced_date)
    cons.write_yamlvar("{}-FOIA_Request".format(input_file), FOIA_request)

    df.columns = cons.column_names

    data_frames.append(df)
    meta_frames.append(collect_metadata(df, input_file, cons.output_file))

# pd.concat raises on an empty list; fall back to an empty frame so that a
# run with no input files still writes (empty) outputs as before.
data_df = (pd.concat(data_frames, ignore_index=True)
           if data_frames else pd.DataFrame())
meta_df = (pd.concat(meta_frames, ignore_index=True)
           if meta_frames else pd.DataFrame())

data_df.to_csv(cons.output_file, **cons.csv_opts)
meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
# Script constants (input files, column names, CSV options) and logger.
cons, log = get_setup()

# Collect one frame per input file and concatenate once after the loop.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# re-copied the whole accumulated frame on every iteration.
data_frames = []
meta_frames = []

for input_file in cons.input_files:
    # The reader returns the table together with the report-produced date
    # and the FOIA request identifier that are logged and recorded below.
    df, report_produced_date, FOIA_request = read_p046957_file(
        input_file, original_crid_col='Number:', notnull='Number:')
    log.info(('Processing {0} file, of FOIA number {1}, produced on {2}'
              '').format(input_file, FOIA_request, report_produced_date))
    cons.write_yamlvar("{}-Report_Produced_Date".format(input_file),
                       report_produced_date)
    cons.write_yamlvar("{}-FOIA_Request".format(input_file),
                       FOIA_request)

    df.columns = cons.column_names

    data_frames.append(df)
    meta_frames.append(collect_metadata(df, input_file, cons.output_file))

# pd.concat raises on an empty list; fall back to an empty frame so that a
# run with no input files behaves as it did with the append-based loop.
data_df = (pd.concat(data_frames, ignore_index=True)
           if data_frames else pd.DataFrame())
meta_df = (pd.concat(meta_frames, ignore_index=True)
           if meta_frames else pd.DataFrame())

# 1-based row identifier over the combined data, placed as the first column.
data_df.insert(0, 'row_id', data_df.index+1)
data_df.to_csv(cons.output_file, **cons.csv_opts)
meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
Exemple #4
0
    assert args['input_file'].startswith('input/'),\
        "input_file is malformed: {}".format(args['input_file'])
    assert (args['output_file'].startswith('output/') and
            args['output_file'].endswith('.csv.gz')),\
        "output_file is malformed: {}".format(args['output_file'])

    return setup.do_setup(script_path, args)


# Script constants (paths, column names, CSV options) and logger.
cons, log = get_setup()


# Parse the single input file. The reader hands back the table together
# with the report-produced date and the FOIA request identifier.
parsed = read_p046957_file(cons.input_file,
                           original_crid_col='Number',
                           drop_col_val=('Race Desc', 'end of record'))
data_df, report_produced_date, FOIA_request = parsed

log_template = 'Processing {0} file, of FOIA number {1}, produced on {2}'
log.info(log_template.format(cons.input_file,
                             FOIA_request,
                             report_produced_date))

# Record the report details alongside the other script constants.
cons.write_yamlvar("Report_Produced_Date", report_produced_date)
cons.write_yamlvar("FOIA_Request", FOIA_request)

data_df.columns = cons.column_names

# Renumber rows from zero before writing.
data_df = data_df.reset_index(drop=True)
data_df.to_csv(cons.output_file, **cons.csv_opts)

# Metadata is collected only after the data file has been written, matching
# the original order (collect_metadata is handed the output path).
metadata = collect_metadata(data_df, cons.input_file, cons.output_file)
meta_df = metadata.reset_index(drop=True)
meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
        "output_file is malformed: {}".format(args['output_file'])

    return setup.do_setup(script_path, args)


# Script constants (input files, column-names key, CSV options) and logger.
cons, log = get_setup()

# Collect one frame per worksheet and concatenate once after the loops.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# re-copied the whole accumulated frame on every iteration.
data_frames = []
meta_frames = []

for input_file in cons.input_files:
    # Open the workbook as a context manager so its file handle is closed
    # once every sheet has been parsed.
    with pd.ExcelFile(input_file) as xls_file:
        for sheet in xls_file.sheet_names:
            df = xls_file.parse(sheet)
            df.columns = standardize_columns(df.columns,
                                             cons.column_names_key)
            # The sheet name is converted with int() and stored as the
            # 'year' column; a non-numeric sheet name raises ValueError.
            df.insert(0, 'year', int(sheet))

            data_frames.append(df)
            # Metadata is keyed by "<input file>-<sheet>" so each worksheet
            # gets its own entry.
            meta_frames.append(
                collect_metadata(df,
                                 '{0}-{1}'.format(input_file, sheet),
                                 cons.output_file))

# pd.concat raises on an empty list; fall back to an empty frame so that a
# run with no sheets behaves as it did with the append-based loop.
data_df = (pd.concat(data_frames, ignore_index=True)
           if data_frames else pd.DataFrame())
meta_df = (pd.concat(meta_frames, ignore_index=True)
           if meta_frames else pd.DataFrame())

# 1-based row identifier over the combined data, placed as the first column.
data_df.insert(0, 'row_id', data_df.index+1)
data_df.to_csv(cons.output_file, **cons.csv_opts)
meta_df.to_csv(cons.metadata_file, **cons.csv_opts)