Esempio n. 1
0
        "An input_file is malformed: {}".format(args['input_files'])
    assert (args['output_file'].startswith('output/') and
            args['output_file'].endswith('.csv.gz')),\
        "output_file is malformed: {}".format(args['output_file'])

    return setup.do_setup(script_path, args)


# Build the run configuration object (`cons`) and the logger for this step.
cons, log = get_setup()

# Per-file frames are gathered in lists and concatenated once after the
# loop. DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
# and calling it inside the loop re-copied the accumulated rows every time.
data_frames = []
meta_frames = []

for input_file in cons.input_files:
    # read_p046957_file yields the parsed frame plus the two values that are
    # logged below as the report production date and the FOIA number.
    df, report_produced_date, FOIA_request = \
                                read_p046957_file(input_file,
                                                  original_crid_col='Number')
    log.info(('Processing {0} file, of FOIA number {1}, produced on {2}'
              '').format(input_file, FOIA_request, report_produced_date))
    # Variable names are prefixed with the input file name so that several
    # input files do not share the same key.
    cons.write_yamlvar("{}-Report_Produced_Date".format(input_file),
                       report_produced_date)
    cons.write_yamlvar("{}-FOIA_Request".format(input_file), FOIA_request)

    # Replace the raw headers with the configured column names.
    df.columns = cons.column_names

    data_frames.append(df)
    meta_frames.append(collect_metadata(df, input_file, cons.output_file))

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame to keep the previous behaviour when there are no input files.
# ignore_index=True renumbers rows 0..n-1, matching reset_index(drop=True).
data_df = (pd.concat(data_frames, ignore_index=True)
           if data_frames else pd.DataFrame())
meta_df = (pd.concat(meta_frames, ignore_index=True)
           if meta_frames else pd.DataFrame())

data_df.to_csv(cons.output_file, **cons.csv_opts)

meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
Esempio n. 2
0
    assert args['input_file'].startswith('input/'),\
        "input_file is malformed: {}".format(args['input_file'])
    assert (args['output_file'].startswith('output/') and
            args['output_file'].endswith('.csv.gz')),\
        "output_file is malformed: {}".format(args['output_file'])

    return setup.do_setup(script_path, args)


# Build the run configuration object (`cons`) and the logger for this step.
cons, log = get_setup()


# Parse the single input file. The call yields the data frame plus the two
# values that are logged below as the report production date and the FOIA
# number.
data_df, report_produced_date, FOIA_request = read_p046957_file(
    cons.input_file,
    original_crid_col='Number',
    drop_col_val=('Race Desc', 'end of record'))
log.info(
    'Processing {0} file, of FOIA number {1}, produced on {2}'.format(
        cons.input_file, FOIA_request, report_produced_date))
cons.write_yamlvar("Report_Produced_Date", report_produced_date)
cons.write_yamlvar("FOIA_Request", FOIA_request)

# Replace the raw headers with the configured column names, renumber the
# rows from zero, and write the data out.
data_df.columns = cons.column_names
data_df = data_df.reset_index(drop=True)
data_df.to_csv(cons.output_file, **cons.csv_opts)

# Metadata is collected only after the data file has been written.
meta_df = collect_metadata(data_df, cons.input_file, cons.output_file)
meta_df = meta_df.reset_index(drop=True)
meta_df.to_csv(cons.metadata_file, **cons.csv_opts)
Esempio n. 3
0
    assert (args['output_file'].startswith('output/') and
            args['output_file'].endswith('.csv.gz')),\
        "output_file is malformed: {}".format(args['output_file'])

    return setup.do_setup(script_path, args)


# Build the run configuration object (`cons`) and the logger for this step.
cons, log = get_setup()

# Accumulators: start empty and grow by one input file per loop iteration.
data_df = pd.DataFrame()
meta_df = pd.DataFrame()

for input_file in cons.input_files:
    # read_p046957_file yields the parsed frame plus the two values that are
    # logged below as the report production date and the FOIA number.
    # NOTE(review): 'Number:' is passed both as the original CRID column and
    # as `notnull`; presumably rows lacking it are dropped -- confirm against
    # read_p046957_file.
    df, report_produced_date, FOIA_request = \
                                read_p046957_file(input_file,
                                                  original_crid_col='Number:',
                                                  notnull='Number:')
    log.info(('Processing {0} file, of FOIA number {1}, produced on {2}'
              '').format(input_file, FOIA_request, report_produced_date))
    # Variable names are prefixed with the input file name so that several
    # input files do not share the same key.
    cons.write_yamlvar("{}-Report_Produced_Date".format(input_file),
                       report_produced_date)
    cons.write_yamlvar("{}-FOIA_Request".format(input_file),
                       FOIA_request)

    # Replace the raw headers with the configured column names.
    df.columns = cons.column_names

    # Add this file's rows to the running frame and renumber rows from zero.
    # NOTE(review): DataFrame.append was deprecated in pandas 1.4 and removed
    # in 2.0; on newer pandas this needs pd.concat([data_df, df]) instead.
    data_df = (data_df
               .append(df)
               .reset_index(drop=True))

    meta_df = (meta_df
Esempio n. 4
0
    assert args['input_file'].startswith('input/'),\
        "input_file is malformed: {}".format(args['input_file'])
    assert (args['output_file'].startswith('output/') and
            args['output_file'].endswith('.csv.gz')),\
        "output_file is malformed: {}".format(args['output_file'])

    return setup.do_setup(script_path, args)


# Build the run configuration object (`cons`) and the logger for this step.
cons, log = get_setup()

# Parse the single input file. The call yields the data frame plus the two
# values that are logged below as the report production date and the FOIA
# number.
df, report_produced_date, FOIA_request = read_p046957_file(
    cons.input_file,
    original_crid_col='Gender',
    drop_col_val=('Race', 'end of record'),
    original_crid_mixed=True,
    add_skip=0)
log.info(
    'Processing {0} file, of FOIA number {1}, produced on {2}'.format(
        cons.input_file, FOIA_request, report_produced_date))
cons.write_yamlvar("Report_Produced_Date", report_produced_date)
cons.write_yamlvar("FOIA_Request", FOIA_request)

# Replace the raw headers with the configured column names.
df.columns = cons.column_names

# Renumber rows from zero, then expose a 1-based `row_id` as the first column.
df = df.reset_index(drop=True)
df.insert(0, 'row_id', df.index + 1)
df.to_csv(cons.output_file, **cons.csv_opts)

# Metadata is collected only after the data file has been written.
meta_df = collect_metadata(df, cons.input_file, cons.output_file)
meta_df = meta_df.reset_index(drop=True)
Esempio n. 5
0
            args['output_file'].endswith('.csv.gz')),\
        "output_file is malformed: {}".format(args['output_file'])

    return setup.do_setup(script_path, args)


# Build the run configuration object (`cons`) and the logger for this step.
cons, log = get_setup()

# Per-file frames are kept in lists and combined with pd.concat, because
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
data_frames = []
meta_frames = []

# Stay empty frames when there are no input files, as before.
data_df = pd.DataFrame()
meta_df = pd.DataFrame()

for input_file in cons.input_files:
    # read_p046957_file yields the parsed frame plus the two values that are
    # logged below as the report production date and the FOIA number.
    df, report_produced_date, FOIA_request = \
                            read_p046957_file(input_file,
                                              original_crid_col='Number:',
                                              isnull='Number:',
                                              notnull='Location Code:',
                                              drop_col='Beat:')
    log.info(('Processing {0} file, of FOIA number {1}, produced on {2}'
              '').format(input_file, FOIA_request, report_produced_date))
    # Variable names are prefixed with the input file name so that several
    # input files do not share the same key.
    cons.write_yamlvar("{}-Report_Produced_Date".format(input_file),
                       report_produced_date)
    cons.write_yamlvar("{}-FOIA_Request".format(input_file), FOIA_request)

    # Replace the raw headers with the configured column names.
    df.columns = cons.column_names

    data_frames.append(df)
    meta_frames.append(collect_metadata(df, input_file, cons.output_file))

    # Refresh the combined frames on every iteration so data_df and meta_df
    # always hold everything read so far, exactly as the append-based code
    # did. ignore_index=True renumbers rows 0..n-1, matching the former
    # reset_index(drop=True).
    data_df = pd.concat(data_frames, ignore_index=True)
    meta_df = pd.concat(meta_frames, ignore_index=True)