def process_allocated(out_dir, input_path):
    # create allocated things
    allocated_csv_filename = os.path.join(out_dir, 'nimsp_allocated_contributions.csv')
    allocated_csv = open(allocated_csv_filename, 'w')
    allocated_emitter = AllocatedEmitter(allocated_csv, fieldnames=FIELDNAMES)

    # create unallocated things
    unallocated_csv_filename = os.path.join(out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    unallocated_csv = open(unallocated_csv_filename, 'w')
    unallocated_emitter = UnallocatedEmitter(unallocated_csv, fieldnames=FIELDNAMES + ['contributionid'])

    input_file = open(input_path, 'r')
    input_fields = [name for (name, _, _) in CSV_SQL_MAPPING]
    source = VerifiedCSVSource(input_file, input_fields)

    output_func = chain_filters(
        unallocated_emitter,
        DCIDFilter(SALT_KEY),
        allocated_emitter)

    load_data(source, NIMSPDenormalize.get_allocated_record_processor(), output_func)

    for o in [allocated_csv, unallocated_csv]:
        o.close()

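# NOTE: the real chain_filters implementation isn't shown in this excerpt.
# As a minimal sketch only (names and semantics assumed, not the project's
# actual API): compose record-processing stages into a single output function,
# where each stage takes a record dict and returns a record, or None to drop it.
def chain_filters_sketch(*stages):
    def process(record):
        for stage in stages:
            record = stage(record)
            if record is None:  # a stage filtered this record out
                return None
        return record
    return process

# usage: lower-case a field, then drop records with no amount
pipeline = chain_filters_sketch(
    lambda r: dict(r, name=r['name'].lower()),
    lambda r: r if r.get('amount') else None,
)
print(pipeline({'name': 'ACME PAC', 'amount': 500}))  # {'name': 'acme pac', 'amount': 500}
print(pipeline({'name': 'EMPTY', 'amount': 0}))       # None
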
def load_payment(csvpath, *args, **options):
    loader = FARAPaymentLoader(
        source='DOJ',
        description='load from denormalized CSVs',
        imported_by="loadfara.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )

    payment_record_processor = chain_filters(
        CSVFieldVerifier(),
        FieldRemover('id'),
        FieldRemover('import_reference'),
        FieldAdder('import_reference', loader.import_session),
        FieldCopier({'date_asterisk': 'date'}),
        FieldModifier('date', parse_fara_date),
        FieldModifier('date_asterisk', parse_fara_asterisk),
        FieldModifier('amount', parse_decimal),
        FieldModifier(('document_id', 'client_id', 'registrant_id',
                       'record_id', 'location_id', 'subcontractor_id'), parse_int),
        UnicodeFilter(),
        StringLengthFilter(Payment))

    output_func = chain_filters(
        LoaderEmitter(loader),
        Every(REPORT_FREQUENCY, progress_tick),
    )

    input_iterator = VerifiedCSVSource(
        open(os.path.abspath(csvpath)),
        fieldnames=Payment.FIELDNAMES,
        skiprows=1)

    load_data(input_iterator, payment_record_processor, output_func)

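# The FieldModifier stages above apply parsers (parse_fara_date, parse_decimal,
# parse_int) to individual fields before loading. Those helpers aren't shown in
# this excerpt; as an illustration only (the names and behavior below are
# assumptions, not the project's code), a FieldModifier-style stage could be:
from decimal import Decimal, InvalidOperation

def make_field_modifier(fields, func):
    # apply func to each named field, leaving the rest of the record untouched
    if isinstance(fields, str):
        fields = (fields,)
    def modify(record):
        for field in fields:
            record[field] = func(record.get(field))
        return record
    return modify

def parse_decimal_sketch(value):
    # assumed behavior: empty or malformed values become None instead of raising
    try:
        return Decimal(value) if value else None
    except InvalidOperation:
        return None

modify_amount = make_field_modifier('amount', parse_decimal_sketch)
print(modify_amount({'amount': '1500.00'}))  # {'amount': Decimal('1500.00')}
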
def handle(self, input_path, year, **options):
    imp = Import.objects.create(
        source=input_path,
        imported_by=__file__)

    input_file = open(input_path, 'r')
    input_source = VerifiedCSVSource(input_file, FIELDS, skiprows=1)

    # todo: real year and import_ref
    processor = LoadTCSEarmarks.get_record_processor(int(year), imp)

    load_data(input_source, processor, save_earmark)

def denormalize(self, data_path, cycles, catcodes, candidates, committees):
    infiles = Files(*[os.path.join(data_path, 'raw', 'crp', 'pac_other%s.txt' % cycle)
                      for cycle in cycles])
    outfile = open(os.path.join(data_path, 'denormalized', 'denorm_pac2pac.txt'), 'w')

    output_func = CSVEmitter(outfile, fieldnames=FIELDNAMES).process_record
    source = VerifiedCSVSource(infiles, fieldnames=FILE_TYPES['pac_other'], quotechar="|")
    record_processor = self.get_record_processor(catcodes, candidates, committees)

    load_data(source, record_processor, output_func)

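# Files(*paths) above presumably presents the per-cycle input files as one
# continuous stream so a single VerifiedCSVSource can read them all. A minimal
# sketch under that assumption (not the project's actual class):
class FilesSketch:
    def __init__(self, *paths):
        self.paths = paths

    def __iter__(self):
        # yield the lines of each file in turn, as one iterable
        for path in self.paths:
            with open(path, 'r') as f:
                for line in f:
                    yield line
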
def issue_handler(inpath, outpath, infields, outfields):
    run_recipe(
        VerifiedCSVSource(open(inpath, 'r'), fieldnames=infields, quotechar='|'),
        FieldCountValidator(len(FILE_TYPES['lob_issue'])),
        CSVFieldVerifier(),
        FieldRenamer({
            'id': 'SI_ID',
            'transaction': 'UniqID',
            'general_issue_code': 'IssueID',
            'general_issue': 'Issue',
            'specific_issue': 'SpecIssue',
            'year': 'Year',
        }),
        #DebugEmitter(),
        CSVEmitter(open(outpath, 'w'), fieldnames=outfields),
    )

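# FieldRenamer maps the raw lob_issue column names onto the output schema.
# Judging from the call above, the mapping reads {new_name: old_name}; that
# direction is an assumption, and the sketch below is illustrative only:
def make_field_renamer(mapping):
    def rename(record):
        # build a new record keyed by the new names
        return {new: record.get(old) for new, old in mapping.items()}
    return rename

rename = make_field_renamer({'transaction': 'UniqID', 'year': 'Year'})
print(rename({'UniqID': 'A123', 'Year': '2008'}))
# {'transaction': 'A123', 'year': '2008'}
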
def run(self):
    run_recipe(
        VerifiedCSVSource(open(self.inpath)),
        CSVFieldVerifier(),
        FieldModifier('year', lambda x: int(x) if x else None),
        FieldRenamer({'transaction_id': 'transaction'}),
        NoneFilter(),
        FieldModifier('specific_issue', lambda x: '' if x is None else x),
        TRANSACTION_FILTER,
        UnicodeFilter(),
        CountEmitter(every=10000, log=self.log),
        LoaderEmitter(IssueLoader(
            source=self.inpath,
            description='load from denormalized CSVs',
            imported_by="loadlobbying (%s)" % os.getenv('LOGNAME', 'unknown'),
            log=self.log,
        ), commit_every=100),
    )

def denormalize(self, data_path, cycles, catcodes, candidates, committees):
    record_processor = self.get_record_processor(catcodes, candidates, committees)

    for cycle in cycles:
        in_path = os.path.join(data_path, 'raw', 'crp', 'indivs%s.txt' % cycle)
        infile = open(in_path, 'r')
        out_path = os.path.join(data_path, 'denormalized', 'denorm_indivs.%s.txt' % cycle)
        outfile = open(out_path, 'w')

        sys.stdout.write('Reading from %s, writing to %s...\n' % (in_path, out_path))

        input_source = VerifiedCSVSource(infile, fieldnames=FILE_TYPES['indivs'], quotechar="|")
        output_func = CSVEmitter(outfile, fieldnames=FIELDNAMES).process_record

        load_data(input_source, record_processor, output_func)

def handle(self, csvpath, *args, **options):
    loader = ContributionLoader(
        source=options.get('source'),
        description='load from denormalized CSVs',
        imported_by="loadcontributions.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    try:
        input_iterator = VerifiedCSVSource(
            open(os.path.abspath(csvpath)),
            FIELDNAMES,
            skiprows=1 + int(options['skip']))

        output_func = chain_filters(
            LoaderEmitter(loader),
            #Every(self.COMMIT_FREQUENCY, lambda i: transaction.commit()),
            Every(self.COMMIT_FREQUENCY, progress_tick),
            Every(self.COMMIT_FREQUENCY, lambda i: reset_queries()),
        )

        record_processor = self.get_record_processor(loader.import_session)

        load_data(input_iterator, record_processor, output_func)
        transaction.commit()
    except KeyboardInterrupt:
        traceback.print_exception(*sys.exc_info())
        transaction.rollback()
        raise
    except:
        # roll back the partial import on any other failure as well
        traceback.print_exception(*sys.exc_info())
        transaction.rollback()
        raise
    finally:
        sys.stdout.flush()
        sys.stderr.flush()

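# The Every(...) stages above fire a side effect once per COMMIT_FREQUENCY
# records: a progress tick, then reset_queries() to clear Django's query log
# so a long-running import doesn't accumulate memory. A sketch of an
# Every-style pass-through stage, with the counting behavior assumed:
def make_every(n, callback):
    state = {'count': 0}
    def tick(record):
        state['count'] += 1
        if state['count'] % n == 0:
            callback(state['count'])
        return record  # records pass through unchanged
    return tick

progress = make_every(2, lambda i: print('processed %d records' % i))
for rec in [{'a': 1}, {'a': 2}, {'a': 3}]:
    progress(rec)  # prints once, after the second record
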
def process_unallocated(out_dir, salts_db):
    # read back the temp file written by process_allocated
    unallocated_csv_filename = os.path.join(out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    unallocated_csv = open(unallocated_csv_filename, 'r')

    salted_csv_filename = os.path.join(out_dir, 'nimsp_unallocated_contributions.csv')
    salted_csv = open(salted_csv_filename, 'w')

    source = VerifiedCSVSource(unallocated_csv, fieldnames=FIELDNAMES + ['contributionid'], skiprows=1)
    output_func = CSVEmitter(salted_csv, FIELDNAMES).process_record

    load_data(source, NIMSPDenormalize.get_unallocated_record_processor(salts_db), output_func)

    for f in [salted_csv, unallocated_csv]:
        f.close()

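# Together, process_allocated and process_unallocated form a two-pass pipeline:
# the first pass writes nimsp_unallocated_contributions.csv.TMP alongside the
# allocated output, and the second pass reads that temp file back through the
# salt-aware record processor to produce the final CSV. A hypothetical driver
# (paths and the salts_db value below are assumptions, not project defaults):
salts_db = ...  # assumed: handle to the salts database used by the record processor
out_dir = '/data/nimsp/denormalized'
process_allocated(out_dir, '/data/nimsp/raw/contributions.csv')
process_unallocated(out_dir, salts_db)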