def load_payment(csvpath, *args, **options):
    """Load FARA payment records from a denormalized CSV into the database.

    csvpath: path to the denormalized payments CSV; *args/**options are
    accepted for command-style call compatibility but unused here.
    """
    loader = FARAPaymentLoader(
        source='DOJ',
        description='load from denormalized CSVs',
        imported_by="loadfara.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    payment_record_processor = chain_filters(
        CSVFieldVerifier(),
        FieldRemover('id'),
        FieldRemover('import_reference'),
        FieldAdder('import_reference', loader.import_session),
        FieldCopier({'date_asterisk': 'date'}),
        FieldModifier('date', parse_fara_date),
        FieldModifier('date_asterisk', parse_fara_asterisk),
        FieldModifier('amount', parse_decimal),
        FieldModifier(('document_id', 'client_id', 'registrant_id',
                       'record_id', 'location_id', 'subcontractor_id'),
                      parse_int),
        UnicodeFilter(),
        StringLengthFilter(Payment))
    output_func = chain_filters(
        LoaderEmitter(loader),
        Every(REPORT_FREQUENCY, progress_tick),
    )
    # Fix: the CSV file handle was opened inline and never closed;
    # a `with` block guarantees it closes even if load_data raises.
    with open(os.path.abspath(csvpath)) as csvfile:
        input_iterator = VerifiedCSVSource(csvfile,
                                           fieldnames=Payment.FIELDNAMES,
                                           skiprows=1)
        load_data(input_iterator, payment_record_processor, output_func)
def load_payment(csvpath, *args, **options):
    """Load FARA payment records from a denormalized CSV into the database.

    csvpath: path to the denormalized payments CSV; *args/**options are
    accepted for command-style call compatibility but unused here.
    """
    loader = FARAPaymentLoader(
        source='DOJ',
        description='load from denormalized CSVs',
        imported_by="loadfara.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    payment_record_processor = chain_filters(
        CSVFieldVerifier(),
        FieldRemover('id'),
        FieldRemover('import_reference'),
        FieldAdder('import_reference', loader.import_session),
        FieldCopier({'date_asterisk': 'date'}),
        FieldModifier('date', parse_fara_date),
        FieldModifier('date_asterisk', parse_fara_asterisk),
        FieldModifier('amount', parse_decimal),
        FieldModifier(('document_id', 'client_id', 'registrant_id',
                       'record_id', 'location_id', 'subcontractor_id'),
                      parse_int),
        UnicodeFilter(),
        StringLengthFilter(Payment))
    output_func = chain_filters(
        LoaderEmitter(loader),
        Every(REPORT_FREQUENCY, progress_tick),
    )
    # Fix: the CSV file handle was opened inline and never closed;
    # a `with` block guarantees it closes even if load_data raises.
    with open(os.path.abspath(csvpath)) as csvfile:
        input_iterator = VerifiedCSVSource(csvfile,
                                           fieldnames=Payment.FIELDNAMES,
                                           skiprows=1)
        load_data(input_iterator, payment_record_processor, output_func)
def get_record_processor(catcodes, candidates, committees):
    """Build the PAC-to-candidate record-processing filter chain."""
    filters = [
        CSVFieldVerifier(),
        # transaction filters
        FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
        FieldMerger({'transaction_id': ('cycle', 'fec_rec_no')},
                    lambda cycle, fecid: 'pac2cand:%s:%s' % (cycle, fecid),
                    keep_fields=True),
        FieldMerger({'transaction_type': ('type',)},
                    lambda t: t.strip().lower()),
        # date stamp
        FieldModifier('date', parse_date_iso),
        # contributor and recipient fields
        ContributorFilter(committees),
        FieldRenamer({'contributor_ext_id': 'pac_id'}),
        FieldAdder('contributor_type', 'C'),
        Pac2CandRecipientFilter(candidates),
        FieldAdder('recipient_type', 'P'),
        # catcode
        CatCodeFilter('contributor', catcodes),
        # static fields
        FieldAdder('is_amendment', False),
        # empty string unless the cycle flag is 'Y'; then True iff curr is 'Y'
        FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                    lambda curr, cycle: (curr == 'Y') if cycle == 'Y' else "",
                    keep_fields=False),
        # filter through spec
        SpecFilter(SPEC),
    ]
    return chain_filters(*filters)
def process_allocated(out_dir, input_path):
    """Split denormalized NIMSP contributions into allocated/unallocated CSVs.

    Reads input_path and writes two files under out_dir; the unallocated
    output gets a .TMP suffix (presumably finalized by a later step —
    TODO confirm against the caller).
    """
    allocated_csv_filename = os.path.join(
        out_dir, 'nimsp_allocated_contributions.csv')
    unallocated_csv_filename = os.path.join(
        out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    input_fields = [name for (name, _, _) in CSV_SQL_MAPPING]
    # Fix: input_file was opened but never closed, and the output files were
    # only closed on the success path; `with` closes all three on any exit.
    with open(allocated_csv_filename, 'w') as allocated_csv, \
         open(unallocated_csv_filename, 'w') as unallocated_csv, \
         open(input_path, 'r') as input_file:
        allocated_emitter = AllocatedEmitter(allocated_csv,
                                             fieldnames=FIELDNAMES)
        unallocated_emitter = UnallocatedEmitter(
            unallocated_csv, fieldnames=FIELDNAMES + ['contributionid'])
        source = VerifiedCSVSource(input_file, input_fields)
        output_func = chain_filters(unallocated_emitter,
                                    DCIDFilter(SALT_KEY),
                                    allocated_emitter)
        load_data(source,
                  NIMSPDenormalize.get_allocated_record_processor(),
                  output_func)
def get_record_processor(year, import_ref):
    """Build the record-processing filter chain for one fiscal year's rows."""
    # Factory keeps the three length-truncation modifiers uniform.
    truncate_to = lambda limit: (lambda s: string_filter(s, limit))
    return chain_filters(
        CSVFieldVerifier(),
        FieldRemover('id'),
        FieldRemover('county'),
        FieldAdder('fiscal_year', year),
        FieldAdder('import_reference', import_ref),
        FieldModifier(['notes', 'house_members', 'senate_members'],
                      truncate_to(1024)),
        FieldModifier(['description', 'house_parties', 'house_states',
                       'house_districts', 'senate_parties', 'senate_states',
                       'raw_recipient'],
                      truncate_to(512)),
        FieldModifier(['bill_section', 'bill_subsection'], truncate_to(256)),
        FieldModifier(['budget_amount', 'senate_amount', 'house_amount',
                       'omni_amount', 'final_amount'], amount_filter),
        FieldMerger({'description': ('project_heading', 'description')},
                    _prepend),
        FieldModifier(['presidential'],
                      lambda p: presidential_raw.get(p, '')),
        FieldModifier(['undisclosed'],
                      lambda u: undisclosed_raw.get(u, '')),
        FieldMerger({'locations': ('city', 'state')}, _normalize_locations),
        FieldMerger({'members': ('house_members', 'house_parties',
                                 'house_states', 'house_districts',
                                 'senate_members', 'senate_parties',
                                 'senate_states')},
                    _normalize_members, keep_fields=True),
        FieldMerger({'recipients': ('raw_recipient',)},
                    _normalize_recipients),
    )
def handle(self, csvpath, *args, **options):
    """Command entry point: load contribution rows from a denormalized CSV.

    Commits on success, rolls back (and re-raises) on any error, and always
    flushes stdout/stderr so progress output is not lost.
    """
    loader = ContributionLoader(
        source=options.get('source'),
        description='load from denormalized CSVs',
        imported_by="loadcontributions.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    try:
        # Fix: the CSV handle was opened inline and never closed.
        with open(os.path.abspath(csvpath)) as csvfile:
            input_iterator = VerifiedCSVSource(
                csvfile, FIELDNAMES, skiprows=1 + int(options['skip']))
            output_func = chain_filters(
                LoaderEmitter(loader),
                #Every(self.COMMIT_FREQUENCY, lambda i: transaction.commit()),
                Every(self.COMMIT_FREQUENCY, progress_tick))
            record_processor = self.get_record_processor(loader.import_session)
            load_data(input_iterator, record_processor, output_func)
        transaction.commit()
    except BaseException:
        # Fix: the KeyboardInterrupt clause and the bare `except:` had
        # identical bodies; a single BaseException handler (what a bare
        # except catches) preserves the exact behavior.
        traceback.print_exception(*sys.exc_info())
        transaction.rollback()
        raise
    finally:
        sys.stdout.flush()
        sys.stderr.flush()
def process_allocated(out_dir, input_path):
    """Split denormalized NIMSP contributions into allocated/unallocated CSVs.

    Reads input_path and writes two files under out_dir; the unallocated
    output gets a .TMP suffix (presumably finalized by a later step —
    TODO confirm against the caller).
    """
    allocated_csv_filename = os.path.join(
        out_dir, 'nimsp_allocated_contributions.csv')
    unallocated_csv_filename = os.path.join(
        out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    input_fields = [name for (name, _, _) in CSV_SQL_MAPPING]
    # Fix: input_file was opened but never closed, and the output files were
    # only closed on the success path; `with` closes all three on any exit.
    with open(allocated_csv_filename, 'w') as allocated_csv, \
         open(unallocated_csv_filename, 'w') as unallocated_csv, \
         open(input_path, 'r') as input_file:
        allocated_emitter = AllocatedEmitter(allocated_csv,
                                             fieldnames=FIELDNAMES)
        unallocated_emitter = UnallocatedEmitter(
            unallocated_csv, fieldnames=FIELDNAMES + ['contributionid'])
        source = VerifiedCSVSource(input_file, input_fields)
        output_func = chain_filters(unallocated_emitter,
                                    DCIDFilter(SALT_KEY),
                                    allocated_emitter)
        load_data(source,
                  NIMSPDenormalize.get_allocated_record_processor(),
                  output_func)
def get_record_processor(catcodes, candidates, committees):
    """Build the PAC-to-candidate record-processing filter chain."""
    pipeline = (
        CSVFieldVerifier(),
        # transaction filters
        FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
        FieldMerger({'transaction_id': ('cycle', 'fec_rec_no')},
                    lambda cycle, fecid: 'pac2cand:%s:%s' % (cycle, fecid),
                    keep_fields=True),
        FieldMerger({'transaction_type': ('type',)},
                    lambda t: t.strip().lower()),
        # date stamp
        FieldModifier('date', parse_date_iso),
        # contributor and recipient fields
        ContributorFilter(committees),
        FieldRenamer({'contributor_ext_id': 'pac_id'}),
        FieldAdder('contributor_type', 'C'),
        Pac2CandRecipientFilter(candidates),
        FieldAdder('recipient_type', 'P'),
        # catcode
        CatCodeFilter('contributor', catcodes),
        # static fields
        FieldAdder('is_amendment', False),
        # empty string unless the cycle flag is 'Y'; then True iff curr is 'Y'
        FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                    lambda curr, cycle: (curr == 'Y') if cycle == 'Y' else "",
                    keep_fields=False),
        # filter through spec
        SpecFilter(SPEC),
    )
    return chain_filters(*pipeline)
def get_unallocated_record_processor(salts_db):
    """Build the pipeline that parses, salts, and id-filters unallocated rows."""
    # The DCID filter is shared: SaltFilter receives it and it also runs last.
    dcid_filter = DCIDFilter(SALT_KEY)
    steps = (
        CSVFieldVerifier(),
        FieldModifier(['contributionid'], parse_int),
        FieldModifier(['amount'], parse_decimal),
        SaltFilter(100, salts_db, dcid_filter),
        dcid_filter,
    )
    return chain_filters(*steps)
def get_record_processor(import_session):
    """Build the contribution record-processing filter chain."""
    steps = [
        CSVFieldVerifier(),
        FieldRemover('id'),
        FieldRemover('import_reference'),
        FieldAdder('import_reference', import_session),
        # round-trip through str so floats coerce to Decimal exactly as typed
        FieldModifier('amount', lambda a: Decimal(str(a))),
        FieldModifier(['cycle'], parse_int),
        FieldModifier(['date'], parse_date),
        BooleanFilter('is_amendment'),
        UnicodeFilter(),
        StringLengthFilter(Contribution),
    ]
    return chain_filters(*steps)
def get_record_processor(catcodes, candidates, committees):
    """Build the individual-contribution record-processing filter chain."""
    # Shared normalizer: uppercase non-empty strings, map falsy to "".
    uppercase_or_empty = lambda s: s.upper() if s else ""
    filters = [
        CSVFieldVerifier(),
        # transaction filters
        FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
        FieldMerger({'transaction_id': ('cycle', 'fec_trans_id')},
                    lambda cycle, fecid: 'indiv:%s:%s' % (cycle, fecid),
                    keep_fields=True),
        FieldMerger({'transaction_type': ('type',)},
                    lambda t: t.strip().lower() if t else '',
                    keep_fields=True),
        # filing reference ID
        FieldRenamer({'filing_id': 'microfilm'}),
        # date stamp
        FieldModifier('date', parse_date_iso),
        # rename contributor / organization fields
        FieldRenamer({'contributor_name': 'contrib',
                      'parent_organization_name': 'ult_org'}),
        IndivRecipientFilter(candidates, committees),
        CommitteeFilter(committees),
        OrganizationFilter(),
        # create URNs
        FieldRenamer({'contributor_ext_id': 'contrib_id',
                      'committee_ext_id': 'cmte_id'}),
        # address and gender fields
        FieldRenamer({'contributor_address': 'street',
                      'contributor_city': 'city',
                      'contributor_state': 'state',
                      'contributor_zipcode': 'zipcode',
                      'contributor_gender': 'gender'}),
        FieldModifier('contributor_state', uppercase_or_empty),
        FieldModifier('contributor_gender', uppercase_or_empty),
        # employer/occupation filter
        FECOccupationFilter(),
        # catcode
        CatCodeFilter('contributor', catcodes),
        # static fields
        FieldAdder('contributor_type', 'I'),
        FieldAdder('is_amendment', False),
        # empty string unless the cycle flag is 'Y'; then True iff curr is 'Y'
        FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                    lambda curr, cycle: (curr == 'Y') if cycle == 'Y' else "",
                    keep_fields=False),
        # filter through spec
        SpecFilter(SPEC),
    ]
    return chain_filters(*filters)
def get_record_processor(catcodes, candidates, committees):
    """Build the PAC-to-PAC record-processing filter chain."""
    # Shared normalizer: uppercase non-empty strings, map falsy to "".
    uppercase_or_empty = lambda s: s.upper() if s else ""
    filters = [
        CSVFieldVerifier(),
        ContribRecipFilter(),
        CommitteeFilter(committees),
        Pac2PacRecipientFilter(candidates, committees),
        # transaction filters
        FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
        FieldMerger({'transaction_id': ('cycle', 'fec_rec_no')},
                    lambda cycle, fecid: 'pac2pac:%s:%s' % (cycle, fecid),
                    keep_fields=True),
        FieldMerger({'transaction_type': ('type',)},
                    lambda t: t.strip().lower()),
        # filing reference ID
        FieldRenamer({'filing_id': 'microfilm'}),
        # date stamp
        FieldModifier('date', parse_date_iso),
        # catcode
        FieldMerger({'contributor_category': ('real_code',)},
                    uppercase_or_empty, keep_fields=True),
        FieldMerger({'recipient_category': ('recip_prim_code',)},
                    uppercase_or_empty, keep_fields=True),
        FieldRenamer({'contributor_city': 'city',
                      'contributor_state': 'state',
                      'contributor_zipcode': 'zipcode',
                      'contributor_occupation': 'fec_occ_emp',
                      'recipient_party': 'party'}),
        FieldModifier('contributor_state',
                      lambda s: s.strip().upper() if s else ""),
        FieldAdder('contributor_type', 'C'),
        # static fields
        FieldAdder('jurisdiction', 'F'),
        FieldMerger({'is_amendment': ('amend',)},
                    lambda s: s.strip().upper() != 'N'),
        # empty string unless the cycle flag is 'Y'; then True iff curr is 'Y'
        FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                    lambda curr, cycle: (curr == 'Y') if cycle == 'Y' else "",
                    keep_fields=False),
        # filter through spec
        SpecFilter(SPEC),
    ]
    return chain_filters(*filters)
def get_allocated_record_processor():
    """Build the pipeline that normalizes allocated NIMSP contribution rows."""
    # Idiom fix: dict comprehension instead of dict([...]) over a list comp.
    input_type_conversions = {
        field: conversion_func
        for (field, _, conversion_func) in CSV_SQL_MAPPING
        if conversion_func
    }
    return chain_filters(
        CSVFieldVerifier(),
        MultiFieldConversionFilter(input_type_conversions),
        # munge fields
        BestAvailableFilter(),
        RecipientFilter(),
        SeatFilter(),
        IdsFilter(),
        ContributorTypeFilter(),
        FieldModifier('date', lambda x: str(x) if x else None),
        ZipCleaner(),
        # add static fields
        FieldAdder('is_amendment', False),
        FieldAdder('transaction_namespace', NIMSP_TRANSACTION_NAMESPACE),
        FieldListFilter(FIELDNAMES + ['contributionid']))
def get_record_processor(year, import_ref):
    """Build the record-processing filter chain for one fiscal year's rows."""
    # Factory keeps the three length-truncation modifiers uniform.
    truncate_to = lambda limit: (lambda s: string_filter(s, limit))
    filters = [
        CSVFieldVerifier(),
        FieldRemover('id'),
        FieldRemover('county'),
        FieldAdder('fiscal_year', year),
        FieldAdder('import_reference', import_ref),
        FieldModifier(['notes', 'house_members', 'senate_members'],
                      truncate_to(1024)),
        FieldModifier(['description', 'house_parties', 'house_states',
                       'house_districts', 'senate_parties', 'senate_states',
                       'raw_recipient'],
                      truncate_to(512)),
        FieldModifier(['bill_section', 'bill_subsection'], truncate_to(256)),
        FieldModifier(['budget_amount', 'senate_amount', 'house_amount',
                       'omni_amount', 'final_amount'], amount_filter),
        FieldMerger({'description': ('project_heading', 'description')},
                    _prepend),
        FieldModifier(['presidential'],
                      lambda p: presidential_raw.get(p, '')),
        FieldModifier(['undisclosed'],
                      lambda u: undisclosed_raw.get(u, '')),
        FieldMerger({'locations': ('city', 'state')}, _normalize_locations),
        FieldMerger({'members': ('house_members', 'house_parties',
                                 'house_states', 'house_districts',
                                 'senate_members', 'senate_parties',
                                 'senate_states')},
                    _normalize_members, keep_fields=True),
        FieldMerger({'recipients': ('raw_recipient',)},
                    _normalize_recipients),
    ]
    return chain_filters(*filters)
def handle(self, csvpath, *args, **options):
    """Command entry point: load contribution rows from a denormalized CSV.

    Commits on success, rolls back (and re-raises) on any error, and always
    flushes stdout/stderr so progress output is not lost. Periodically calls
    reset_queries to keep query logging from accumulating.
    """
    loader = ContributionLoader(
        source=options.get('source'),
        description='load from denormalized CSVs',
        imported_by="loadcontributions.py (%s)" % os.getenv('LOGNAME', 'unknown'),
    )
    try:
        # Fix: the CSV handle was opened inline and never closed.
        with open(os.path.abspath(csvpath)) as csvfile:
            input_iterator = VerifiedCSVSource(
                csvfile, FIELDNAMES, skiprows=1 + int(options['skip']))
            output_func = chain_filters(
                LoaderEmitter(loader),
                #Every(self.COMMIT_FREQUENCY, lambda i: transaction.commit()),
                Every(self.COMMIT_FREQUENCY, progress_tick),
                Every(self.COMMIT_FREQUENCY, lambda i: reset_queries()),
            )
            record_processor = self.get_record_processor(loader.import_session)
            load_data(input_iterator, record_processor, output_func)
        transaction.commit()
    except BaseException:
        # Fix: the KeyboardInterrupt clause and the bare `except:` had
        # identical bodies; a single BaseException handler (what a bare
        # except catches) preserves the exact behavior.
        traceback.print_exception(*sys.exc_info())
        transaction.rollback()
        raise
    finally:
        sys.stdout.flush()
        sys.stderr.flush()
def get_allocated_record_processor():
    """Build the pipeline that normalizes allocated NIMSP contribution rows."""
    # Idiom fix: dict comprehension instead of dict([...]) over a list comp.
    input_type_conversions = {
        field: conversion_func
        for (field, _, conversion_func) in CSV_SQL_MAPPING
        if conversion_func
    }
    return chain_filters(
        CSVFieldVerifier(),
        MultiFieldConversionFilter(input_type_conversions),
        # munge fields
        BestAvailableFilter(),
        RecipientFilter(),
        SeatFilter(),
        IdsFilter(),
        ContributorTypeFilter(),
        FieldModifier('date', lambda x: str(x) if x else None),
        ZipCleaner(),
        # add static fields
        FieldAdder('is_amendment', False),
        FieldAdder('transaction_namespace', NIMSP_TRANSACTION_NAMESPACE),
        FieldListFilter(FIELDNAMES + ['contributionid']))
def get_record_processor(catcodes, candidates, committees):
    """Build the individual-contribution record-processing filter chain."""
    # Shared normalizer: uppercase non-empty strings, map falsy to "".
    to_upper = lambda s: s.upper() if s else ""
    pipeline = (
        CSVFieldVerifier(),
        # transaction filters
        FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
        FieldMerger({'transaction_id': ('cycle', 'fec_trans_id')},
                    lambda cycle, fecid: 'indiv:%s:%s' % (cycle, fecid),
                    keep_fields=True),
        FieldMerger({'transaction_type': ('type',)},
                    lambda t: t.strip().lower() if t else '',
                    keep_fields=True),
        # filing reference ID
        FieldRenamer({'filing_id': 'microfilm'}),
        # date stamp
        FieldModifier('date', parse_date_iso),
        # rename contributor / organization fields
        FieldRenamer({'contributor_name': 'contrib',
                      'parent_organization_name': 'ult_org'}),
        IndivRecipientFilter(candidates, committees),
        CommitteeFilter(committees),
        OrganizationFilter(),
        # create URNs
        FieldRenamer({'contributor_ext_id': 'contrib_id',
                      'committee_ext_id': 'cmte_id'}),
        # address and gender fields
        FieldRenamer({'contributor_address': 'street',
                      'contributor_city': 'city',
                      'contributor_state': 'state',
                      'contributor_zipcode': 'zipcode',
                      'contributor_gender': 'gender'}),
        FieldModifier('contributor_state', to_upper),
        FieldModifier('contributor_gender', to_upper),
        # employer/occupation filter
        FECOccupationFilter(),
        # catcode
        CatCodeFilter('contributor', catcodes),
        # static fields
        FieldAdder('contributor_type', 'I'),
        FieldAdder('is_amendment', False),
        # empty string unless the cycle flag is 'Y'; then True iff curr is 'Y'
        FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                    lambda curr, cycle: (curr == 'Y') if cycle == 'Y' else "",
                    keep_fields=False),
        # filter through spec
        SpecFilter(SPEC),
    )
    return chain_filters(*pipeline)
def get_unallocated_record_processor(salts_db):
    """Build the pipeline that parses, salts, and id-filters unallocated rows."""
    # The DCID filter is shared: SaltFilter receives it and it also runs last.
    dcid_filter = DCIDFilter(SALT_KEY)
    return chain_filters(
        CSVFieldVerifier(),
        FieldModifier(['contributionid'], parse_int),
        FieldModifier(['amount'], parse_decimal),
        SaltFilter(100, salts_db, dcid_filter),
        dcid_filter,
    )