def get_record_processor(catcodes, candidates, committees):
        """Assemble the filter chain used for PAC-to-candidate records.

        ``committees`` is handed to ContributorFilter, ``candidates`` to
        Pac2CandRecipientFilter and ``catcodes`` to CatCodeFilter.  The
        return value is whatever ``chain_filters`` produces for the filters
        listed below, in the order given.
        """
        def make_transaction_id(cycle, fecid):
            # Namespaced ID: 'pac2cand:<cycle>:<fec_rec_no>'.
            return 'pac2cand:%s:%s' % (cycle, fecid)

        def normalize_type(t):
            return t.strip().lower()

        def candidacy_status(curr, cycle):
            # Blank unless cycle_cand is 'Y'; otherwise a boolean telling
            # whether curr_cand is 'Y' as well.
            if cycle != 'Y':
                return ""
            return curr == 'Y'

        steps = [
            CSVFieldVerifier(),

            # transaction filters
            FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
            FieldMerger({'transaction_id': ('cycle', 'fec_rec_no')},
                        make_transaction_id,
                        keep_fields=True),
            FieldMerger({'transaction_type': ('type', )}, normalize_type),

            # date stamp
            FieldModifier('date', parse_date_iso),

            # contributor and recipient fields
            ContributorFilter(committees),
            FieldRenamer({'contributor_ext_id': 'pac_id'}),
            FieldAdder('contributor_type', 'C'),
            Pac2CandRecipientFilter(candidates),
            FieldAdder('recipient_type', 'P'),

            # catcode
            CatCodeFilter('contributor', catcodes),

            # add static fields
            FieldAdder('is_amendment', False),
            FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                        candidacy_status,
                        keep_fields=False),

            # filter through spec
            SpecFilter(SPEC),
        ]
        return chain_filters(*steps)
예제 #2
0
    def test_field_renamer(self):
        """Run FieldRenamer({'x': 'a', 'y': 'b'}) through assert_filter_result.

        The expected records carry the keys 'x', 'y' and 'c'.
        """
        renamer = FieldRenamer({'x': 'a', 'y': 'b'})

        columns = ('x', 'y', 'c')
        rows = [(1, 2, 3), (5, 5, 5), (1, 10, 100)]
        expected_data = [dict(zip(columns, row)) for row in rows]

        self.assert_filter_result(renamer, expected_data)
예제 #3
0
def lobbying_handler(inpath, outpath, infields, outfields):
    """Convert the lobbying CSV at ``inpath`` and write the result to ``outpath``.

    inpath    -- path of the input file; read as CSV with '|' as quote char.
    outpath   -- path of the output CSV (opened for writing, so truncated).
    infields  -- field names passed to CSVSource for the input columns.
    outfields -- field names passed to CSVEmitter for the output columns.

    Both files are opened in ``with`` blocks so they are closed (and the
    output flushed) when the recipe finishes or raises, instead of being
    left to the garbage collector.
    """
    with open(inpath) as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                CSVSource(infile, fieldnames=infields, quotechar='|'),
                UnicodeFilter(),
                FieldRemover('Source'),
                FieldMerger({'registrant_name': ('Registrant', 'RegistrantRaw')},
                            name_proc),
                FieldMerger({'registrant_is_firm': ('IsFirm', )}, yn_proc),
                FieldMerger({'client_name': ('Client', 'Client_raw')}, name_proc),
                # Empty/missing amounts are treated as 0.
                FieldMerger({'amount': ('Amount', )}, lambda x: float(x or 0)),
                FieldMerger({'affiliate': ('Affiliate', )}, yn_proc),
                FieldMerger({'filing_included_nsfs': ('IncludeNSFS', )}, yn_proc),
                FieldMerger({'include_in_industry_totals': ('Ind', )}, yn_proc),
                FieldMerger({'use': ('Use', )}, yn_proc),
                # NOTE(review): keys look like output names and values like
                # input column names -- confirm against FieldRenamer.
                FieldRenamer({
                    'transaction_id': 'Uniqid',
                    'transaction_type': 'Type',
                    'transaction_type_desc': 'TypeLong',
                    'year': 'Year',
                    'client_category': 'Catcode',
                    'client_parent_name': 'Ultorg',
                    'filing_type': 'Self',
                }),
                #DebugEmitter(),
                CSVEmitter(outfile, fieldnames=outfields),
            )
예제 #4
0
    def get_record_processor(catcodes, candidates, committees):
        """Assemble the filter chain used for PAC-to-PAC records.

        ``committees`` is handed to CommitteeFilter and, together with
        ``candidates``, to Pac2PacRecipientFilter.  ``catcodes`` is accepted
        but not used by this chain.  The return value is whatever
        ``chain_filters`` produces for the filters below, in order.
        """
        def make_transaction_id(cycle, fecid):
            # Namespaced ID: 'pac2pac:<cycle>:<fec_rec_no>'.
            return 'pac2pac:%s:%s' % (cycle, fecid)

        def normalize_type(t):
            return t.strip().lower()

        def upper_or_blank(s):
            return s.upper() if s else ""

        def stripped_upper_or_blank(s):
            return s.strip().upper() if s else ""

        def is_amendment(s):
            # Anything other than 'N' (ignoring case/whitespace) counts.
            return s.strip().upper() != 'N'

        def candidacy_status(curr, cycle):
            # Blank unless cycle_cand is 'Y'; otherwise a boolean telling
            # whether curr_cand is 'Y' as well.
            if cycle != 'Y':
                return ""
            return curr == 'Y'

        steps = [
            CSVFieldVerifier(),

            ContribRecipFilter(),
            CommitteeFilter(committees),
            Pac2PacRecipientFilter(candidates, committees),

            # transaction filters
            FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
            FieldMerger({'transaction_id': ('cycle', 'fec_rec_no')},
                        make_transaction_id,
                        keep_fields=True),
            FieldMerger({'transaction_type': ('type',)}, normalize_type),

            # filing reference ID
            FieldRenamer({'filing_id': 'microfilm'}),

            # date stamp
            FieldModifier('date', parse_date_iso),

            # catcode
            FieldMerger({'contributor_category': ('real_code',)},
                        upper_or_blank,
                        keep_fields=True),
            FieldMerger({'recipient_category': ('recip_prim_code',)},
                        upper_or_blank,
                        keep_fields=True),

            FieldRenamer({
                'contributor_city': 'city',
                'contributor_state': 'state',
                'contributor_zipcode': 'zipcode',
                'contributor_occupation': 'fec_occ_emp',
                'recipient_party': 'party',
            }),
            FieldModifier('contributor_state', stripped_upper_or_blank),

            FieldAdder('contributor_type', 'C'),

            # add static fields
            FieldAdder('jurisdiction', 'F'),
            FieldMerger({'is_amendment': ('amend',)}, is_amendment),

            FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                        candidacy_status,
                        keep_fields=False),

            # filter through spec
            SpecFilter(SPEC),
        ]
        return chain_filters(*steps)
예제 #5
0
def agency_handler(inpath, outpath, infields, outfields):
    """Convert the agency CSV at ``inpath`` and write the result to ``outpath``.

    inpath    -- path of the input file; read as CSV with '|' as quote char.
    outpath   -- path of the output CSV (opened for writing, so truncated).
    infields  -- field names passed to CSVSource for the input columns.
    outfields -- field names passed to CSVEmitter for the output columns.

    Both files are opened in ``with`` blocks so they are closed (and the
    output flushed) when the recipe finishes or raises.
    """
    with open(inpath) as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                CSVSource(infile, fieldnames=infields, quotechar='|'),
                # Every record gets an empty 'id' field.
                FieldAdder('id', ''),
                FieldRenamer({
                    'transaction': 'UniqID',
                    'agency_name': 'Agency',
                    'agency_ext_id': 'AgencyID',
                }),
                #DebugEmitter(),
                CSVEmitter(outfile, fieldnames=outfields),
            )
예제 #6
0
def bills_handler(inpath, outpath, infields, outfields):
    """Convert the bills CSV at ``inpath`` and write the result to ``outpath``.

    inpath    -- path of the input file; read as CSV with '|' as quote char.
    outpath   -- path of the output CSV (opened for writing, so truncated).
    infields  -- field names passed to CSVSource for the input columns.
    outfields -- field names passed to CSVEmitter for the output columns.

    Both files are opened in ``with`` blocks so they are closed (and the
    output flushed) when the recipe finishes or raises.
    """
    with open(inpath) as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                CSVSource(infile, fieldnames=infields, quotechar='|'),
                # Every record gets an empty 'id' field.
                FieldAdder('id', ''),
                FieldRenamer({
                    'bill_id': 'B_ID',
                    'issue': 'SI_ID',
                    'congress_no': 'CongNo',
                    'bill_name': 'Bill_Name',
                }),
                #DebugEmitter(),
                CSVEmitter(outfile, fieldnames=outfields),
            )
예제 #7
0
 def run(self):
     """Load the CSV at ``self.inpath`` into LobbyistBundle rows.

     Reads ``self.inpath`` and ``self.field_map``.  The input file is opened
     in a ``with`` block so it is closed once the recipe finishes or raises.
     """
     dropped_fields = ('committee_fec_id committee_name report_year '
                       'report_type is_amendment start_date end_date '
                       'reporting_period_amount_all '
                       'semi_annual_amount_all').split()

     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             FieldRenamer(self.field_map),
             FieldRemover(dropped_fields),
             BundleFilter(),
             #FieldModifier('file_num', lambda x: Bundle.objects.get(pk=x)),
             # Convert any stray floats to integers; empty values become None
             FieldModifier('amount semi_annual_amount'.split(),
                           lambda x: int(round(float(x))) if x else None),
             NoneFilter(),
             UnicodeFilter(),
             CountEmitter(every=500),
             #DebugEmitter(),
             SimpleDjangoModelEmitter(LobbyistBundle)
         )
예제 #8
0
def lobbyist_handler(inpath, outpath, infields, outfields):
    """Convert the lobbyist CSV at ``inpath`` and write the result to ``outpath``.

    inpath    -- path of the input file; read as CSV with '|' as quote char.
    outpath   -- path of the output CSV (opened for writing, so truncated).
    infields  -- field names passed to CSVSource for the input columns.
    outfields -- field names passed to CSVEmitter for the output columns.

    Both files are opened in ``with`` blocks so they are closed (and the
    output flushed) when the recipe finishes or raises.
    """
    with open(inpath) as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                CSVSource(infile, fieldnames=infields, quotechar='|'),
                # Every record gets an empty 'id' field.
                FieldAdder('id', ''),
                FieldMerger({'lobbyist_name': ('Lobbyist', 'Lobbyist_raw')},
                            name_proc),
                FieldMerger({'member_of_congress': ('FormerCongMem', )}, yn_proc),
                FieldRenamer({
                    'transaction': 'Uniqid',
                    'year': 'Year',
                    'lobbyist_ext_id': 'LobbyistID',
                    'candidate_ext_id': 'CID',
                    # 'OfficalPos' is kept exactly as spelled here; it must
                    # match the incoming field name.
                    'government_position': 'OfficalPos',
                }),
                #DebugEmitter(),
                CSVEmitter(outfile, fieldnames=outfields),
            )
예제 #9
0
 def run(self):
     """Load the CSV at ``self.inpath`` through AgencyLoader.

     Reads ``self.inpath`` and ``self.log``.  The input file is opened in a
     ``with`` block so it is closed once the recipe finishes or raises.
     """
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             # Empty year values become None instead of failing int().
             FieldModifier('year', lambda x: int(x) if x else None),
             FieldRenamer({'transaction_id': 'transaction'}),
             NoneFilter(),
             TRANSACTION_FILTER,
             UnicodeFilter(),
             CountEmitter(every=10000, log=self.log),
             LoaderEmitter(
                 AgencyLoader(
                     source=self.inpath,
                     description='load from denormalized CSVs',
                     # Tag the import with the invoking user's LOGNAME.
                     imported_by="loadlobbying (%s)" %
                     os.getenv('LOGNAME', 'unknown'),
                     log=self.log,
                 ),
                 commit_every=100),
         )
예제 #10
0
def issue_handler(inpath, outpath, infields, outfields):
    """Convert the issue CSV at ``inpath`` and write the result to ``outpath``.

    inpath    -- path of the input file; read as CSV with '|' as quote char.
    outpath   -- path of the output CSV (opened for writing, so truncated).
    infields  -- field names passed to VerifiedCSVSource for the input columns.
    outfields -- field names passed to CSVEmitter for the output columns.

    Both files are opened in ``with`` blocks so they are closed (and the
    output flushed) when the recipe finishes or raises.
    """
    with open(inpath, 'r') as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                VerifiedCSVSource(infile,
                                  fieldnames=infields,
                                  quotechar='|'),
                # Expected field count comes from the 'lob_issue' entry.
                FieldCountValidator(len(FILE_TYPES['lob_issue'])),
                CSVFieldVerifier(),
                FieldRenamer({
                    'id': 'SI_ID',
                    'transaction': 'UniqID',
                    'general_issue_code': 'IssueID',
                    'general_issue': 'Issue',
                    'specific_issue': 'SpecIssue',
                    'year': 'Year',
                }),
                #DebugEmitter(),
                CSVEmitter(outfile, fieldnames=outfields),
            )
예제 #11
0
 def run(self):
     """Load the CSV at ``self.inpath`` into Bundle rows.

     Reads ``self.inpath`` and ``self.field_map``.  The input file is opened
     in a ``with`` block so it is closed once the recipe finishes or raises.
     """
     with open(self.inpath) as infile:
         run_recipe(
             CSVSource(infile),
             FieldRenamer(self.field_map),
             # Values are [N|A]. Convert to boolean.
             FieldModifier('is_amendment',
                           lambda x: x == 'A'),
             # Strip '$' and ',' and convert any stray floats to integers;
             # empty values become None
             FieldModifier('reporting_period_amount semi_annual_amount'.split(),
                           lambda x: int(round(float(x.replace('$','').replace(',','')))) if x else None),
             # Convert date formats (MM/DD/YYYY); empty values become None
             FieldModifier('start_date end_date filing_date'.split(),
                           lambda x: datetime.strptime(x, '%m/%d/%Y') if x else None),
             # TODO: These following two lines (and the field value) need to be thoroughly tested on the next bundling load
             # NOTE(review): unlike the modifiers above, this lambda has no
             # guard for an empty/None first_image_num -- confirm whether
             # that can occur.
             FieldCopier({'pdf_url': 'first_image_num'}),
             FieldModifier('pdf_url',
                           lambda x: 'http://query.nictusa.com/pdf/{0}/{1}/{1}.pdf'.format(x[-3:], x)),
             NoneFilter(),
             UnicodeFilter(),
             CountEmitter(every=200),
             #DebugEmitter(),
             SimpleDjangoModelEmitter(Bundle)
         )
예제 #12
0
    def get_record_processor(catcodes, candidates, committees):
        """Assemble the filter chain used for individual-contribution records.

        ``candidates`` and ``committees`` are handed to IndivRecipientFilter,
        ``committees`` also to CommitteeFilter, and ``catcodes`` to
        CatCodeFilter.  The return value is whatever ``chain_filters``
        produces for the filters below, in order.
        """
        def make_transaction_id(cycle, fecid):
            # Namespaced ID: 'indiv:<cycle>:<fec_trans_id>'.
            return 'indiv:%s:%s' % (cycle, fecid)

        def normalize_type(t):
            return t.strip().lower() if t else ''

        def upper_or_blank(s):
            return s.upper() if s else ""

        def candidacy_status(curr, cycle):
            # Blank unless cycle_cand is 'Y'; otherwise a boolean telling
            # whether curr_cand is 'Y' as well.
            if cycle != 'Y':
                return ""
            return curr == 'Y'

        steps = [
            CSVFieldVerifier(),

            # transaction filters
            FieldAdder('transaction_namespace', CRP_TRANSACTION_NAMESPACE),
            FieldMerger({'transaction_id': ('cycle', 'fec_trans_id')},
                        make_transaction_id,
                        keep_fields=True),
            FieldMerger({'transaction_type': ('type', )},
                        normalize_type,
                        keep_fields=True),

            # filing reference ID
            FieldRenamer({'filing_id': 'microfilm'}),

            # date stamp
            FieldModifier('date', parse_date_iso),

            # rename contributor, organization, and parent_organization fields
            FieldRenamer({
                'contributor_name': 'contrib',
                'parent_organization_name': 'ult_org',
            }),
            IndivRecipientFilter(candidates, committees),
            CommitteeFilter(committees),
            OrganizationFilter(),

            # create URNs
            FieldRenamer({
                'contributor_ext_id': 'contrib_id',
                'committee_ext_id': 'cmte_id',
            }),

            # address and gender fields
            FieldRenamer({
                'contributor_address': 'street',
                'contributor_city': 'city',
                'contributor_state': 'state',
                'contributor_zipcode': 'zipcode',
                'contributor_gender': 'gender',
            }),
            FieldModifier('contributor_state', upper_or_blank),
            FieldModifier('contributor_gender', upper_or_blank),

            # employer/occupation filter
            FECOccupationFilter(),

            # catcode
            CatCodeFilter('contributor', catcodes),

            # add static fields
            FieldAdder('contributor_type', 'I'),
            FieldAdder('is_amendment', False),
            FieldMerger({'candidacy_status': ('curr_cand', 'cycle_cand')},
                        candidacy_status,
                        keep_fields=False),

            # filter through spec
            SpecFilter(SPEC),
        ]
        return chain_filters(*steps)