def test_csv_emitter(self):
    """Check that CSVEmitter writes a header row and one CSV row per record.

    Reads ``self.output``, which must be a file-like object exposing
    ``getvalue()`` (presumably prepared by the test fixture -- not visible
    in this block).
    """
    ce = CSVEmitter(self.output, ('x', 'y', 'z'))
    data = ce.attach([{'x': 1, 'y': 2, 'z': 3}, {'x': 5, 'y': 5, 'z': 5}])
    # Exhaust the iterable returned by attach() before inspecting the
    # output, so every record has been pulled through the emitter.
    for _ in data:
        pass
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(self.output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n')
def test_csv_emitter(self):
    """Check CSVEmitter output against an in-memory buffer.

    The buffer type is chosen at run time: ``cStringIO.StringIO`` when the
    ``cStringIO`` module can be imported (Python 2.x), otherwise the
    ``StringIO`` name already in scope in this file (Python 3.x).
    """
    try:
        # Python 2.x: use the old cStringIO module.
        import cStringIO
    except ImportError:
        # Python 3.x: cStringIO does not exist, fall back to StringIO.
        # Only ImportError is caught so unrelated failures are not hidden.
        stream = StringIO()
    else:
        stream = cStringIO.StringIO()

    with closing(stream) as output:
        ce = CSVEmitter(output, ('x', 'y', 'z'))
        # list() drains the iterable returned by attach() so that all
        # records are emitted before the buffer is inspected.
        list(ce.attach([{'x': 1, 'y': 2, 'z': 3}, {'x': 5, 'y': 5, 'z': 5}]))
        self.assertEqual(output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n')
def lobbying_handler(inpath, outpath, infields, outfields):
    """Run the lobbying recipe from ``inpath`` to a CSV file at ``outpath``.

    Args:
        inpath: path of the input file, read with ``|`` as the quote char.
        outpath: path of the output CSV file (opened with mode ``'w'``).
        infields: field names handed to the CSVSource.
        outfields: field names handed to the CSVEmitter.
    """
    # Both files are closed when the blocks exit, even if the recipe raises.
    # NOTE(review): this relies on run_recipe doing all of its work before
    # returning, which the discarded return value suggests -- confirm.
    with open(inpath) as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                CSVSource(infile, fieldnames=infields, quotechar='|'),
                UnicodeFilter(),
                FieldRemover('Source'),
                FieldMerger({'registrant_name': ('Registrant', 'RegistrantRaw')}, name_proc),
                FieldMerger({'registrant_is_firm': ('IsFirm', )}, yn_proc),
                FieldMerger({'client_name': ('Client', 'Client_raw')}, name_proc),
                # Empty/missing amounts are treated as 0 before conversion.
                FieldMerger({'amount': ('Amount', )}, lambda x: float(x or 0)),
                FieldMerger({'affiliate': ('Affiliate', )}, yn_proc),
                FieldMerger({'filing_included_nsfs': ('IncludeNSFS', )}, yn_proc),
                FieldMerger({'include_in_industry_totals': ('Ind', )}, yn_proc),
                FieldMerger({'use': ('Use', )}, yn_proc),
                FieldRenamer({
                    'transaction_id': 'Uniqid',
                    'transaction_type': 'Type',
                    'transaction_type_desc': 'TypeLong',
                    'year': 'Year',
                    'client_category': 'Catcode',
                    'client_parent_name': 'Ultorg',
                    'filing_type': 'Self',
                }),
                # DebugEmitter(),
                CSVEmitter(outfile, fieldnames=outfields),
            )
def denormalize(self, data_path, cycles, catcodes, candidates, committees):
    """Denormalize the ``pac_other`` files of all cycles into one output file.

    Reads ``<data_path>/raw/crp/pac_other<cycle>.txt`` for every cycle in
    ``cycles`` and writes ``<data_path>/denormalized/denorm_pac2pac.txt``.

    Args:
        data_path: base directory containing the ``raw`` and
            ``denormalized`` subdirectories.
        cycles: iterable of cycle identifiers interpolated into the input
            file names.
        catcodes, candidates, committees: forwarded unchanged to
            ``self.get_record_processor``.
    """
    infiles = Files(*[os.path.join(data_path, 'raw', 'crp', 'pac_other%s.txt' % cycle)
                      for cycle in cycles])
    out_path = os.path.join(data_path, 'denormalized', 'denorm_pac2pac.txt')

    # The output file is closed when the block exits, even on error.
    # NOTE(review): relies on load_data finishing before it returns, which
    # the discarded return value suggests -- confirm.
    with open(out_path, 'w') as outfile:
        output_func = CSVEmitter(outfile, fieldnames=FIELDNAMES).process_record
        source = VerifiedCSVSource(infiles, fieldnames=FILE_TYPES['pac_other'], quotechar="|")
        record_processor = self.get_record_processor(catcodes, candidates, committees)
        load_data(source, record_processor, output_func)
def agency_handler(inpath, outpath, infields, outfields):
    """Run the agency recipe from ``inpath`` to a CSV file at ``outpath``.

    Args:
        inpath: path of the input file, read with ``|`` as the quote char.
        outpath: path of the output CSV file (opened with mode ``'w'``).
        infields: field names handed to the CSVSource.
        outfields: field names handed to the CSVEmitter.
    """
    # Both files are closed when the blocks exit, even if the recipe raises.
    # NOTE(review): this relies on run_recipe doing all of its work before
    # returning, which the discarded return value suggests -- confirm.
    with open(inpath) as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                CSVSource(infile, fieldnames=infields, quotechar='|'),
                # Adds an 'id' field with an empty-string value.
                FieldAdder('id', ''),
                FieldRenamer({
                    'transaction': 'UniqID',
                    'agency_name': 'Agency',
                    'agency_ext_id': 'AgencyID',
                }),
                # DebugEmitter(),
                CSVEmitter(outfile, fieldnames=outfields),
            )
def bills_handler(inpath, outpath, infields, outfields):
    """Run the bills recipe from ``inpath`` to a CSV file at ``outpath``.

    Args:
        inpath: path of the input file, read with ``|`` as the quote char.
        outpath: path of the output CSV file (opened with mode ``'w'``).
        infields: field names handed to the CSVSource.
        outfields: field names handed to the CSVEmitter.
    """
    # Both files are closed when the blocks exit, even if the recipe raises.
    # NOTE(review): this relies on run_recipe doing all of its work before
    # returning, which the discarded return value suggests -- confirm.
    with open(inpath) as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                CSVSource(infile, fieldnames=infields, quotechar='|'),
                # Adds an 'id' field with an empty-string value.
                FieldAdder('id', ''),
                FieldRenamer({
                    'bill_id': 'B_ID',
                    'issue': 'SI_ID',
                    'congress_no': 'CongNo',
                    'bill_name': 'Bill_Name',
                }),
                # DebugEmitter(),
                CSVEmitter(outfile, fieldnames=outfields),
            )
def lobbyist_handler(inpath, outpath, infields, outfields):
    """Run the lobbyist recipe from ``inpath`` to a CSV file at ``outpath``.

    Args:
        inpath: path of the input file, read with ``|`` as the quote char.
        outpath: path of the output CSV file (opened with mode ``'w'``).
        infields: field names handed to the CSVSource.
        outfields: field names handed to the CSVEmitter.
    """
    # Both files are closed when the blocks exit, even if the recipe raises.
    # NOTE(review): this relies on run_recipe doing all of its work before
    # returning, which the discarded return value suggests -- confirm.
    with open(inpath) as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                CSVSource(infile, fieldnames=infields, quotechar='|'),
                # Adds an 'id' field with an empty-string value.
                FieldAdder('id', ''),
                FieldMerger({'lobbyist_name': ('Lobbyist', 'Lobbyist_raw')}, name_proc),
                FieldMerger({'member_of_congress': ('FormerCongMem', )}, yn_proc),
                FieldRenamer({
                    'transaction': 'Uniqid',
                    'year': 'Year',
                    'lobbyist_ext_id': 'LobbyistID',
                    'candidate_ext_id': 'CID',
                    # 'OfficalPos' (sic) is the field name as used by the data;
                    # do not "correct" the spelling here.
                    'government_position': 'OfficalPos',
                }),
                # DebugEmitter(),
                CSVEmitter(outfile, fieldnames=outfields),
            )
def issue_handler(inpath, outpath, infields, outfields):
    """Run the issue recipe from ``inpath`` to a CSV file at ``outpath``.

    Unlike the sibling handlers this one reads through a VerifiedCSVSource
    and adds a FieldCountValidator (sized from ``FILE_TYPES['lob_issue']``)
    and a CSVFieldVerifier to the recipe.

    Args:
        inpath: path of the input file, read with ``|`` as the quote char.
        outpath: path of the output CSV file (opened with mode ``'w'``).
        infields: field names handed to the VerifiedCSVSource.
        outfields: field names handed to the CSVEmitter.
    """
    # Both files are closed when the blocks exit, even if the recipe raises.
    # NOTE(review): this relies on run_recipe doing all of its work before
    # returning, which the discarded return value suggests -- confirm.
    with open(inpath, 'r') as infile:
        with open(outpath, 'w') as outfile:
            run_recipe(
                VerifiedCSVSource(infile, fieldnames=infields, quotechar='|'),
                FieldCountValidator(len(FILE_TYPES['lob_issue'])),
                CSVFieldVerifier(),
                FieldRenamer({
                    'id': 'SI_ID',
                    'transaction': 'UniqID',
                    'general_issue_code': 'IssueID',
                    'general_issue': 'Issue',
                    'specific_issue': 'SpecIssue',
                    'year': 'Year',
                }),
                # DebugEmitter(),
                CSVEmitter(outfile, fieldnames=outfields),
            )
def denormalize(self, data_path, cycles, catcodes, candidates, committees):
    """Denormalize the ``indivs`` file of each cycle into its own output file.

    For every cycle, reads ``<data_path>/raw/crp/indivs<cycle>.txt`` and
    writes ``<data_path>/denormalized/denorm_indivs.<cycle>.txt``, reporting
    the two paths on stdout before processing.

    Args:
        data_path: base directory containing the ``raw`` and
            ``denormalized`` subdirectories.
        cycles: iterable of cycle identifiers interpolated into file names.
        catcodes, candidates, committees: forwarded unchanged to
            ``self.get_record_processor``.
    """
    # A single processor is built once and reused for every cycle.
    record_processor = self.get_record_processor(catcodes, candidates, committees)

    for cycle in cycles:
        in_path = os.path.join(data_path, 'raw', 'crp', 'indivs%s.txt' % cycle)
        out_path = os.path.join(data_path, 'denormalized', 'denorm_indivs.%s.txt' % cycle)

        # Close both files at the end of each cycle (even on error) so
        # handles do not accumulate across the loop.
        # NOTE(review): relies on load_data finishing before it returns,
        # which the discarded return value suggests -- confirm.
        with open(in_path, 'r') as infile:
            with open(out_path, 'w') as outfile:
                sys.stdout.write('Reading from %s, writing to %s...\n' % (in_path, out_path))
                input_source = VerifiedCSVSource(infile, fieldnames=FILE_TYPES['indivs'], quotechar="|")
                output_func = CSVEmitter(outfile, fieldnames=FIELDNAMES).process_record
                load_data(input_source, record_processor, output_func)
def process_unallocated(out_dir, salts_db):
    """Rewrite the temporary unallocated-contributions CSV as the final CSV.

    Reads ``<out_dir>/nimsp_unallocated_contributions.csv.TMP``, passes each
    record through the processor returned by
    ``NIMSPDenormalize.get_unallocated_record_processor(salts_db)`` and
    writes the result to ``<out_dir>/nimsp_unallocated_contributions.csv``.

    Args:
        out_dir: directory holding the temporary input and the final output.
        salts_db: forwarded unchanged to the record-processor factory.
    """
    unallocated_csv_filename = os.path.join(
        out_dir, 'nimsp_unallocated_contributions.csv.TMP')
    salted_csv_filename = os.path.join(
        out_dir, 'nimsp_unallocated_contributions.csv')

    # Open the already-joined path directly: joining out_dir onto it a second
    # time yields '<out_dir>/<out_dir>/...' whenever out_dir is relative.
    # The with-blocks close both files even when processing raises.
    with open(unallocated_csv_filename, 'r') as unallocated_csv:
        with open(salted_csv_filename, 'w') as salted_csv:
            # The input carries one extra 'contributionid' column that is not
            # part of the emitted FIELDNAMES. skiprows=1 is passed to the
            # source (presumably to skip a header row -- confirm).
            source = VerifiedCSVSource(unallocated_csv,
                                       fieldnames=FIELDNAMES + ['contributionid'],
                                       skiprows=1)
            output_func = CSVEmitter(salted_csv, FIELDNAMES).process_record
            load_data(source,
                      NIMSPDenormalize.get_unallocated_record_processor(salts_db),
                      output_func)
def test_csv_emitter(self):
    """Verify the CSV text CSVEmitter produces for two sample records.

    Uses ``self.output`` as the destination; it must be a file-like object
    with a ``getvalue()`` method (presumably created by the test fixture --
    not visible in this block).
    """
    ce = CSVEmitter(self.output, ('x', 'y', 'z'))
    data = ce.attach([{'x': 1, 'y': 2, 'z': 3}, {'x': 5, 'y': 5, 'z': 5}])
    # Consume the iterable returned by attach() so all records have gone
    # through the emitter before the output is compared.
    for _ in data:
        pass
    # Expected: a header line followed by one CRLF-terminated line per record.
    # assertEqual is used because the assertEquals alias is deprecated and
    # no longer exists in Python 3.12+.
    self.assertEqual(self.output.getvalue(), 'x,y,z\r\n1,2,3\r\n5,5,5\r\n')