('activity_code', 212 - 203, string), ('organization_code', 1, string), ('exempt_org_status_code', 2, string), ('advance_ruling_expiration', 221 - 215, date), ('tax_period', 227 - 221, string), ('asset_code', 1, string), ('income_code', 1, string), ('filing_requirement_code', 3, string), (None, 3, filler), ('accounting_period', 2, string), ('asset_amt', 250 - 237, integer), ('income_amt', 264 - 250, integer2), ('form_990_revenue_amt', 278 - 264, integer2), ('ntee_code', 282 - 278, string), ('sort_name', 318 - 282, string), (None, 2, filler('\r\n')) ] def parse(): return itertools.chain(*[ parse_file(def_eo, file(fn)) for fn in glob.glob('../data/crawl/irs/eo/*.LST') ]) if __name__ == "__main__": import sys if 'load' in sys.argv: from settings import db db.multiple_insert("exempt_org", parse(), seqname=False)
from fixed_width import parse_file, string, date, integer, filler, state, digits

# Record layout handed to parse_file for the Form 5500 data file.  Each entry
# is a 3-tuple; presumably (field name, width in characters, converter), with
# a name of None marking a span that is consumed but not kept -- confirm
# against fixed_width.parse_file.
# NOTE(review): 'unk4' appears eight times; if parse_file keys records by
# field name only one of those values can survive -- confirm this is intended.
def_5500 = [
    ('unk1_digits', 26, string),
    ('unk2', 8, date),
    ('unk3', 8, date),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('plan_name', 140, string),
    ('unk5', 8, date),
    ('corp_name', 141, string),
    ('street1', 35, string),
    ('street2', 108, string),
    ('city', 22, string),
    ('state', 2, state),
    ('zip', 5, digits),
    ('zip4', 4, digits),
    ('unk6', 3, string),
    (None, 792, filler),  # unparsed
    (None, 2, filler('\r\n'))
]

if __name__ == "__main__":
    import tools
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the input once the export call has returned instead of leaking
    # the handle until interpreter exit.
    with open('../data/crawl/irs/5500/F_5500_2006.txt') as fh:
        tools.export(parse_file(def_5500, fh))
import warnings import web from fixed_width import string, integer, date, filler, enum, state, digits olddate = date def date(s): out = olddate(s) if out == '--': return None else: return out boolean = enum(**{'1': True, '0': False, '': None}) pipe = (None, 1, filler('|')) def def_address(name): return [(name + '_address_1', string), (name + '_address_2', string), (name + '_address_city', string), (name + '_address_state', state), (name + '_address_zip', digits), (name + '_address_zip4', digits)] def_entity = [ ('form_id', integer), ('entity_id', integer), ('org_name', string), ('ein', digits), ('entity_name', string), ('entity_tile', string),
from fixed_width import string, integer, date, filler, enum, state, digits olddate = date def date(s): out = olddate(s) if out == "--": return None else: return out boolean = enum(**{"1": True, "0": False, "": None}) pipe = (None, 1, filler("|")) def def_address(name): return [ (name + "_address_1", string), (name + "_address_2", string), (name + "_address_city", string), (name + "_address_state", state), (name + "_address_zip", digits), (name + "_address_zip4", digits), ] def_entity = [ ("form_id", integer),
("other_loan_repay", 10, integer), ("debts_owed_by", 10, integer), ("total_indiv_contrib", 10, integer), ("state_code", 2, string), ("district", 2, string), ("spec_elec_status", 1, enum), ("primary_elec_status", 1, enum), #@@primary_general? ("runoff_elec_status", 1, enum), ("general_elec_status", 1, enum), ("general_elec_pct", 3, string), ("contrib_from_other_pc", 10, integer), ("contrib_from_pc", 10, integer), ("end_date", 8, date), ("refunds_to_indiv", 10, integer), ("refunds_to_commit", 10, integer), (None, 2, filler('\r\n')) ] # Supports files for CANSUM04 CANSUM02 CANSUM00 CANSUM98 CANSUM96 def_cansum = [ ('_type', 0, lambda x: 'Cadidate'), ("candidate_id", 9, string), ("candidate_name", 38, string), ("ico", 1, ico), ("party", 1, party), ("party_desig", 3, string), ("total_receipts", 10, integer), ("auth_trans_from", 10, integer), ("total_disbursments", 10, integer), ("trans_from_auth", 10, integer), ("begin_cash", 10, integer),