import warnings
import web
from fixed_width import string, integer, date, filler, enum, state, digits

# Keep a handle on the imported parser; the name `date` is rebound below.
olddate = date


def date(s):
    """Parse `s` with the imported `date` parser, turning "--" into None.

    Any result other than the literal string "--" is returned unchanged.
    NOTE(review): "--" is presumably what the underlying parser yields for
    an empty field -- confirm against fixed_width.date.
    """
    parsed = olddate(s)
    return None if parsed == "--" else parsed


# Maps "1" -> True, "0" -> False and "" -> None via `enum`.
boolean = enum(**{"1": True, "0": False, "": None})

# Unnamed one-character entry built from filler("|").
# NOTE(review): presumably the "|" column separator -- confirm.
pipe = (None, 1, filler("|"))


def def_address(name):
    """Return the six (key, type) address entries, each key prefixed by `name`.

    The keys are `name` followed by "_address_1", "_address_2",
    "_address_city", "_address_state", "_address_zip" and "_address_zip4",
    in that order.
    """
    suffix_types = (
        ("_address_1", string),
        ("_address_2", string),
        ("_address_city", string),
        ("_address_state", state),
        ("_address_zip", digits),
        ("_address_zip4", digits),
    )
    return [(name + suffix, field_type) for suffix, field_type in suffix_types]
""" Parser for USPS AIS records. """ from fixed_width import date, year, string, boolean, filler, enum, integer, \ FIELD_KEY, FIELD_LEN, FIELD_TYP, get_len, parse_line, parse_file ## Types used in definitions def halfbool1(s): return dict(A=True, B=True, C=False, D=False) def halfbool2(s): return dict(A=True, B=False, C=True, D=False) oddeven = enum(O='ODD', E='EVEN', B='BOTH') ## The definitions def def_copyright(n): return [ ('_type', 1, lambda s: 'File Header'), (None, 5, filler), ('copyright_statement', 12, string), (None, 1, filler), ('month', 2, integer), (None, 1, filler), ('year', 2, year), (None, 1, filler), ('copyright_owner', 4, string), (None, 1, filler),
# \x98 seems to be a typo return d.replace('\x98', '').decode('cp1251').rstrip() def date99(d): """where `d` is like MMDDYY""" return '19' + d[4:6] + "-" + d[0:2] + "-" + d[2:4] def date(d): """where `d` is like MMDDYYYY""" return d[4:8] + "-" + d[0:2] + "-" + d[2:4] def date2(d): "??DDMMYYYY" return d[6:10] + "-" + d[4:6] + "-" + d[2:4] party = enum(**{"1": "Democratic", "2": "Republican", "3": "Other"}) ico = enum(**{" ": " ", "I": "Incumbent", "C": "Challenger", "O": "Open Seat"}) pgi = enum(**{ 'C': "CONVENTION", 'G': "GENERAL", 'P': "PRIMARY", 'R': "RUNOFF", 'S': "SPECIAL", '0': '0', '2': '2', '4': '4', '5': '5', '6': '6', '8': '8' }) filing_freq = enum(M="MONTHLY", Q="QUARTERLY", T="TERMINATED")
Parse IRS' political organizations' form download. """ import warnings import web from fixed_width import string, integer, date, filler, enum, state, digits olddate = date def date(s): out = olddate(s) if out == '--': return None else: return out boolean = enum(**{'1': True, '0': False, '': None}) pipe = (None, 1, filler('|')) def def_address(name): return [(name + '_address_1', string), (name + '_address_2', string), (name + '_address_city', string), (name + '_address_state', state), (name + '_address_zip', digits), (name + '_address_zip4', digits)] def_entity = [ ('form_id', integer), ('entity_id', integer), ('org_name', string), ('ein', digits),
def date99(d):
    """Turn `d`, laid out as MMDDYY, into "19YY-MM-DD".

    The century is hard-coded to 19, and every space in the result is
    replaced by a zero.
    """
    yyyy, mm, dd = '19' + d[4:6], d[0:2], d[2:4]
    return "-".join((yyyy, mm, dd)).replace(' ', '0')


def date(d):
    """Turn `d`, laid out as MMDDYYYY, into "YYYY-MM-DD".

    Every space in the result is replaced by a zero.
    """
    yyyy, mm, dd = d[4:8], d[0:2], d[2:4]
    return "-".join((yyyy, mm, dd)).replace(' ', '0')


def date2(d):
    """Turn `d`, laid out as ??DDMMYYYY, into "YYYY-MM-DD".

    The first two characters are ignored, and every space in the result is
    replaced by a zero.
    """
    yyyy, mm, dd = d[6:10], d[4:6], d[2:4]
    return "-".join((yyyy, mm, dd)).replace(' ', '0')


# Code -> label tables, each wrapped by `enum`.
party = enum(**{
    '1': 'Democratic',
    '2': 'Republican',
    '3': 'Other',
})

ico = enum(**{
    ' ': ' ',
    'I': 'Incumbent',
    'C': 'Challenger',
    'O': 'Open Seat',
})

# Letter codes expand to a label; the digit codes map to themselves.
pgi = enum(**{
    'C': 'CONVENTION',
    'G': 'GENERAL',
    'P': 'PRIMARY',
    'R': 'RUNOFF',
    'S': 'SPECIAL',
    '0': '0',
    '2': '2',
    '4': '4',
    '5': '5',
    '6': '6',
    '8': '8',
})