def parse_committees():
    """Yield every record parsed from the ``cm.dat`` committee files.

    Files matching ``../data/crawl/fec/*/cm.dat`` are visited in sorted path
    order, and each path is written to stderr before it is parsed with
    ``def_cm``.  A path containing ``1980`` is first passed through
    ``fix80``.  Each file is closed as soon as its records are exhausted
    (or when the generator itself is closed).
    """
    for fn in sorted(glob.glob('../data/crawl/fec/*/cm.dat')):
        sys.stderr.write(fn + '\n')
        raw = open(fn)
        try:
            # Keep the raw handle separately: fix80 may hand back a wrapper,
            # but it is the underlying file that has to be closed.
            fh = raw
            if '1980' in fn:
                fh = fix80(def_cm, raw)
            for elt in parse_file(def_cm, fh):
                yield elt
        finally:
            raw.close()
def parse_others():
    """Yield every record parsed from the ``oth.dat`` files.

    Files matching ``../data/crawl/fec/*/oth.dat`` are visited in sorted
    path order and each path is written to stderr.  The record definition
    starts as ``def_oth_86`` and switches to ``def_oth_90`` / ``def_oth_96``
    once a path containing ``1990`` / ``1996`` is reached; the selection
    carries over to all later files.  Each file is closed after parsing.
    """
    cur_def = def_oth_86
    for fn in sorted(glob.glob('../data/crawl/fec/*/oth.dat')):
        sys.stderr.write(fn + '\n')
        if '1990' in fn:
            cur_def = def_oth_90
        if '1996' in fn:
            cur_def = def_oth_96
        fh = open(fn)
        try:
            for elt in parse_file(cur_def, fh):
                yield elt
        finally:
            fh.close()
def parse_others():
    """Yield every record parsed from the ``oth.dat`` files.

    Paths matching ``../data/crawl/fec/*/oth.dat`` are processed in sorted
    order; each one is echoed to stderr.  Parsing begins with ``def_oth_86``
    and moves to ``def_oth_90`` and then ``def_oth_96`` when a path
    containing ``1990`` or ``1996`` is encountered, staying on that
    definition for the files that follow.  The handle for each file is
    closed once its records have been yielded.
    """
    cur_def = def_oth_86
    for fn in sorted(glob.glob('../data/crawl/fec/*/oth.dat')):
        sys.stderr.write(fn + '\n')
        if '1990' in fn:
            cur_def = def_oth_90
        if '1996' in fn:
            cur_def = def_oth_96
        handle = open(fn)
        try:
            for elt in parse_file(cur_def, handle):
                yield elt
        finally:
            handle.close()
def parse_zip2dist(fh):
    """Yield ``(zip-zip4, state-district)`` string pairs from ``fh``.

    ``fh`` is parsed with ``def_zip4``; only rows whose ``_type`` is
    ``'ZIP+4 Detail'`` are used.  A ``congress_dist`` of ``'AL'`` is
    rewritten to ``'00'`` on the row.  When ``zip4_lo`` and ``zip4_hi``
    differ, one pair is produced for every value in the inclusive range,
    zero-padded to four characters.
    """
    for rec in parse_file(def_zip4, fh):
        if rec['_type'] == 'ZIP+4 Detail':
            if rec['congress_dist'] == 'AL':
                rec['congress_dist'] = '00'

            if rec['zip4_lo'] != rec['zip4_hi']:
                first = int(rec['zip4_lo'])
                last = int(rec['zip4_hi'])
                suffixes = [str(n).zfill(4) for n in xrange(first, last + 1)]
            else:
                suffixes = [rec['zip4_lo']]

            for suffix in suffixes:
                full_zip = rec['zip'] + '-' + suffix
                district = rec['state_abbrev'] + '-' + rec['congress_dist']
                yield full_zip, district
def parse_contributions():
    """Yield records from the ``indiv.dat.gz`` files whose path contains ``2004``.

    All paths matching ``../data/crawl/fec/*/indiv.dat.gz`` are walked in
    sorted order (and echoed to stderr) so that the record definition can
    be tracked: it becomes ``def_indiv_80`` / ``def_indiv_90`` /
    ``def_indiv_96`` when a path containing ``1980`` / ``1990`` / ``1996``
    is seen and stays in force for later paths.  Only paths containing
    ``2004`` are actually opened and parsed; the gzip handle is closed once
    its records are exhausted.
    """
    for fn in sorted(glob.glob('../data/crawl/fec/*/indiv.dat.gz')):
        sys.stderr.write(fn + '\n')
        # The layout selection must be updated even for files that are
        # skipped below, because it carries forward to later files.
        # NOTE(review): cur_def stays unbound until one of these markers has
        # been seen, exactly as before.
        is_1980 = '1980' in fn
        if is_1980:
            cur_def = def_indiv_80
        if '1990' in fn:
            cur_def = def_indiv_90
        if '1996' in fn:
            cur_def = def_indiv_96
        if '2004' not in fn:
            # Skip before opening, so ignored files cost no file handle.
            continue
        raw = gzip.open(fn)
        try:
            fh = raw
            if is_1980:
                fh = fix80(def_indiv_80, raw)
            for elt in parse_file(cur_def, fh):
                yield elt
        finally:
            raw.close()
def parse_transfers():
    """Yield every record parsed from the ``pas2.dat`` files.

    Files matching ``../data/crawl/fec/*/pas2.dat`` are visited in sorted
    path order and each path is written to stderr.  The record definition
    starts as ``def_pas2_80`` and is replaced by ``def_pas2_90``,
    ``def_pas2_94`` or ``def_pas2_96`` when a path containing that year is
    reached; the selection persists for later files.  A path containing
    ``1980`` is additionally passed through ``fix80``.  Each file is closed
    after its records have been yielded.
    """
    cur_def = def_pas2_80
    for fn in sorted(glob.glob('../data/crawl/fec/*/pas2.dat')):
        sys.stderr.write(fn + '\n')
        raw = open(fn)
        try:
            # fix80 may return a wrapper; the raw handle is what gets closed.
            fh = raw
            if '1980' in fn:
                cur_def = def_pas2_80
                fh = fix80(def_pas2_80, raw)
            if '1990' in fn:
                cur_def = def_pas2_90
            if '1994' in fn:
                cur_def = def_pas2_94
            if '1996' in fn:
                cur_def = def_pas2_96
            for elt in parse_file(cur_def, fh):
                yield elt
        finally:
            raw.close()
def parse_committees(latest=False, reverse=False):
    """Yield records parsed from the ``cm.dat`` committee files.

    Args:
        latest: when true, only the last path (in sorted order) matching
            ``../data/crawl/fec/*/cm.dat`` is parsed.
        reverse: when true, the selected paths are visited in reverse
            sorted order.

    Each path is written to stderr before it is parsed with ``def_cm``; a
    path containing ``1980`` is first passed through ``fix80``.  If no file
    matches, nothing is yielded (including when ``latest`` is true).  Each
    file is closed once its records are exhausted.
    """
    fns = sorted(glob.glob('../data/crawl/fec/*/cm.dat'))
    if latest:
        # Slicing (rather than indexing) keeps an empty match list empty
        # instead of raising IndexError.
        fns = fns[-1:]
    if reverse:
        fns = reversed(fns)
    for fn in fns:
        sys.stderr.write(fn + '\n')
        raw = open(fn)
        try:
            fh = raw
            if '1980' in fn:
                fh = fix80(def_cm, raw)
            for elt in parse_file(def_cm, fh):
                yield elt
        finally:
            raw.close()
def parse_geo_file(fn, args):
    """Parse geo file ``fn`` with the fixed-width ``'D'`` field list.

    A copy of ``GeoFields['D']`` is extended with a filler for the line
    break and a zero-width ``geo_file`` field whose value is ``fn``.  The
    path is printed, the file is obtained through ``getFile`` (directory,
    base name, ``args``), and the result of ``fixed_width.parse_file`` is
    returned with every line treated as record type ``'D'``.
    """
    fields = list(GeoFields['D'])
    # The geo files for usgeo.* use dos line breaks (two characters); the
    # congress geo files use unix line breaks (one character).
    eol_width = 2 if ('usgeo' in fn or 'by_state' in fn) else 1
    fields.append((None, eol_width, fixed_width.filler))
    fields.append(('geo_file', 0, lambda x: fn))
    print(fn)
    directory = os.path.dirname(fn)
    basename = os.path.basename(fn)
    source = getFile(directory, basename, args)
    return fixed_width.parse_file({'D': fields}, source, lambda x: 'D')
def parse_geo_file(fn, args):
    """Return the fixed-width parse of geo file ``fn``.

    The ``'D'`` layout from ``GeoFields`` is copied and given two extra
    entries: a line-break filler and a zero-width ``geo_file`` field that
    always evaluates to ``fn``.  ``fn`` is printed before the file is
    fetched via ``getFile`` and handed to ``fixed_width.parse_file``, which
    is told that every line is a ``'D'`` record.
    """
    layout = list(GeoFields['D'])
    if 'usgeo' in fn or 'by_state' in fn:
        # The geo files for usgeo.* use dos line breaks...
        newline_len = 2
    else:
        # ... the congress geo files use unix line breaks.
        newline_len = 1
    layout.append((None, newline_len, fixed_width.filler))
    layout.append(('geo_file', 0, lambda x: fn))
    print(fn)
    handle = getFile(os.path.dirname(fn), os.path.basename(fn), args)
    spec = {'D': layout}
    return fixed_width.parse_file(spec, handle, lambda x: 'D')
def parse_committees(latest=False, reverse=False):
    """Yield records parsed from the ``cm.dat`` committee files.

    Args:
        latest: if true, restrict parsing to the last path, in sorted
            order, that matches ``../data/crawl/fec/*/cm.dat``.
        reverse: if true, walk the selected paths in reverse sorted order.

    Every path is echoed to stderr and parsed with ``def_cm``; paths
    containing ``1980`` go through ``fix80`` first.  An empty match list
    yields nothing, even with ``latest`` set.  File handles are closed as
    soon as each file has been consumed.
    """
    fns = sorted(glob.glob('../data/crawl/fec/*/cm.dat'))
    if latest:
        # A slice tolerates an empty list; fns[-1] would raise IndexError.
        fns = fns[-1:]
    if reverse:
        fns = reversed(fns)
    for fn in fns:
        sys.stderr.write(fn + '\n')
        raw = open(fn)
        try:
            fh = raw
            if '1980' in fn:
                fh = fix80(def_cm, raw)
            for elt in parse_file(def_cm, fh):
                yield elt
        finally:
            raw.close()
def parse_transfers():
    """Yield every record parsed from the ``pas2.dat`` files.

    Paths matching ``../data/crawl/fec/*/pas2.dat`` are processed in sorted
    order and echoed to stderr.  Parsing starts with ``def_pas2_80``; when
    a path containing ``1990``, ``1994`` or ``1996`` is reached the
    matching ``def_pas2_*`` definition takes over and remains in effect for
    subsequent files.  Paths containing ``1980`` are also passed through
    ``fix80``.  The handle for each file is closed after parsing.
    """
    cur_def = def_pas2_80
    for fn in sorted(glob.glob('../data/crawl/fec/*/pas2.dat')):
        sys.stderr.write(fn + '\n')
        raw = open(fn)
        try:
            # Close the raw handle, not whatever fix80 returns.
            fh = raw
            if '1980' in fn:
                cur_def = def_pas2_80
                fh = fix80(def_pas2_80, raw)
            if '1990' in fn:
                cur_def = def_pas2_90
            if '1994' in fn:
                cur_def = def_pas2_94
            if '1996' in fn:
                cur_def = def_pas2_96
            for elt in parse_file(cur_def, fh):
                yield elt
        finally:
            raw.close()
def parse_contributions(latest=False):
    """Yield records parsed from the ``indiv.dat.gz`` contribution files.

    Args:
        latest: when true, only the last path (in sorted order) matching
            ``../data/crawl/fec/*/indiv.dat.gz`` is parsed, starting from
            ``def_indiv_96``.

    Each path is written to stderr.  The record definition becomes
    ``def_indiv_80`` / ``def_indiv_90`` / ``def_indiv_96`` when a path
    containing ``1980`` / ``1990`` / ``1996`` is seen and carries over to
    later files; a ``1980`` path is also passed through ``fix80``.  If no
    file matches, nothing is yielded.  Each gzip handle is closed once its
    records are exhausted.
    """
    fns = sorted(glob.glob('../data/crawl/fec/*/indiv.dat.gz'))
    if not fns:
        # Nothing to parse; also avoids indexing into an empty list below.
        return
    if latest:
        cur_def = def_indiv_96
        fns = fns[-1:]
    for fn in fns:
        sys.stderr.write(fn + '\n')
        raw = gzip.open(fn)
        try:
            # NOTE(review): without `latest`, cur_def is unbound until a
            # path containing 1980/1990/1996 has been seen (unchanged).
            fh = raw
            if '1980' in fn:
                cur_def = def_indiv_80
                fh = fix80(cur_def, raw)
            if '1990' in fn:
                cur_def = def_indiv_90
            if '1996' in fn:
                cur_def = def_indiv_96
            for elt in parse_file(cur_def, fh):
                yield elt
        finally:
            raw.close()
def parse_contributions(latest=False):
    """Yield records parsed from the ``indiv.dat.gz`` contribution files.

    Args:
        latest: if true, parse only the last path, in sorted order, that
            matches ``../data/crawl/fec/*/indiv.dat.gz``, with
            ``def_indiv_96`` as the initial record definition.

    Paths are echoed to stderr as they are processed.  Seeing ``1980``,
    ``1990`` or ``1996`` in a path switches the record definition to the
    matching ``def_indiv_*`` value, which then stays in effect; ``1980``
    paths additionally go through ``fix80``.  With no matching files the
    generator is simply empty.  Gzip handles are closed after each file.
    """
    fns = sorted(glob.glob('../data/crawl/fec/*/indiv.dat.gz'))
    if not fns:
        # Empty match list: yield nothing rather than fail on fns[-1].
        return
    if latest:
        cur_def = def_indiv_96
        fns = fns[-1:]
    for fn in fns:
        sys.stderr.write(fn + '\n')
        raw = gzip.open(fn)
        try:
            # NOTE(review): when `latest` is false, cur_def is only bound
            # after a 1980/1990/1996 path has been seen (unchanged).
            fh = raw
            if '1980' in fn:
                cur_def = def_indiv_80
                fh = fix80(cur_def, raw)
            if '1990' in fn:
                cur_def = def_indiv_90
            if '1996' in fn:
                cur_def = def_indiv_96
            for elt in parse_file(cur_def, fh):
                yield elt
        finally:
            raw.close()
def parse():
    """Yield the records of every ``*.LST`` file under ``../data/crawl/irs/eo/``.

    Each file is parsed with ``def_eo``, in the order ``glob.glob`` returns
    the paths.  Files are opened one at a time, only when iteration reaches
    them, and closed as soon as their records are exhausted, so the number
    of simultaneously open handles no longer grows with the number of
    files.  The result is still a plain iterator over the parsed records.
    """
    for fn in glob.glob('../data/crawl/irs/eo/*.LST'):
        fh = open(fn)
        try:
            for row in parse_file(def_eo, fh):
                yield row
        finally:
            fh.close()
# Field list handed to parse_file for the Form 5500 text file.
# NOTE(review): entries look like (field name, width, converter) -- confirm
# against parse_file.  The eight one-character 'unk4' entries all share the
# same name, exactly as in the original table.
def_5500 = [
    ('unk1_digits', 26, string),
    ('unk2', 8, date),
    ('unk3', 8, date),
] + [('unk4', 1, integer)] * 8 + [
    ('plan_name', 140, string),
    ('unk5', 8, date),
    ('corp_name', 141, string),
    ('street1', 35, string),
    ('street2', 108, string),
    ('city', 22, string),
    ('state', 2, state),
    ('zip', 5, digits),
    ('zip4', 4, digits),
    ('unk6', 3, string),
    (None, 792, filler),  # unparsed
    (None, 2, filler('\r\n')),
]

if __name__ == "__main__":
    import tools

    # Run as a script: parse the 2006 file and pass the records to tools.export.
    source = open('../data/crawl/irs/5500/F_5500_2006.txt')
    records = parse_file(def_5500, source)
    tools.export(records)
# Field list used to parse the Form 5500 text file with parse_file.
# NOTE(review): each entry appears to be (field name, width, converter);
# verify against parse_file.  'unk4' is deliberately listed eight times in
# a row, matching the original table.
def_5500 = [
    ('unk1_digits', 26, string),
    ('unk2', 8, date),
    ('unk3', 8, date),
]
def_5500 += [('unk4', 1, integer)] * 8
def_5500 += [
    ('plan_name', 140, string),
    ('unk5', 8, date),
    ('corp_name', 141, string),
    ('street1', 35, string),
    ('street2', 108, string),
    ('city', 22, string),
    ('state', 2, state),
    ('zip', 5, digits),
    ('zip4', 4, digits),
    ('unk6', 3, string),
    (None, 792, filler),  # unparsed
    (None, 2, filler('\r\n')),
]

if __name__ == "__main__":
    import tools

    # Script entry point: export the parsed 2006 Form 5500 records.
    input_path = '../data/crawl/irs/5500/F_5500_2006.txt'
    tools.export(parse_file(def_5500, open(input_path)))
('subsection_code', 189-187, string), ('affiliation', 1, enum), ('classification_code', 194-190, string), ('ruling_date', 200-194, date), ('deductibility_code', 1, string), ('foundation_code', 2, string), ('activity_code', 212-203, string), ('organization_code', 1, string), ('exempt_org_status_code', 2, string), ('advance_ruling_expiration', 221-215, date), ('tax_period', 227-221, string), ('asset_code', 1, string), ('income_code', 1, string), ('filing_requirement_code', 3, string), (None, 3, filler), ('accounting_period', 2, string), ('asset_amt', 250-237, integer), ('income_amt', 264-250, integer2), ('form_990_revenue_amt', 278-264, integer2), ('ntee_code', 282-278, string), ('sort_name', 318-282, string), (None, 2, filler('\r\n')) ] if __name__ == "__main__": import glob import tools for fn in glob.glob('../data/crawl/irs/eo/*.LST'): tools.export(parse_file(def_eo, file(fn)))
for row in parse_file(def_zip4, fh): if row['_type'] != 'ZIP+4 Detail': continue if row['congress_dist'] == 'AL': row['congress_dist'] = '00' if row['zip4_lo'] == row['zip4_hi']: zip4s = [row['zip4_lo']] else: zip4s = [str(x).zfill(4) for x in xrange(int(row['zip4_lo']), int(row['zip4_hi']) + 1)] for zip4 in zip4s: yield row['zip'] + '-' + zip4, row['state_abbrev'] + '-' + row['congress_dist'] if __name__ == "__main__": import sys, glob, tools def_map = { '--ctystate': def_ctystate, '--5digit': def_5digit, '--zip4': def_zip4, '--delstat': def_delstat } if sys.argv[1] in def_map: for fn in glob.glob(sys.argv[2] + '*.txt'): tools.export(parse_file(def_map[sys.argv[1]], file(fn))) elif sys.argv[1] == '--tiger': for fn in glob.glob(sys.argv[2] + '*/*.txt'): tools.export(parse_tigerzip(file(fn))) elif sys.argv[1] == '--tigerdat': for fn in glob.glob(sys.argv[2] + '*/TIGER.DAT'): tools.export(parse_tigerdat(file(fn)))
def parse_cansum():
    """Return ``parse_file``'s result for the 2008 ``weball.dat`` file.

    The file at ``../data/crawl/fec/2008/weball.dat`` is opened and parsed
    with the ``def_webl`` definition.
    """
    source = open("../data/crawl/fec/2008/weball.dat")
    return parse_file(def_webl, source)
def parse_candidates():
    """Yield every record parsed from the ``cn.dat`` candidate files.

    Files matching ``../data/crawl/fec/*/cn.dat`` are visited in sorted
    path order; each path is written to stderr and parsed with ``def_cn``.
    Each file is closed once its records have been yielded (or when the
    generator is closed).
    """
    for fn in sorted(glob.glob('../data/crawl/fec/*/cn.dat')):
        sys.stderr.write(fn + '\n')
        fh = open(fn)
        try:
            for elt in parse_file(def_cn, fh):
                yield elt
        finally:
            fh.close()
def parse_candidates():
    """Yield every record parsed from the ``cn.dat`` candidate files.

    Paths matching ``../data/crawl/fec/*/cn.dat`` are processed in sorted
    order.  Each path is echoed to stderr, parsed with ``def_cn``, and its
    file handle is closed as soon as parsing of that file finishes.
    """
    for fn in sorted(glob.glob('../data/crawl/fec/*/cn.dat')):
        sys.stderr.write(fn + '\n')
        handle = open(fn)
        try:
            for elt in parse_file(def_cn, handle):
                yield elt
        finally:
            handle.close()