Python parse_file Exemples, fixed_width.parse_file Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : fec_cobol.py Projet : jdthomas/watchdog

def parse_committees():
    """Yield every record parsed from the crawled FEC ``cm.dat`` files.

    Files matching ``../data/crawl/fec/*/cm.dat`` are visited in sorted
    path order and each path is written to stderr before it is read.
    Paths containing ``'1980'`` are passed through ``fix80`` first.  All
    files are parsed with the ``def_cm`` layout.
    """
    for fn in sorted(glob.glob('../data/crawl/fec/*/cm.dat')):
        sys.stderr.write(fn + '\n')
        # open() instead of the Python-2-only file() constructor; the with
        # block closes the handle, which the original left to the GC.
        with open(fn) as raw:
            fh = fix80(def_cm, raw) if '1980' in fn else raw
            for elt in parse_file(def_cm, fh):
                yield elt

Exemple #2

0

Afficher le fichier

Fichier : fec_cobol.py Projet : jdthomas/watchdog

def parse_others():
    """Yield every record parsed from the crawled FEC ``oth.dat`` files.

    Files are visited in sorted path order and each path is written to
    stderr.  The layout starts as ``def_oth_86`` and switches when a path
    containing ``'1990'`` or ``'1996'`` is reached; the switch persists for
    all later files.
    """
    cur_def = def_oth_86
    for fn in sorted(glob.glob('../data/crawl/fec/*/oth.dat')):
        sys.stderr.write(fn + '\n')
        if '1990' in fn:
            cur_def = def_oth_90
        if '1996' in fn:
            cur_def = def_oth_96
        # open() instead of the Python-2-only file() constructor; the with
        # block closes the handle, which the original never did.
        with open(fn) as fh:
            for elt in parse_file(cur_def, fh):
                yield elt

Exemple #3

0

Afficher le fichier

Fichier : fec_cobol.py Projet : ChunHungLiu/watchdog-1

def parse_others():
    """Yield every record parsed from the crawled FEC ``oth.dat`` files.

    Paths are processed in sorted order and echoed to stderr.  Parsing
    begins with the ``def_oth_86`` layout; a path containing ``'1990'``
    selects ``def_oth_90`` and one containing ``'1996'`` selects
    ``def_oth_96``.  The most recently selected layout stays in effect for
    the files that follow.
    """
    cur_def = def_oth_86
    for fn in sorted(glob.glob('../data/crawl/fec/*/oth.dat')):
        sys.stderr.write(fn + '\n')
        if '1990' in fn:
            cur_def = def_oth_90
        if '1996' in fn:
            cur_def = def_oth_96
        # Use open() (file() is Python-2-only) and close the handle as soon
        # as this file's records have been yielded.
        with open(fn) as fh:
            for elt in parse_file(cur_def, fh):
                yield elt

Exemple #4

0

Afficher le fichier

def parse_zip2dist(fh):
    """Yield ``('<zip>-<zip4>', '<state_abbrev>-<congress_dist>')`` pairs.

    Rows are read from ``fh`` with the ``def_zip4`` layout.  Only rows whose
    ``_type`` is ``'ZIP+4 Detail'`` are used.  A ``congress_dist`` of
    ``'AL'`` is rewritten to ``'00'`` on the row.  When ``zip4_lo`` and
    ``zip4_hi`` differ, one pair is produced for every integer in the
    inclusive range, zero-padded to four digits.
    """
    for row in parse_file(def_zip4, fh):
        if row['_type'] == 'ZIP+4 Detail':
            if row['congress_dist'] == 'AL':
                row['congress_dist'] = '00'
            lo = row['zip4_lo']
            if lo != row['zip4_hi']:
                first = int(row['zip4_lo'])
                last = int(row['zip4_hi'])
                # Expand the inclusive lo..hi range into 4-digit strings.
                zip4s = [str(n).zfill(4) for n in xrange(first, last + 1)]
            else:
                zip4s = [lo]
            for zip4 in zip4s:
                key = row['zip'] + '-' + zip4
                district = row['state_abbrev'] + '-' + row['congress_dist']
                yield key, district

Exemple #5

0

Afficher le fichier

Fichier : fec_cobol.py Projet : jdthomas/watchdog

def parse_contributions():
    """Yield records from the crawled FEC ``indiv.dat.gz`` files for 2004.

    Every matching path is visited in sorted order and written to stderr,
    and the layout is updated when a path contains ``'1980'``, ``'1990'``
    or ``'1996'``.  Only paths containing ``'2004'`` are actually opened
    and parsed, using whichever layout was selected last.

    NOTE(review): as in the original, ``cur_def`` is only bound once a path
    containing one of the three year markers has been seen.
    """
    for fn in sorted(glob.glob('../data/crawl/fec/*/indiv.dat.gz')):
        sys.stderr.write(fn + '\n')
        if '1980' in fn:
            cur_def = def_indiv_80
        if '1990' in fn:
            cur_def = def_indiv_90
        if '1996' in fn:
            cur_def = def_indiv_96
        # Decide before opening: the original opened (and for 1980 ran
        # fix80 on) every archive, then discarded all but the 2004 ones.
        if '2004' not in fn:
            continue
        # GzipFile supports the with statement from Python 2.7 onwards.
        with gzip.open(fn) as raw:
            fh = fix80(def_indiv_80, raw) if '1980' in fn else raw
            for elt in parse_file(cur_def, fh):
                yield elt

Exemple #6

0

Afficher le fichier

Fichier : fec_cobol.py Projet : ChunHungLiu/watchdog-1

def parse_transfers():
    """Yield every record parsed from the crawled FEC ``pas2.dat`` files.

    Files are visited in sorted path order and each path is written to
    stderr.  The layout starts as ``def_pas2_80`` and is replaced when a
    path contains ``'1990'``, ``'1994'`` or ``'1996'``; the replacement
    persists for later files.  Paths containing ``'1980'`` are passed
    through ``fix80`` before parsing.
    """
    cur_def = def_pas2_80
    for fn in sorted(glob.glob('../data/crawl/fec/*/pas2.dat')):
        sys.stderr.write(fn + '\n')
        # open() instead of the Python-2-only file() constructor; the with
        # block closes the handle, which the original never did.
        with open(fn) as raw:
            fh = raw
            if '1980' in fn:
                cur_def = def_pas2_80
                fh = fix80(def_pas2_80, raw)
            if '1990' in fn:
                cur_def = def_pas2_90
            if '1994' in fn:
                cur_def = def_pas2_94
            if '1996' in fn:
                cur_def = def_pas2_96
            for elt in parse_file(cur_def, fh):
                yield elt

Exemple #7

0

Afficher le fichier

Fichier : fec_cobol.py Projet : ChunHungLiu/watchdog-1

def parse_committees(latest=False, reverse=False):
    """Yield records parsed from the crawled FEC ``cm.dat`` files.

    Args:
        latest: when true, only the last path in sorted order is parsed.
        reverse: when true, the selected paths are visited in reverse
            sorted order.

    Each path is written to stderr before it is read.  Paths containing
    ``'1980'`` are passed through ``fix80`` first.  All files are parsed
    with the ``def_cm`` layout.  Nothing is yielded when no file matches.
    """
    fns = sorted(glob.glob('../data/crawl/fec/*/cm.dat'))
    if latest:
        # Slice rather than index so an empty glob result yields nothing
        # instead of raising IndexError.
        fns = fns[-1:]
    if reverse:
        fns = reversed(fns)
    for fn in fns:
        sys.stderr.write(fn + '\n')
        # open() instead of the Python-2-only file() constructor; the with
        # block closes the handle, which the original never did.
        with open(fn) as raw:
            fh = fix80(def_cm, raw) if '1980' in fn else raw
            for elt in parse_file(def_cm, fh):
                yield elt

Exemple #8

0

Afficher le fichier

Fichier : census.py Projet : AuroraSkywalker/watchdog

def parse_geo_file(fn, args):
    """Parse the geo file ``fn`` with ``fixed_width.parse_file``.

    A private copy of the ``GeoFields['D']`` layout is extended with a
    filler for the line terminator (two characters for ``usgeo`` /
    ``by_state`` paths, one otherwise) and a zero-width ``geo_file`` field
    whose value is always ``fn``.

    Args:
        fn: path of the geo file; also printed to stdout.
        args: passed through unchanged to ``getFile``.

    Returns:
        Whatever ``fixed_width.parse_file`` returns for the opened file.
    """
    GF = {'D': list(GeoFields['D'])}
    if 'usgeo' in fn or 'by_state' in fn:
        # The geo files for usgeo.* use dos line breaks...
        GF['D'].append((None, 2, fixed_width.filler))
    else:
        # ... the congress geo files use unix line breaks.
        GF['D'].append((None, 1, fixed_width.filler))
    GF['D'].append(('geo_file', 0, lambda x: fn))
    # Parenthesised form prints the same text on Python 2 and 3.
    print(fn)
    # Previously opened via: codecs.open(fn, 'r', encoding=_text_encoding)
    # Named geo_fh so the builtin `file` is no longer shadowed.
    geo_fh = getFile(os.path.dirname(fn), os.path.basename(fn), args)
    # Every line is treated as record type 'D'.
    return fixed_width.parse_file(GF, geo_fh, lambda x: 'D')

Exemple #9

0

Afficher le fichier

Fichier : census.py Projet : ChunHungLiu/watchdog-1

def parse_geo_file(fn, args):
    """Parse the geo file ``fn`` with ``fixed_width.parse_file``.

    The ``GeoFields['D']`` layout is copied, then extended with a filler
    covering the line terminator (width 2 for ``usgeo`` / ``by_state``
    paths, width 1 otherwise) and a zero-width ``geo_file`` field that
    always evaluates to ``fn``.

    Args:
        fn: path of the geo file; also printed to stdout.
        args: passed through unchanged to ``getFile``.

    Returns:
        Whatever ``fixed_width.parse_file`` returns for the opened file.
    """
    GF = {'D': list(GeoFields['D'])}
    if 'usgeo' in fn or 'by_state' in fn:
        # The geo files for usgeo.* use dos line breaks...
        GF['D'].append((None, 2, fixed_width.filler))
    else:
        # ... the congress geo files use unix line breaks.
        GF['D'].append((None, 1, fixed_width.filler))
    GF['D'].append(('geo_file', 0, lambda x: fn))
    # Parenthesised form prints the same text on Python 2 and 3.
    print(fn)
    # Previously opened via: codecs.open(fn, 'r', encoding=_text_encoding)
    # Named geo_fh so the builtin `file` is no longer shadowed.
    geo_fh = getFile(os.path.dirname(fn), os.path.basename(fn), args)
    # Every line is treated as record type 'D'.
    return fixed_width.parse_file(GF, geo_fh, lambda x: 'D')

Exemple #10

0

Afficher le fichier

Fichier : fec_cobol.py Projet : AuroraSkywalker/watchdog

def parse_committees(latest=False, reverse=False):
    """Yield records parsed from the crawled FEC ``cm.dat`` files.

    Args:
        latest: when true, restrict parsing to the last path in sorted
            order.
        reverse: when true, visit the selected paths in reverse sorted
            order.

    Each path is written to stderr before it is read.  Paths containing
    ``'1980'`` go through ``fix80`` first; every file is parsed with the
    ``def_cm`` layout.  Nothing is yielded when no file matches.
    """
    fns = sorted(glob.glob('../data/crawl/fec/*/cm.dat'))
    if latest:
        # fns[-1] raised IndexError on an empty glob; the slice keeps the
        # "last file only" meaning and tolerates an empty list.
        fns = fns[-1:]
    if reverse:
        fns = reversed(fns)
    for fn in fns:
        sys.stderr.write(fn + '\n')
        # open() instead of the Python-2-only file() constructor; the with
        # block closes the handle, which the original never did.
        with open(fn) as raw:
            fh = fix80(def_cm, raw) if '1980' in fn else raw
            for elt in parse_file(def_cm, fh):
                yield elt

Exemple #11

0

Afficher le fichier

Fichier : fec_cobol.py Projet : jdthomas/watchdog

def parse_transfers():
    """Yield every record parsed from the crawled FEC ``pas2.dat`` files.

    Paths are processed in sorted order and echoed to stderr.  Parsing
    starts with the ``def_pas2_80`` layout; paths containing ``'1990'``,
    ``'1994'`` or ``'1996'`` select the matching later layout, which then
    remains in effect.  Paths containing ``'1980'`` are passed through
    ``fix80`` before parsing.
    """
    cur_def = def_pas2_80
    for fn in sorted(glob.glob('../data/crawl/fec/*/pas2.dat')):
        sys.stderr.write(fn + '\n')
        # Use open() (file() is Python-2-only) and close the handle as soon
        # as this file's records have been yielded.
        with open(fn) as raw:
            fh = raw
            if '1980' in fn:
                cur_def = def_pas2_80
                fh = fix80(def_pas2_80, raw)
            if '1990' in fn:
                cur_def = def_pas2_90
            if '1994' in fn:
                cur_def = def_pas2_94
            if '1996' in fn:
                cur_def = def_pas2_96
            for elt in parse_file(cur_def, fh):
                yield elt

Exemple #12

0

Afficher le fichier

Fichier : fec_cobol.py Projet : AuroraSkywalker/watchdog

def parse_contributions(latest=False):
    """Yield records parsed from the crawled FEC ``indiv.dat.gz`` files.

    Args:
        latest: when true, only the last path in sorted order is parsed
            and the layout is preset to ``def_indiv_96``.

    Each path is written to stderr before it is read.  The layout changes
    when a path contains ``'1980'``, ``'1990'`` or ``'1996'`` and then
    persists; ``'1980'`` paths are also passed through ``fix80``.  Nothing
    is yielded when no file matches.

    NOTE(review): as in the original, with ``latest`` false ``cur_def`` is
    only bound once a path containing one of the year markers is reached.
    """
    fns = sorted(glob.glob('../data/crawl/fec/*/indiv.dat.gz'))
    if latest:
        cur_def = def_indiv_96
        # Slice rather than index so an empty glob result yields nothing
        # instead of raising IndexError.
        fns = fns[-1:]
    for fn in fns:
        sys.stderr.write(fn + '\n')
        # Close each archive once consumed; GzipFile supports the with
        # statement from Python 2.7 onwards.
        with gzip.open(fn) as raw:
            fh = raw
            if '1980' in fn:
                cur_def = def_indiv_80
                fh = fix80(cur_def, raw)
            if '1990' in fn:
                cur_def = def_indiv_90
            if '1996' in fn:
                cur_def = def_indiv_96
            for elt in parse_file(cur_def, fh):
                yield elt

Exemple #13

0

Afficher le fichier

Fichier : fec_cobol.py Projet : ChunHungLiu/watchdog-1

def parse_contributions(latest=False):
    """Yield records parsed from the crawled FEC ``indiv.dat.gz`` files.

    Args:
        latest: when true, restrict parsing to the last path in sorted
            order and preset the layout to ``def_indiv_96``.

    Paths are echoed to stderr as they are processed.  A path containing
    ``'1980'``, ``'1990'`` or ``'1996'`` selects the matching layout, which
    stays in effect for later files; ``'1980'`` paths also go through
    ``fix80``.  Nothing is yielded when no file matches.

    NOTE(review): as in the original, with ``latest`` false ``cur_def`` is
    only bound once a path containing one of the year markers is reached.
    """
    fns = sorted(glob.glob('../data/crawl/fec/*/indiv.dat.gz'))
    if latest:
        cur_def = def_indiv_96
        # fns[-1] raised IndexError on an empty glob; the slice keeps the
        # "last file only" meaning and tolerates an empty list.
        fns = fns[-1:]
    for fn in fns:
        sys.stderr.write(fn + '\n')
        # Close each archive once consumed; GzipFile supports the with
        # statement from Python 2.7 onwards.
        with gzip.open(fn) as raw:
            fh = raw
            if '1980' in fn:
                cur_def = def_indiv_80
                fh = fix80(cur_def, raw)
            if '1990' in fn:
                cur_def = def_indiv_90
            if '1996' in fn:
                cur_def = def_indiv_96
            for elt in parse_file(cur_def, fh):
                yield elt

Exemple #14

0

Afficher le fichier

def parse():
    """Return one iterator over the records of every ``*.LST`` file.

    The files matching ``../data/crawl/irs/eo/*.LST`` are listed when
    ``parse`` is called.  Each file is opened only when iteration reaches
    it and is closed once its records are exhausted, instead of opening
    every file up front and never closing any.

    Returns:
        An ``itertools.chain`` over ``parse_file(def_eo, ...)`` for each
        file, in the order ``glob.glob`` returned them.
    """
    def _records(fn):
        # Keep the file open for exactly as long as its rows are consumed.
        with open(fn) as fh:
            for row in parse_file(def_eo, fh):
                yield row

    return itertools.chain.from_iterable(
        _records(fn) for fn in glob.glob('../data/crawl/irs/eo/*.LST')
    )

Exemple #15

0

Afficher le fichier

Fichier : irs_5500.py Projet : AuroraSkywalker/watchdog

# Field layout passed to parse_file for the 5500 data file.
# Presumably each entry is (field name, width in characters, converter);
# confirm against parse_file.
# NOTE(review): 'unk4' names eight consecutive one-character fields; if
# parse_file keys its output by name only one of them can survive.
def_5500 = (
    [
        ('unk1_digits', 26, string),
        ('unk2', 8, date),
        ('unk3', 8, date),
    ]
    # Eight identical single-character integer columns.
    + [('unk4', 1, integer)] * 8
    + [
        ('plan_name', 140, string),
        ('unk5', 8, date),
        ('corp_name', 141, string),
        ('street1', 35, string),
        ('street2', 108, string),
        ('city', 22, string),
        ('state', 2, state),
        ('zip', 5, digits),
        ('zip4', 4, digits),
        ('unk6', 3, string),
        (None, 792, filler),  # unparsed
        (None, 2, filler('\r\n')),
    ]
)

if __name__ == "__main__":
    import tools
    # Script entry point: parse the 2006 Form 5500 file with the def_5500
    # layout and pass the result to tools.export.
    # The with block closes the input afterwards; this assumes
    # tools.export consumes the records before returning - TODO confirm.
    with open('../data/crawl/irs/5500/F_5500_2006.txt') as fh:
        tools.export(parse_file(def_5500, fh))

Exemple #16

0

Afficher le fichier

# Field layout passed to parse_file for the 5500 data file.
# Presumably each entry is (field name, width in characters, converter);
# confirm against parse_file.
# NOTE(review): 'unk4' names eight consecutive one-character fields; if
# parse_file keys its output by name only one of them can survive.
def_5500 = (
    [
        ('unk1_digits', 26, string),
        ('unk2', 8, date),
        ('unk3', 8, date),
    ]
    # Eight identical single-character integer columns.
    + [('unk4', 1, integer)] * 8
    + [
        ('plan_name', 140, string),
        ('unk5', 8, date),
        ('corp_name', 141, string),
        ('street1', 35, string),
        ('street2', 108, string),
        ('city', 22, string),
        ('state', 2, state),
        ('zip', 5, digits),
        ('zip4', 4, digits),
        ('unk6', 3, string),
        (None, 792, filler),  # unparsed
        (None, 2, filler('\r\n')),
    ]
)

if __name__ == "__main__":
    import tools
    # Script entry point: parse the 2006 Form 5500 file with the def_5500
    # layout and pass the result to tools.export.
    # The with block closes the input afterwards; this assumes
    # tools.export consumes the records before returning - TODO confirm.
    with open('../data/crawl/irs/5500/F_5500_2006.txt') as fh:
        tools.export(parse_file(def_5500, fh))

Exemple #17

0

Afficher le fichier

Fichier : irs_eo.py Projet : AuroraSkywalker/watchdog

def parse():
    """Return one iterator over the records of every ``*.LST`` file.

    The matching paths under ``../data/crawl/irs/eo/`` are listed when
    ``parse`` is called.  A file is opened only once iteration reaches it
    and is closed when its records run out, rather than opening all files
    eagerly and leaving every handle open.

    Returns:
        An ``itertools.chain`` over ``parse_file(def_eo, ...)`` for each
        file, in the order ``glob.glob`` returned them.
    """
    def _records(fn):
        # Keep the file open for exactly as long as its rows are consumed.
        with open(fn) as fh:
            for row in parse_file(def_eo, fh):
                yield row

    return itertools.chain.from_iterable(
        _records(fn) for fn in glob.glob('../data/crawl/irs/eo/*.LST')
    )

Exemple #18

0

Afficher le fichier

Fichier : irs_eo.py Projet : gregelin/watchdog

  ('subsection_code', 189-187, string),
  ('affiliation', 1, enum),
  ('classification_code', 194-190, string),
  ('ruling_date', 200-194, date),
  ('deductibility_code', 1, string),  
  ('foundation_code', 2, string),  
  ('activity_code', 212-203, string),  
  ('organization_code', 1, string),  
  ('exempt_org_status_code', 2, string),  
  ('advance_ruling_expiration', 221-215, date),  
  ('tax_period', 227-221, string),  
  ('asset_code', 1, string),  
  ('income_code', 1, string),  
  ('filing_requirement_code', 3, string),  
  (None, 3, filler),  
  ('accounting_period', 2, string),  
  ('asset_amt', 250-237, integer),  
  ('income_amt', 264-250, integer2),  
  ('form_990_revenue_amt', 278-264, integer2),  
  ('ntee_code', 282-278, string),  
  ('sort_name', 318-282, string),
  (None, 2, filler('\r\n'))

]

if __name__ == "__main__":
    import glob
    import tools
    # Script entry point: export every *.LST file parsed with def_eo.
    for fn in glob.glob('../data/crawl/irs/eo/*.LST'):
        # Close each input after export; this assumes tools.export
        # consumes the records before returning - TODO confirm.
        with open(fn) as fh:
            tools.export(parse_file(def_eo, fh))

Exemple #19

0

Afficher le fichier

    for row in parse_file(def_zip4, fh):
        if row['_type'] != 'ZIP+4 Detail': continue
        if row['congress_dist'] == 'AL':
            row['congress_dist'] = '00'
        if row['zip4_lo'] == row['zip4_hi']:
            zip4s = [row['zip4_lo']]
        else:
            zip4s = [str(x).zfill(4) for x in xrange(int(row['zip4_lo']), int(row['zip4_hi']) + 1)]
        for zip4 in zip4s:
            yield row['zip'] + '-' + zip4, row['state_abbrev'] + '-' + row['congress_dist']

if __name__ == "__main__":
    import glob
    import sys
    import tools

    # Script entry point.  argv[1] selects what to export and argv[2] is
    # the path prefix that the glob pattern is appended to.

    # Options that are parsed directly with parse_file and a layout.
    def_map = {
        '--ctystate': def_ctystate,
        '--5digit': def_5digit,
        '--zip4': def_zip4,
        '--delstat': def_delstat,
    }

    def _export_all(pattern, parser):
        # Export parser(fh) for every file matching pattern, closing each
        # file afterwards.  Assumes tools.export consumes the records
        # before returning - TODO confirm.
        for fn in glob.glob(pattern):
            with open(fn) as fh:
                tools.export(parser(fh))

    mode = sys.argv[1]
    if mode in def_map:
        _export_all(sys.argv[2] + '*.txt',
                    lambda fh: parse_file(def_map[mode], fh))
    elif mode == '--tiger':
        _export_all(sys.argv[2] + '*/*.txt', parse_tigerzip)
    elif mode == '--tigerdat':
        _export_all(sys.argv[2] + '*/TIGER.DAT', parse_tigerdat)

Exemple #20

0

Afficher le fichier

Fichier : fec_cobol.py Projet : ChunHungLiu/watchdog-1

def parse_cansum():
    """Return ``parse_file``'s result for the 2008 ``weball.dat`` file.

    The file is parsed with the ``def_webl`` layout.  The handle is not
    closed here because the returned value may still read from it.
    """
    # open() is the documented way to open a file; the file() constructor
    # exists only on Python 2.
    weball = open("../data/crawl/fec/2008/weball.dat")
    return parse_file(def_webl, weball)

Exemple #21

0

Afficher le fichier

Fichier : fec_cobol.py Projet : ChunHungLiu/watchdog-1

def parse_candidates():
    """Yield every record parsed from the crawled FEC ``cn.dat`` files.

    Files matching ``../data/crawl/fec/*/cn.dat`` are visited in sorted
    path order, each path is written to stderr, and every file is parsed
    with the ``def_cn`` layout.
    """
    for fn in sorted(glob.glob('../data/crawl/fec/*/cn.dat')):
        sys.stderr.write(fn + '\n')
        # open() instead of the Python-2-only file() constructor; the with
        # block closes the handle, which the original never did.
        with open(fn) as fh:
            for elt in parse_file(def_cn, fh):
                yield elt

Exemple #22

0

Afficher le fichier

Fichier : fec_cobol.py Projet : jdthomas/watchdog

def parse_candidates():
    """Yield every record parsed from the crawled FEC ``cn.dat`` files.

    Paths are processed in sorted order and echoed to stderr; every file
    is parsed with the ``def_cn`` layout.
    """
    for fn in sorted(glob.glob('../data/crawl/fec/*/cn.dat')):
        sys.stderr.write(fn + '\n')
        # Use open() (file() is Python-2-only) and close the handle as soon
        # as this file's records have been yielded.
        with open(fn) as fh:
            for elt in parse_file(def_cn, fh):
                yield elt

Exemple #23

0

Afficher le fichier

Fichier : fec_cobol.py Projet : jdthomas/watchdog

def parse_cansum():
    """Return ``parse_file``'s result for the 2008 ``weball.dat`` file.

    The ``def_webl`` layout is used.  The handle is left open because the
    returned value may still read from it.
    """
    # open() is the documented way to open a file; the file() constructor
    # exists only on Python 2.
    weball = open("../data/crawl/fec/2008/weball.dat")
    return parse_file(def_webl, weball)