예제 #1
0
    ('activity_code', 212 - 203, string),
    ('organization_code', 1, string),
    ('exempt_org_status_code', 2, string),
    ('advance_ruling_expiration', 221 - 215, date),
    ('tax_period', 227 - 221, string),
    ('asset_code', 1, string),
    ('income_code', 1, string),
    ('filing_requirement_code', 3, string),
    (None, 3, filler),
    ('accounting_period', 2, string),
    ('asset_amt', 250 - 237, integer),
    ('income_amt', 264 - 250, integer2),
    ('form_990_revenue_amt', 278 - 264, integer2),
    ('ntee_code', 282 - 278, string),
    ('sort_name', 318 - 282, string),
    (None, 2, filler('\r\n'))
]


def parse():
    """Return one iterator over the parsed records of every EO ``*.LST`` file.

    Each path matched by the glob is passed to ``parse_file`` together with
    the ``def_eo`` layout, and the per-file results are chained in the order
    ``glob.glob`` returned the paths.

    Files are opened one at a time, only when iteration reaches them, and
    each one is closed as soon as its records are exhausted.  Previously
    every matching file was opened up front and never explicitly closed.
    ``open`` replaces the ``file`` builtin, which exists only on Python 2.
    """
    def records(path):
        # Keep the handle open while its records are being consumed so this
        # works whether parse_file reads eagerly or lazily.
        with open(path) as handle:
            for record in parse_file(def_eo, handle):
                yield record

    paths = glob.glob('../data/crawl/irs/eo/*.LST')
    return itertools.chain.from_iterable(records(path) for path in paths)


if __name__ == "__main__":
    # Command-line entry point: the database is only written to when the
    # word 'load' appears among the arguments.
    import sys
    load_requested = 'load' in sys.argv
    if load_requested:
        # Imported here so a plain import of this module never needs settings.
        from settings import db
        db.multiple_insert("exempt_org", parse(), seqname=False)
예제 #2
0
from fixed_width import parse_file, string, date, integer, filler, state, digits

# Layout passed to parse_file for the Form 5500 file.
# NOTE(review): entries look like (field name, width, converter) -- confirm
# against fixed_width.parse_file.  The "unk*" names presumably mark fields
# whose meaning is still unknown.
def_5500 = (
    [
        ('unk1_digits', 26, string),
        ('unk2', 8, date),
        ('unk3', 8, date),
    ]
    # Eight consecutive single-width integer fields that all carry the same
    # name.  NOTE(review): if parse_file keys records by field name, later
    # values would overwrite earlier ones -- confirm this is intended.
    + [('unk4', 1, integer)] * 8
    + [
        ('plan_name', 140, string),
        ('unk5', 8, date),
        ('corp_name', 141, string),
        ('street1', 35, string),
        ('street2', 108, string),
        ('city', 22, string),
        ('state', 2, state),
        ('zip', 5, digits),
        ('zip4', 4, digits),
        ('unk6', 3, string),
        (None, 792, filler),  # unparsed
        (None, 2, filler('\r\n')),
    ]
)

if __name__ == "__main__":
    # Script entry point: parse F_5500_2006.txt with the def_5500 layout and
    # pass the result to tools.export.
    import tools
    # `open` replaces the Python-2-only `file` builtin, and the `with` block
    # closes the handle once the export call has returned.
    with open('../data/crawl/irs/5500/F_5500_2006.txt') as source:
        tools.export(parse_file(def_5500, source))
예제 #3
0
import warnings
import web
from fixed_width import string, integer, date, filler, enum, state, digits

# Keep a reference to the `date` converter imported from fixed_width; the
# `date` function defined below rebinds the name and delegates to this alias.
olddate = date


def date(s):
    """Convert *s* with ``olddate``, mapping a '--' result to None.

    Any other result of ``olddate(s)`` is returned unchanged.
    """
    converted = olddate(s)
    if converted == '--':
        return None
    return converted


# Converter built by `enum` from the mapping '1' -> True, '0' -> False,
# '' -> None.  The mapping is passed through ** because these keys cannot be
# written as keyword-argument identifiers.
boolean = enum(**{'1': True, '0': False, '': None})

# Unnamed field entry built with filler('|').
# NOTE(review): presumably a one-character '|' column separator -- confirm.
pipe = (None, 1, filler('|'))


def def_address(name):
    """Return the six (field name, converter) pairs for one address.

    Every field name is *name* followed by a fixed ``_address_*`` suffix; the
    pairs come back in the order line 1, line 2, city, state, zip, zip4.
    """
    suffix_converters = (
        ('_address_1', string),
        ('_address_2', string),
        ('_address_city', string),
        ('_address_state', state),
        ('_address_zip', digits),
        ('_address_zip4', digits),
    )
    return [(name + suffix, converter)
            for suffix, converter in suffix_converters]


def_entity = [
    ('form_id', integer),
    ('entity_id', integer),
    ('org_name', string),
    ('ein', digits),
    ('entity_name', string),
    ('entity_tile', string),
예제 #4
0
from fixed_width import string, integer, date, filler, enum, state, digits

# Keep a reference to the `date` converter imported from fixed_width; the
# `date` function defined below rebinds the name and delegates to this alias.
olddate = date


def date(s):
    """Run *s* through ``olddate`` and return None when the result is "--".

    Every other result is passed back to the caller as-is.
    """
    parsed = olddate(s)
    return None if parsed == "--" else parsed


# Converter built by `enum` from the mapping "1" -> True, "0" -> False,
# "" -> None.  The mapping is passed through ** because these keys cannot be
# written as keyword-argument identifiers.
boolean = enum(**{"1": True, "0": False, "": None})

# Unnamed field entry built with filler("|").
# NOTE(review): presumably a one-character "|" column separator -- confirm.
pipe = (None, 1, filler("|"))


def def_address(name):
    """Build the (field name, converter) pairs describing one address.

    Each field name is *name* plus a fixed ``_address_*`` suffix.  Six pairs
    are returned, ordered: line 1, line 2, city, state, zip, zip4.
    """
    suffixes = (
        "_address_1",
        "_address_2",
        "_address_city",
        "_address_state",
        "_address_zip",
        "_address_zip4",
    )
    # One converter per suffix, in the same order as `suffixes`.
    converters = (string, string, string, state, digits, digits)
    return [(name + suffix, conv) for suffix, conv in zip(suffixes, converters)]


def_entity = [
    ("form_id", integer),
예제 #5
0
    ("other_loan_repay", 10, integer),
    ("debts_owed_by", 10, integer),
    ("total_indiv_contrib", 10, integer),
    ("state_code", 2, string),
    ("district", 2, string),
    ("spec_elec_status", 1, enum),
    ("primary_elec_status", 1, enum),  #@@primary_general?
    ("runoff_elec_status", 1, enum),
    ("general_elec_status", 1, enum),
    ("general_elec_pct", 3, string),
    ("contrib_from_other_pc", 10, integer),
    ("contrib_from_pc", 10, integer),
    ("end_date", 8, date),
    ("refunds_to_indiv", 10, integer),
    ("refunds_to_commit", 10, integer),
    (None, 2, filler('\r\n'))
]

# Supports the CANSUM04, CANSUM02, CANSUM00, CANSUM98 and CANSUM96 files.
def_cansum = [
    ('_type', 0, lambda x: 'Cadidate'),
    ("candidate_id", 9, string),
    ("candidate_name", 38, string),
    ("ico", 1, ico),
    ("party", 1, party),
    ("party_desig", 3, string),
    ("total_receipts", 10, integer),
    ("auth_trans_from", 10, integer),
    ("total_disbursments", 10, integer),
    ("trans_from_auth", 10, integer),
    ("begin_cash", 10, integer),