Example #1
0
def dump():
    """Scrape every almanac person and state page and export the records."""
    import glob
    import tools

    almanac_root = '../data' + '/crawl/almanac/nationaljournal.com/pubs/almanac/2008/'

    for path in glob.glob(almanac_root + 'people/*/rep*.htm'):
        tools.export([scrape_person(path)])
    for path in glob.glob(almanac_root + 'states/*/index.html'):
        tools.export([scrape_state(path)])
Example #2
0
 def save_rle(self):
     """Ask the user for a filename and save the current board pattern.

     The pattern text comes from tools.export(...) applied to the central
     board's pattern; the dialog filter restricts choices to *.rle files.
     """
     fname = QtGui.QFileDialog.getSaveFileName(self, 'Save file', 
             '*.rle', "GoL Patterns (*.rle)")
     
     # An empty fname means the user cancelled the dialog; do nothing then.
     if fname:
         f = open(fname, 'w')
         
         with f:        
             f.write(tools.export(self.central.board.pattern))
Example #3
0
    def use_pogo(self):
        """Execute one step of the Pogo algorithm.

        Similar to jittery hillclimbing: find the cell(s) with the best
        positive impact and flip one of them at random. If no cell has a
        positive impact (a local minimum), instead flip k random cells,
        where k is the number of times this particular minimum has been
        encountered — the longer it seems stuck, the "higher" it jumps.
        """
        top = 1
        candidates = []
        for row in range(self.board["y_size"]):
            for col in range(self.board["x_size"]):
                value = self.impact[row][col]
                if value > top:
                    top = value
                    candidates = [(col, row)]
                elif value == top:
                    candidates.append((col, row))

        if candidates:
            self.flip(random.choice(candidates))
        else:
            key = tools.export(self.candidate)  # immutable key for the minima dict
            if key in self.minima:
                self.minima[key] += 1
            else:
                self.minima[key] = 1
            for _ in range(self.minima[key]):
                cell = (random.choice(range(self.board["x_size"])),
                        random.choice(range(self.board["y_size"])))
                self.flip(cell)
Example #4
0
    def use_pogo(self):
        """Execute one step of the Pogo algorithm.

        Finds the cell(s) with the best positive impact and flips one of
        them. If no cell has a positive impact (a local minimum), it flips
        as many random cells as the number of times this exact candidate
        board has been seen as a minimum.
        """

        def _flip(cell):
            # The flip bookkeeping was duplicated verbatim in both branches;
            # keep it in one place so the two paths cannot drift apart.
            x, y = cell
            self.candidate[y][x] = not self.candidate[y][x]
            self.total_needy -= self.impact[y][x]
            self.update_neediness(cell)
            self.update_impact(cell)

        best_score = 1
        best_list = []
        for j in range(self.board["y_size"]):
            for i in range(self.board["x_size"]):
                score = self.impact[j][i]
                if score > best_score:
                    best_score = score
                    best_list = [(i, j)]
                elif score == best_score:
                    best_list.append((i, j))

        if not best_list:
            current = tools.export(self.candidate)  # immutable key for the minima dict
            if current in self.minima:
                self.minima[current] += 1
            else:
                self.minima[current] = 1
            for i in range(self.minima[current]):
                _flip((random.choice(range(self.board["x_size"])),
                       random.choice(range(self.board["y_size"]))))
        else:
            _flip(random.choice(best_list))
Example #5
0
    def use_pogo(self):
        """Execute one step of the Pogo algorithm.

        Flip one of the cells with the best positive impact; if none exists
        (a local minimum), flip k random cells, where k is the number of
        times this exact candidate board has been seen as a minimum.
        """
        best_score = 1
        best_list = []
        # Scan the whole board for the highest impact score (> 1).
        for j in range(self.board["y_size"]):
            for i in range(self.board["x_size"]):
                score = self.impact[j][i]
                if score > best_score:
                    best_score = score
                    best_list = [(i, j)]
                elif score == best_score:
                    best_list.append((i, j))

        if not best_list:
            current = tools.export(self.candidate)  # serialized board: immutable dict key
            if current in self.minima:
                self.minima[current] += 1
            else:
                self.minima[current] = 1
            for i in range(self.minima[current]):
                flip_cell = (random.choice(range(self.board["x_size"])),
                             random.choice(range(self.board["y_size"])))
                x = flip_cell[0]
                y = flip_cell[1]
                # Toggle the cell and incrementally update the bookkeeping.
                self.candidate[y][x] = not self.candidate[y][x]
                self.total_needy -= self.impact[y][x]
                self.update_neediness(flip_cell)
                self.update_impact(flip_cell)
        else:
            flip_cell = random.choice(best_list)
            x = flip_cell[0]
            y = flip_cell[1]
            # Same flip bookkeeping as above, applied to the single best cell.
            self.candidate[y][x] = not self.candidate[y][x]
            self.total_needy -= self.impact[y][x]
            self.update_neediness(flip_cell)
            self.update_impact(flip_cell)
Example #6
0
    def use_pogo(self):
        '''Execute one step of the Pogo algorithm.

        This is an algorithm I invented. Similar to the jittery hillclimbing
        algorithm, it looks for the cell(s) with the best impact. If no cell
        has a positive impact (which implies the search is at a local minima),
        it flips some number of random cells instead, equal to the number of
        times it has encountered that particular minima. The idea is that it
        will jump "higher" the more it seems to be getting stuck.
        '''
        high = 1
        winners = []
        for y in range(self.board["y_size"]):
            impact_row = self.impact[y]
            for x in range(self.board["x_size"]):
                s = impact_row[x]
                if s > high:
                    high, winners = s, [(x, y)]
                elif s == high:
                    winners.append((x, y))

        if winners:
            self.flip(random.choice(winners))
            return

        key = tools.export(self.candidate)  # To get an immutable key
        if key in self.minima:
            self.minima[key] += 1
        else:
            self.minima[key] = 1
        for _ in range(self.minima[key]):
            self.flip((random.choice(range(self.board["x_size"])),
                       random.choice(range(self.board["y_size"]))))
Example #7
0
                    out.active = True

                yield out

def parse_stats(metrics=METRICS):
    """Yield a web.storage of attributes for each <representative> element."""
    for metric in metrics:
        for path in glob.glob(STATS_XML % metric):
            try:
                events = pulldom.parse(path)
            except IOError:
                # Unreadable file: skip it rather than abort the export.
                continue
            for kind, node in events:
                if kind == "START_ELEMENT" and node.tagName == 'representative':
                    yield web.storage(node.attributes.items())

def parse_fec():
    """Yield {'fecid', 'bioguideid'} mappings from the FEC candidate XML.

    Candidates whose fec_id already appears inside their uri are skipped.
    NOTE(review): the condition reads `in` — confirm against the data that
    this was not meant to be `not in`.
    """
    dom = pulldom.parse(FEC_XML)
    for event, node in dom:
        if event == "START_ELEMENT" and node.tagName == 'candidate':
            dom.expandNode(node)  # materialize the subtree so child lookups work
            fec_id = node.getElementsByTagName('id')[0].firstChild.nodeValue
            uri = node.getElementsByTagName('uri')[0].firstChild.nodeValue
            if fec_id in uri: continue
            bioguide_id = uri.split('/')[-1]  # last path segment of the uri
            yield {'fecid': fec_id, 'bioguideid': bioguide_id}

if __name__ == "__main__":
    # Export basics and stats always; the FEC id mapping only when the
    # configured session is the 110th.
    tools.export(parse_basics())
    tools.export(parse_stats())
    if current_session == 110: tools.export(parse_fec())
Example #8
0
# Fixed-width record layout for the IRS Form 5500 file:
# (field_name, byte_width, parser).  A name of None marks filler bytes.
def_5500 = [
  ('unk1_digits', 26, string),
  ('unk2', 8, date),
  ('unk3', 8, date),
  # NOTE(review): 'unk4' is repeated eight times; if the consumer keys
  # fields by name, later entries overwrite earlier ones — confirm intended.
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('plan_name', 140, string),
  ('unk5', 8, date),
  ('corp_name', 141, string),
  ('street1', 35, string),
  ('street2', 108, string),
  ('city', 22, string),
  ('state', 2, state),
  ('zip', 5, digits),
  ('zip4', 4, digits),
  ('unk6', 3, string),
  (None, 792, filler), # unparsed
  (None, 2, filler('\r\n'))  # record terminator
]

if __name__ == "__main__":
    import tools
    # Parse the 2006 Form 5500 extract and hand the records to tools.export.
    tools.export(parse_file(def_5500, file('../data/crawl/irs/5500/F_5500_2006.txt')))
Example #9
0
    for row in r_row.findall(d):
        out = r_td.findall(row)
        if out:
            dist, membername = r_member.findall(row)[0]
            dist = dist.replace("At Large", "00")
            dist = dist[:2] + "-" + dist[2:].zfill(2)

            s = web.storage()
            s.district = dist
            s.progressive2008 = fixdec(out[0])
            s.chips2008 = fixdec(out[1])
            s.progressiveall = fixdec(out[3])
            s.name = membername.decode("iso-8859-1")

            yield s


def parse_all():
    """Yield parsed rows from the House page, then from the Senate page."""
    for path in ("../data/crawl/punch/house.html",
                 "../data/crawl/punch/senate.html"):
        for record in parse_doc(file(path).read()):
            yield record


if __name__ == "__main__":
    import tools

    # Export every parsed House and Senate row.
    tools.export(parse_all())
Example #10
0
            except err: pass
                
        try: out.gini = gini_est(out.brackets)
        except MissingData: pass
        
        yield out
        
        loc += 8

def parse_soi(verbose=False):
    """Yield SOI rows for every state, relabeling each state's 'Total' row.

    verbose: when True, writes progress to stderr (Python 2 print syntax).
    """
    import sys

    states = ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 
    'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 
    'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 
    'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 
    'WA', 'WI', 'WV', 'WY']

    if verbose: print>>sys.stderr
    for state in states:
        if verbose: print>>sys.stderr, "\rParsing", state + '...',
        for x in parse_state(state):
            # The per-state 'Total' row gets the state code as its location.
            if x.loc.strip() == 'Total':
                x.loc = state
            yield x
    # Trailing spaces overwrite the last progress message on the terminal.
    if verbose: print >>sys.stderr, '\r                     '

if __name__ == "__main__":
    import tools
    # Export all SOI rows, showing per-state progress on stderr.
    tools.export(parse_soi(verbose=True))
Example #11
0
    out.population = x['Population']
    if 'Suppressed' in x['Crude Rate']:
        out.crude_rate_suppressed = True
    else:
        out.crude_rate_suppressed = False
        crude_rate = re.search(r'(?P<rate>\d*(\.\d*))', x['Crude Rate'])
        if crude_rate:
            out.crude_rate = crude_rate.group('rate')
        unreliable = 'Unreliable' in x['Crude Rate']
        out.crude_rate_reliable = not unreliable
    return out


def parse_file(fn):
    """Yield parsed records from the tab-separated file *fn*.

    Yields nothing when the file does not exist.  Skips the "Total"
    summary row and trailing footer lines (which have no Count value).
    """
    if not glob(fn):
        return
    parser = csv.DictReader(file(fn), delimiter="\t")
    for x in parser:
        # Ignore the Total row and footer lines.
        # (Comparison to None uses `is`, not `==`, per PEP 8.)
        if x['Notes'] == "Total" or x['Count'] is None:
            continue
        yield parse_line(x)


if __name__ == "__main__":
    import sys
    import tools
    # Each command-line argument names a file to parse and export.
    for i in sys.argv[1:]:
        tools.export(parse_file(i))
Example #12
0
        ("transmission_time", string),  # @@
        ("record_count", integer),
    ],
}


def parse_line(s):
    """Parse one pipe-delimited record, dispatching on its first character."""
    record_type = s[0]
    if record_type not in def_polorgs:
        warnings.warn("Don't recognize: " + record_type)
        return
    return parse_line_type(s.strip(), def_polorgs[record_type])


def parse_line_type(line, def4type):
    """Split *line* on '|' and convert each field with its declared parser.

    def4type is a sequence of (name, converter) pairs; extra fields on
    either side are silently dropped by zip().
    """
    out = web.storage()
    for (name, kind), val in zip(def4type, line.split("|")):
        out[name] = kind(val)
        print name, val, kind(val)  # NOTE(review): looks like leftover debug output
    return out


def parse_doc(doc):
    """Yield the parsed record for every line of *doc*."""
    for raw_line in doc:
        yield parse_line(raw_line)


if __name__ == "__main__":
    import tools

    # Parse the IRS political-organizations dump and export the records.
    tools.export(parse_doc(file("../data/crawl/irs/pol/FullDataFile.txt")))
Example #13
0
    amendment = parse_report(headers, amendment)
    report['committee'] = amendment['committee']
    report['candidate'] = amendment['candidate']
    for k, v in amendment['schedules'].items():
        report['schedules'][k] = v

def apply_amendments(headers, report, amendments):
    """Apply any amendments filed against *report*, then flatten its schedules.

    amendments maps report_id -> list of amendment filings.  After applying
    the (possibly empty) list, report['schedules'] is replaced by the plain
    values of its dict.  Returns the mutated report.
    """
    report_id = report['report_id']
    # dict.has_key() is deprecated (and gone in Python 3); use `in`.
    if report_id in amendments:
        for a in amendments[report_id]:
            apply_amendment(headers, report, a)
    report['schedules'] = report['schedules'].values()
    return report

def parse_filings(headers, reports, amendments):
    """Yield each report fully parsed and with its amendments applied."""
    for r in reports:
        print r['zfn']  # progress output (Python 2 print statement)
        r = parse_report(headers, r)
        r = apply_amendments(headers, r, amendments)
        yield r

def parse_efilings(filepattern = EFILINGS_PATH + '*.zip'):
    """Parse every e-filing archive matching *filepattern* into reports."""
    header_defs = parse_headers()
    report_list, amendment_map = file_index(filepattern)
    return parse_filings(header_defs, report_list, amendment_map)

if __name__ == "__main__":
    # Parse all e-filings and export the resulting reports.
    tools.export(parse_efilings())

Example #14
0
    for row in parse_file(def_zip4, fh):
        if row['_type'] != 'ZIP+4 Detail': continue
        if row['congress_dist'] == 'AL':
            row['congress_dist'] = '00'
        if row['zip4_lo'] == row['zip4_hi']:
            zip4s = [row['zip4_lo']]
        else:
            zip4s = [str(x).zfill(4) for x in xrange(int(row['zip4_lo']), int(row['zip4_hi']) + 1)]
        for zip4 in zip4s:
            yield row['zip'] + '-' + zip4, row['state_abbrev'] + '-' + row['congress_dist']

if __name__ == "__main__":
    import sys, glob, tools
    
    # CLI: the first argument selects a record layout, the second is a path
    # prefix that is globbed for input files.
    def_map = {
      '--ctystate': def_ctystate, 
      '--5digit': def_5digit, 
      '--zip4': def_zip4, 
      '--delstat': def_delstat
    }
    
    if sys.argv[1] in def_map:
        for fn in glob.glob(sys.argv[2] + '*.txt'):
            tools.export(parse_file(def_map[sys.argv[1]], file(fn)))
    elif sys.argv[1] == '--tiger':
        for fn in glob.glob(sys.argv[2] + '*/*.txt'):
            tools.export(parse_tigerzip(file(fn)))
    elif sys.argv[1] == '--tigerdat':
        for fn in glob.glob(sys.argv[2] + '*/TIGER.DAT'):
            tools.export(parse_tigerdat(file(fn)))
Example #15
0

def parse_stats(metrics=METRICS):
    """Yield a web.storage of attributes for each <representative> element.

    Stats files that fail to open are skipped rather than aborting.
    """
    for metric in metrics:
        for fn in glob.glob(STATS_XML % metric):
            try:
                dom = pulldom.parse(fn)
            except IOError:
                continue
            for event, node in dom:
                if event == "START_ELEMENT" and node.tagName == 'representative':
                    yield web.storage(node.attributes.items())


def parse_fec():
    """Yield {'fecid', 'bioguideid'} mappings from the FEC candidate XML."""
    events = pulldom.parse(FEC_XML)
    for kind, node in events:
        if kind != "START_ELEMENT" or node.tagName != 'candidate':
            continue
        events.expandNode(node)
        fec_id = node.getElementsByTagName('id')[0].firstChild.nodeValue
        uri = node.getElementsByTagName('uri')[0].firstChild.nodeValue
        if fec_id not in uri:
            yield {'fecid': fec_id, 'bioguideid': uri.split('/')[-1]}


if __name__ == "__main__":
    # Export basics and stats always; the FEC id mapping only when the
    # configured session is the 110th.
    tools.export(parse_basics())
    tools.export(parse_stats())
    if current_session == 110: tools.export(parse_fec())
Example #16
0
  ('src_occupation', 35, get_data_str),
  ('date', 8, get_data_date),
  ('amount', 7, get_data_int),
  ('src_id', 9, get_data_str),
  ('fec_record_id', 7, get_data_str)
]

def parse_candidates():
    """Parse the 2008 all-candidates summary file (weball.dat)."""
    return read_fec_file(file("../data/crawl/fec/2008/weball.dat"), WEB_ROW_DEF)
def parse_committees():
    """Parse the 2008 committee master file (cm.dat)."""
    return read_fec_file(file("../data/crawl/fec/2008/cm.dat"), CM_DEF)
def parse_transfers():
    """Parse the 2008 committee-to-candidate transfers file (pas2.dat)."""
    return read_fec_file(file("../data/crawl/fec/2008/pas2.dat"), PAS2_DEF)
def parse_contributions():
    """Parse the gzipped 2008 individual contributions file (indiv.dat.gz)."""
    return read_fec_file(gzip.open("../data/crawl/fec/2008/indiv.dat.gz"), INDIV_ROW_DEF)

if __name__ == "__main__":
    import tools
    # Export each dataset exactly once.  (Previously parse_transfers() was
    # exported twice, duplicating every transfer record in the output.)
    tools.export(parse_committees())
    tools.export(parse_transfers())
    tools.export(parse_contributions())
    tools.export(parse_candidates())

#result = read_fec_file("WEBL08.DAT",WEB_ROW_DEF,WEB_ROW_DEF_SIZE)
#result = read_fec_file("cansum04.txt",CANSUM_ROW_DEF,CANSUM_ROW_DEF_SIZE)
#result = read_fec_file("CANSUM94.DAT",CANSUM_94_ROW_DEF,CANSUM_94_ROW_DEF_SIZE)
#result = read_fec_file("CANSUM90.DAT",CANSUM_90_ROW_DEF,CANSUM_90_ROW_DEF_SIZE)
#result = read_fec_file("cansum88.dat",CANSUM_88_ROW_DEF,CANSUM_88_ROW_DEF_SIZE)
#print result
Example #17
0
        ('transmission_date', date),
        ('transmission_time', string),  #@@
        ('record_count', integer)
    ]
}


def parse_line(s):
    """Parse one pipe-delimited record, dispatching on its first character.

    Unrecognized record types produce a warning and return None.
    """
    if s[0] in def_polorgs:
        return parse_line_type(s.strip(), def_polorgs[s[0]])
    else:
        warnings.warn("Don't recognize: " + s[0])


def parse_line_type(line, def4type):
    """Split *line* on '|' and convert each field with its declared parser.

    def4type is a sequence of (name, converter) pairs; extra fields on
    either side are silently dropped by zip().
    """
    out = web.storage()
    for (name, kind), val in zip(def4type, line.split('|')):
        out[name] = kind(val)
        print name, val, kind(val)  # NOTE(review): looks like leftover debug output
    return out


def parse_doc(doc):
    """Yield the parsed record for every line of *doc* (an iterable of lines)."""
    for line in doc:
        yield parse_line(line)


if __name__ == "__main__":
    import tools
    # Parse the IRS political-organizations dump and export the records.
    tools.export(parse_doc(file('../data/crawl/irs/pol/FullDataFile.txt')))
Example #18
0
                num, fipscode, distname, distdesc, ignore, ignore2 = \
                        [x.strip().strip('"') for x in lines]
                distnum = None
            if not fipscode.strip(): continue
            shapeid2district[num] = (fipscode, distnum)
        
        out = {}
        for line in file(DATA_DIR + '/'+filename+'.dat'):
            nums = line.strip().split()
            if len(nums) == 3:
                shapeid = nums[0] # other points are the center
                if shapeid in shapeid2district:
                    SKIPME = False
                    district = shapeid2district[shapeid]
                    out.setdefault(district, [])
                    out[district].append([])
                else:
                    SKIPME = True
            elif len(nums) == 2 and not SKIPME:
                out[district][-1].append((float(nums[0]), float(nums[1])))
        
        for (fipscode, distnum), shapes in out.iteritems():
            yield {
              '_type': 'district', 
              'state_fipscode': fipscode, 
              'district': distnum,
              'shapes': shapes
            }

if __name__ == "__main__":
    # Parse the district shape files and export the records.
    tools.export(parse())
Example #19
0
            
            if 'current-committee-assignment' in [
              hasattr(x, 'tagName') and x.tagName for x in node.childNodes
            ]:
                out.active = True

            yield out

def parse_stats(metrics=METRICS):
    """Yield a web.storage of attributes for each <representative> element.

    A stats file that cannot be opened is skipped instead of aborting the
    whole export with an uncaught IOError.
    """
    for metric in metrics:
        try:
            dom = pulldom.parse(STATS_XML % metric)
        except IOError:
            continue
        for event, node in dom:
            if event == "START_ELEMENT" and node.tagName == 'representative':
                yield web.storage(node.attributes.items())

def parse_fec():
    """Yield {'fecid', 'bioguideid'} mappings from the FEC candidate XML.

    Candidates whose fec_id already appears inside their uri are skipped.
    NOTE(review): the condition reads `in` — confirm against the data that
    this was not meant to be `not in`.
    """
    dom = pulldom.parse(FEC_XML)
    for event, node in dom:
        if event == "START_ELEMENT" and node.tagName == 'candidate':
            dom.expandNode(node)  # materialize the subtree so child lookups work
            fec_id = node.getElementsByTagName('id')[0].firstChild.nodeValue
            uri = node.getElementsByTagName('uri')[0].firstChild.nodeValue
            if fec_id in uri: continue
            bioguide_id = uri.split('/')[-1]  # last path segment of the uri
            yield {'fecid': fec_id, 'bioguideid': bioguide_id}

if __name__ == "__main__":
    # Export all three datasets in turn.
    tools.export(parse_basics())
    tools.export(parse_stats())
    tools.export(parse_fec())
Example #20
0
    ('tax_period', 227 - 221, string),
    ('asset_code', 1, string),
    ('income_code', 1, string),
    ('filing_requirement_code', 3, string),
    (None, 3, filler),
    ('accounting_period', 2, string),
    ('asset_amt', 250 - 237, integer),
    ('income_amt', 264 - 250, integer2),
    ('form_990_revenue_amt', 278 - 264, integer2),
    ('ntee_code', 282 - 278, string),
    ('sort_name', 318 - 282, string),
    (None, 2, filler('\r\n'))
]


def parse():
    """Chain the parsed records of every IRS exempt-org .LST file.

    Uses chain.from_iterable with a generator expression so each file is
    opened only when its records are actually consumed, instead of opening
    every file up front as chain(*[...]) did.
    """
    return itertools.chain.from_iterable(
        parse_file(def_eo, file(fn))
        for fn in glob.glob('../data/crawl/irs/eo/*.LST'))


if __name__ == "__main__":
    import sys
    # 'load' inserts straight into the exempt_org table; otherwise export.
    if 'load' in sys.argv:
        from settings import db
        db.multiple_insert("exempt_org", parse(), seqname=False)
    else:
        import tools
        tools.export(parse())
Example #21
0
def parse_row(row):
    """Convert a raw xls row into a web.storage keyed by the fmt column names.

    house_member / senate_member hold semicolon-separated name lists in the
    sheet; the `(x or []) and [...]` idiom maps an empty/missing cell to []
    and a non-empty string to a list of stripped names.
    """
    out = web.storage()
    for n, item in enumerate(fmt):
        out[item] = row[n]
    out.house_member = (out.house_member or []) and [
        x.strip() for x in out.house_member.split(';')
    ]
    out.senate_member = (out.senate_member or []) and [
        x.strip() for x in out.senate_member.split(';')
    ]
    #out.state = (out.state or []) and [x.strip() for x in out.state.split(';')]
    return out


def parse_file(fn):
    """Break down the xls into a 2d data array, stripping off first rows which do not have data."""
    rows = xls2list.xls2list(fn)
    for idx, raw in enumerate(rows[3:]):
        record = parse_row(raw)
        # Every real earmark has a description; a blank one marks the end
        # of the data, so stop there.
        if not record.description:
            break
        # The file's own ids aren't remotely unique; renumber from 1.
        record.id = idx + 1
        yield record


if __name__ == "__main__":
    import tools
    # Parse the earmark spreadsheet and export the records.
    tools.export(parse_file(EARMARK_FILE))
Example #22
0
  'house_member', 'house_party', 'house_state', 'district',
  'senate_member', 'senate_party', 'senate_state',
  'presidential', 'undisclosed', 'intended_recipient',
  'notes'
)

def parse_row(row):
    """Map one raw spreadsheet row onto a web.storage keyed by the fmt names."""
    rec = web.storage()
    for pos, field in enumerate(fmt):
        rec[field] = row[pos]
    # Semicolon-separated member cells become lists; empty cells become [].
    rec.house_member = (
        [name.strip() for name in rec.house_member.split(';')]
        if rec.house_member else [])
    rec.senate_member = (
        [name.strip() for name in rec.senate_member.split(';')]
        if rec.senate_member else [])
    #rec.state = [x.strip() for x in rec.state.split(';')] if rec.state else []
    return rec

def parse_file(fn):
    """Break down the xls into a 2d data array, stripping off first rows which do not have data.

    Yields one parse_row() record per data row, renumbered from 1.
    """
    data = xls2list.xls2list(fn)
    for n, row in enumerate(data[3:]):
        r = parse_row(row)
        # All of the earmarks have a description, stop when we finish all
        # earmarks
        if not r.description: break 
        # The id's aren't remotely uniq, map to something that is
        r.id=n+1 # Lets start at 1 instead of 0
        yield r

if __name__ == "__main__":
    import tools
    # Parse the earmark spreadsheet and export the records.
    tools.export(parse_file(EARMARK_FILE))
Example #23
0
    for k, v in amendment['schedules'].items():
        report['schedules'][k] = v


def apply_amendments(headers, report, amendments):
    """Apply any amendments filed against *report*, then flatten its schedules.

    amendments maps report_id -> list of amendment filings.  After applying
    the (possibly empty) list, report['schedules'] is replaced by the plain
    values of its dict.  Returns the mutated report.
    """
    report_id = report['report_id']
    # dict.has_key() is deprecated (and gone in Python 3); use `in`.
    if report_id in amendments:
        for a in amendments[report_id]:
            apply_amendment(headers, report, a)
    report['schedules'] = report['schedules'].values()
    return report


def parse_filings(headers, reports, amendments):
    """Yield each report fully parsed and with its amendments applied."""
    for r in reports:
        print r['zfn']  # progress output (Python 2 print statement)
        r = parse_report(headers, r)
        r = apply_amendments(headers, r, amendments)
        yield r


def parse_efilings(filepattern=EFILINGS_PATH + '*.zip'):
    """Return an iterator of parsed, amendment-applied e-filing reports."""
    hdrs = parse_headers()
    rpts, amds = file_index(filepattern)
    return parse_filings(hdrs, rpts, amds)


if __name__ == "__main__":
    # Parse all e-filings and export the resulting reports.
    tools.export(parse_efilings())
Example #24
0
            pacs = pacs.replace(',', '')
            indivs = indivs.replace(',', '')
            fhout.write(
                '\t'.join([opensecretsid, sector, total, pacs, indivs]) + '\n')

    fhout.close()


def parse_all():
    """Yield per-candidate money summaries, combining 2008 and 2006 cycles."""
    for fn in glob.glob(CANSUM % (2008, '*')):
        opensecretsid = fn.split('/')[-1].split('.')[0]
        try:
            s8 = parse_can(opensecretsid, 2008)
            try:
                s6 = parse_can(opensecretsid, 2006)
            except:
                # No usable 2006 data: fall back to the 2008 numbers alone.
                yield s8
            else:
                s = web.storage()
                s.badmoney = s8.badmoney + s6.badmoney
                s.total = s8.total + s6.total
                s.business_pac = s8.business_pac + s6.business_pac
                yield s
        except:
            # NOTE(review): bare except silently skips any failing candidate;
            # consider narrowing to the exceptions parse_can actually raises.
            print "Could not read", opensecretsid


if __name__ == "__main__":
    import tools
    # Export the combined candidate money summaries.
    tools.export(parse_all())
Example #25
0
        state_codes[line[0]] = line[1].strip().title()
    return state_codes

def parse_historical_voting():
    """
    Parse county-level data. The data is in the format:
    STATE_CODE  COUNTY_NAME DEMOCRAT_COUNT REPUBLICAN_COUNT OTHER_COUNT
    """
    state_codes = read_state_codes()
    files = glob.glob(DATA_PATH + '*')
    
    for fname in files[:-1]: # skip junk file
        for line in file(fname).readlines():
            # The county name is quoted in the raw line, so splitting on '"'
            # yields (state code, county name, the three vote counts).
            code, county_name, numbers = line.split('"')
            dem_count, rep_count, other_count = numbers.split()
            state = state_codes[code.strip()]
        
            yield {
              'n_democrats': dem_count,
              'n_republicans': rep_count,
              'n_other': other_count,
              'state_name': state,
              'state_fips': code.strip(),
              'county_name': county_name,
              'year': fname.split('/')[-1]  # the filename itself is the year label
            }

if __name__ == "__main__":
    import tools
    # Export the county-level historical voting records.
    tools.export(parse_historical_voting())
Example #26
0
    """Take rows (like those returned from getEarmarks) and hash them by representative. If no representative is listed
    then the earmark will be hashed under the "noname" key."""
    marks = getEarmarks(xlsFilename)
    byPerson = {}
    byPerson["noname"] = []
    for row in marks:
        people = row.houseMembers+row.senateMembers
        if len(people) == 0:
            byPerson["noname"].append(row)
        for person in people:
            if person in byPerson:
                byPerson[person].append(row)
            else:
                byPerson[person] = [row]
    return byPerson

def printEarmarks(rows):
    """Takes in an array of rows (like those produced from getEarmarks) and pretty prints them."""
    for row in rows:
        print row  # Python 2 print statement; relies on each row's repr

#Examples of how to use, assuming the path to the earmarks file is passed in as the first arg.

#marks = getEarmarksByName(sys.argv[1])
#printEarmarks(marks["noname"])
#printEarmarks(marks["Edwards"])

if __name__ == "__main__":
    import tools
    # Export each earmark row's attribute dict from the master spreadsheet.
    tools.export(x.__dict__ for x in getEarmarks('../data/crawl/taxpayer/bigkahuna.xls'))
Example #27
0
    for fn in fns:
        print >> sys.stderr, fn
        fh = gzip.open(fn)
        if '1980' in fn:
            cur_def = def_indiv_80
            fh = fix80(cur_def, fh)
        if '1990' in fn: cur_def = def_indiv_90
        if '1996' in fn: cur_def = def_indiv_96
        for elt in parse_file(cur_def, fh):
            yield elt


def parse_others():
    """Parse every FEC oth.dat file, switching record layouts by cycle year."""
    cur_def = def_oth_86
    for fn in sorted(glob.glob('../data/crawl/fec/*/oth.dat')):
        print >> sys.stderr, fn  # progress output (Python 2 print syntax)
        fh = file(fn)
        # The layout changed in the 1990 and 1996 cycles; since the paths are
        # sorted, cur_def tracks the newest layout seen so far.
        if '1990' in fn: cur_def = def_oth_90
        if '1996' in fn: cur_def = def_oth_96
        for elt in parse_file(cur_def, fh):
            yield elt


if __name__ == "__main__":
    import tools
    # Export each FEC dataset in turn.
    tools.export(parse_candidates())
    tools.export(parse_committees())
    tools.export(parse_transfers())
    tools.export(parse_contributions())
    tools.export(parse_others())
Example #28
0
        yield out

        loc += 8


def parse_soi(verbose=False):
    """Yield SOI rows for every state, relabeling each state's 'Total' row.

    verbose: when True, writes progress to stderr (Python 2 print syntax).
    """
    import sys

    states = [
        'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI',
        'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN',
        'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH',
        'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA',
        'WI', 'WV', 'WY'
    ]

    if verbose: print >> sys.stderr
    for state in states:
        if verbose: print >> sys.stderr, "\rParsing", state + '...',
        for x in parse_state(state):
            # The per-state 'Total' row gets the state code as its location.
            if x.loc.strip() == 'Total':
                x.loc = state
            yield x
    # Trailing spaces overwrite the last progress message on the terminal.
    if verbose: print >> sys.stderr, '\r                     '


if __name__ == "__main__":
    import tools
    # Export all SOI rows, showing per-state progress on stderr.
    tools.export(parse_soi(verbose=True))
Example #29
0
# Fixed-width record layout for the IRS Form 5500 file:
# (field_name, byte_width, parser).  A name of None marks filler bytes.
def_5500 = [
    ('unk1_digits', 26, string),
    ('unk2', 8, date),
    ('unk3', 8, date),
    # NOTE(review): 'unk4' is repeated eight times; if the consumer keys
    # fields by name, later entries overwrite earlier ones — confirm intended.
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('plan_name', 140, string),
    ('unk5', 8, date),
    ('corp_name', 141, string),
    ('street1', 35, string),
    ('street2', 108, string),
    ('city', 22, string),
    ('state', 2, state),
    ('zip', 5, digits),
    ('zip4', 4, digits),
    ('unk6', 3, string),
    (None, 792, filler),  # unparsed
    (None, 2, filler('\r\n'))  # record terminator
]

if __name__ == "__main__":
    import tools
    # Parse the 2006 Form 5500 extract and hand the records to tools.export.
    tools.export(
        parse_file(def_5500, file('../data/crawl/irs/5500/F_5500_2006.txt')))
Example #30
0
       for line in file(fn):
           out = web.storage()
           out.congress = int(line[0:4])
           out.icpsr_id = int(line[4:10])
           out.icpsr_state = int(line[10:13])
           out.district = int(line[13:15])        
           out.state_name = line[15:23].strip()
           out.party_code = int(line[23:28])
           out.last_name = line[28:41].strip()
           out.dim1 = float(line[41:47])
           out.dim2 = float(line[47:54])
           out.std1 = float(line[54:61])
           out.std2 = float(line[61:68])
           out.corr = float(line[68:75])
           out.loglike = float(line[75:87])
           out.n_votes = int(line[87:92])
           out.n_errs = int(line[92:97])
           out.n_geomeanprob = float(line[97:104])
           
           if out.icpsr_state in state_map:
               out.state_code = state_map[out.icpsr_state]
               if out.district:
                   out.district_id = out.state_code + '-' + str(out.district).zfill(2)
               else:
                   out.district_id = out.state_code 
           
           yield out

if __name__ == "__main__":
    # Run the parser and hand its records to tools.export.
    tools.export(parse())
Example #31
0
  ('subsection_code', 189-187, string),
  ('affiliation', 1, enum),
  ('classification_code', 194-190, string),
  ('ruling_date', 200-194, date),
  ('deductibility_code', 1, string),  
  ('foundation_code', 2, string),  
  ('activity_code', 212-203, string),  
  ('organization_code', 1, string),  
  ('exempt_org_status_code', 2, string),  
  ('advance_ruling_expiration', 221-215, date),  
  ('tax_period', 227-221, string),  
  ('asset_code', 1, string),  
  ('income_code', 1, string),  
  ('filing_requirement_code', 3, string),  
  (None, 3, filler),  
  ('accounting_period', 2, string),  
  ('asset_amt', 250-237, integer),  
  ('income_amt', 264-250, integer2),  
  ('form_990_revenue_amt', 278-264, integer2),  
  ('ntee_code', 282-278, string),  
  ('sort_name', 318-282, string),
  (None, 2, filler('\r\n'))

]

if __name__ == "__main__":
    import glob
    import tools
    # Export every IRS exempt-organization listing file.
    for fn in glob.glob('../data/crawl/irs/eo/*.LST'):
        tools.export(parse_file(def_eo, file(fn)))