Example #1
0
def dump():
    """Scrape every almanac person and state page and export the records."""
    import glob
    import tools

    almanac_root = '../data' + '/crawl/almanac/nationaljournal.com/pubs/almanac/2008/'

    for path in glob.glob(almanac_root + 'people/*/rep*.htm'):
        tools.export([scrape_person(path)])
    for path in glob.glob(almanac_root + 'states/*/index.html'):
        tools.export([scrape_state(path)])
Example #2
0
 def save_rle(self):
     """Ask the user for a filename and save the current board pattern.

     The pattern text comes from tools.export(...) applied to the central
     board's pattern; the dialog filter restricts choices to *.rle files.
     """
     fname = QtGui.QFileDialog.getSaveFileName(self, 'Save file', 
             '*.rle', "GoL Patterns (*.rle)")
     
     # An empty fname means the user cancelled the dialog; do nothing then.
     if fname:
         f = open(fname, 'w')
         
         with f:        
             f.write(tools.export(self.central.board.pattern))
Example #3
0
    def use_pogo(self):
        """Execute one step of the Pogo algorithm.

        Similar to jittery hillclimbing: find the cell(s) with the best
        positive impact and flip one of them at random. If no cell has a
        positive impact (a local minimum), instead flip k random cells,
        where k is the number of times this particular minimum has been
        encountered — the longer it seems stuck, the "higher" it jumps.
        """
        top = 1
        candidates = []
        for row in range(self.board["y_size"]):
            for col in range(self.board["x_size"]):
                value = self.impact[row][col]
                if value > top:
                    top = value
                    candidates = [(col, row)]
                elif value == top:
                    candidates.append((col, row))

        if candidates:
            self.flip(random.choice(candidates))
        else:
            key = tools.export(self.candidate)  # immutable key for the minima dict
            if key in self.minima:
                self.minima[key] += 1
            else:
                self.minima[key] = 1
            for _ in range(self.minima[key]):
                cell = (random.choice(range(self.board["x_size"])),
                        random.choice(range(self.board["y_size"])))
                self.flip(cell)
Example #4
0
    def use_pogo(self):
        """Execute one step of the Pogo algorithm.

        Finds the cell(s) with the best positive impact and flips one of
        them. If no cell has a positive impact (a local minimum), it flips
        as many random cells as the number of times this exact candidate
        board has been seen as a minimum.
        """

        def _flip(cell):
            # The flip bookkeeping was duplicated verbatim in both branches;
            # keep it in one place so the two paths cannot drift apart.
            x, y = cell
            self.candidate[y][x] = not self.candidate[y][x]
            self.total_needy -= self.impact[y][x]
            self.update_neediness(cell)
            self.update_impact(cell)

        best_score = 1
        best_list = []
        for j in range(self.board["y_size"]):
            for i in range(self.board["x_size"]):
                score = self.impact[j][i]
                if score > best_score:
                    best_score = score
                    best_list = [(i, j)]
                elif score == best_score:
                    best_list.append((i, j))

        if not best_list:
            current = tools.export(self.candidate)  # immutable key for the minima dict
            if current in self.minima:
                self.minima[current] += 1
            else:
                self.minima[current] = 1
            for i in range(self.minima[current]):
                _flip((random.choice(range(self.board["x_size"])),
                       random.choice(range(self.board["y_size"]))))
        else:
            _flip(random.choice(best_list))
Example #5
0
    def use_pogo(self):
        """Execute one step of the Pogo algorithm.

        Flip one of the cells with the best positive impact; if none exists
        (a local minimum), flip k random cells, where k is the number of
        times this exact candidate board has been seen as a minimum.
        """
        best_score = 1
        best_list = []
        # Scan the whole board for the highest impact score (> 1).
        for j in range(self.board["y_size"]):
            for i in range(self.board["x_size"]):
                score = self.impact[j][i]
                if score > best_score:
                    best_score = score
                    best_list = [(i, j)]
                elif score == best_score:
                    best_list.append((i, j))

        if not best_list:
            current = tools.export(self.candidate)  # serialized board: immutable dict key
            if current in self.minima:
                self.minima[current] += 1
            else:
                self.minima[current] = 1
            for i in range(self.minima[current]):
                flip_cell = (random.choice(range(self.board["x_size"])),
                             random.choice(range(self.board["y_size"])))
                x = flip_cell[0]
                y = flip_cell[1]
                # Toggle the cell and incrementally update the bookkeeping.
                self.candidate[y][x] = not self.candidate[y][x]
                self.total_needy -= self.impact[y][x]
                self.update_neediness(flip_cell)
                self.update_impact(flip_cell)
        else:
            flip_cell = random.choice(best_list)
            x = flip_cell[0]
            y = flip_cell[1]
            # Same flip bookkeeping as above, applied to the single best cell.
            self.candidate[y][x] = not self.candidate[y][x]
            self.total_needy -= self.impact[y][x]
            self.update_neediness(flip_cell)
            self.update_impact(flip_cell)
Example #6
0
    def use_pogo(self):
        '''Execute one step of the Pogo algorithm.

        This is an algorithm I invented. Similar to the jittery hillclimbing
        algorithm, it looks for the cell(s) with the best impact. If no cell
        has a positive impact (which implies the search is at a local minima),
        it flips some number of random cells instead, equal to the number of
        times it has encountered that particular minima. The idea is that it
        will jump "higher" the more it seems to be getting stuck.
        '''
        high = 1
        winners = []
        for y in range(self.board["y_size"]):
            impact_row = self.impact[y]
            for x in range(self.board["x_size"]):
                s = impact_row[x]
                if s > high:
                    high, winners = s, [(x, y)]
                elif s == high:
                    winners.append((x, y))

        if winners:
            self.flip(random.choice(winners))
            return

        key = tools.export(self.candidate)  # To get an immutable key
        if key in self.minima:
            self.minima[key] += 1
        else:
            self.minima[key] = 1
        for _ in range(self.minima[key]):
            self.flip((random.choice(range(self.board["x_size"])),
                       random.choice(range(self.board["y_size"]))))
Example #7
0
                    out.active = True

                yield out

def parse_stats(metrics=METRICS):
    """Yield a web.storage of attributes for each <representative> element."""
    for metric in metrics:
        for path in glob.glob(STATS_XML % metric):
            try:
                events = pulldom.parse(path)
            except IOError:
                # Unreadable file: skip it rather than abort the export.
                continue
            for kind, node in events:
                if kind == "START_ELEMENT" and node.tagName == 'representative':
                    yield web.storage(node.attributes.items())

def parse_fec():
    """Yield {'fecid', 'bioguideid'} mappings from the FEC candidate XML.

    Candidates whose fec_id already appears inside their uri are skipped.
    NOTE(review): the condition reads `in` — confirm against the data that
    this was not meant to be `not in`.
    """
    dom = pulldom.parse(FEC_XML)
    for event, node in dom:
        if event == "START_ELEMENT" and node.tagName == 'candidate':
            dom.expandNode(node)  # materialize the subtree so child lookups work
            fec_id = node.getElementsByTagName('id')[0].firstChild.nodeValue
            uri = node.getElementsByTagName('uri')[0].firstChild.nodeValue
            if fec_id in uri: continue
            bioguide_id = uri.split('/')[-1]  # last path segment of the uri
            yield {'fecid': fec_id, 'bioguideid': bioguide_id}

if __name__ == "__main__":
    # Export basics and stats always; the FEC id mapping only when the
    # configured session is the 110th.
    tools.export(parse_basics())
    tools.export(parse_stats())
    if current_session == 110: tools.export(parse_fec())
Example #8
0
# Fixed-width record layout for the IRS Form 5500 file:
# (field_name, byte_width, parser).  A name of None marks filler bytes.
def_5500 = [
  ('unk1_digits', 26, string),
  ('unk2', 8, date),
  ('unk3', 8, date),
  # NOTE(review): 'unk4' is repeated eight times; if the consumer keys
  # fields by name, later entries overwrite earlier ones — confirm intended.
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('unk4', 1, integer),
  ('plan_name', 140, string),
  ('unk5', 8, date),
  ('corp_name', 141, string),
  ('street1', 35, string),
  ('street2', 108, string),
  ('city', 22, string),
  ('state', 2, state),
  ('zip', 5, digits),
  ('zip4', 4, digits),
  ('unk6', 3, string),
  (None, 792, filler), # unparsed
  (None, 2, filler('\r\n'))  # record terminator
]

if __name__ == "__main__":
    import tools
    # Parse the 2006 Form 5500 extract and hand the records to tools.export.
    tools.export(parse_file(def_5500, file('../data/crawl/irs/5500/F_5500_2006.txt')))
Example #9
0
    for row in r_row.findall(d):
        out = r_td.findall(row)
        if out:
            dist, membername = r_member.findall(row)[0]
            dist = dist.replace("At Large", "00")
            dist = dist[:2] + "-" + dist[2:].zfill(2)

            s = web.storage()
            s.district = dist
            s.progressive2008 = fixdec(out[0])
            s.chips2008 = fixdec(out[1])
            s.progressiveall = fixdec(out[3])
            s.name = membername.decode("iso-8859-1")

            yield s


def parse_all():
    """Yield parsed rows from the House page, then from the Senate page."""
    for path in ("../data/crawl/punch/house.html",
                 "../data/crawl/punch/senate.html"):
        for record in parse_doc(file(path).read()):
            yield record


if __name__ == "__main__":
    import tools

    # Export every parsed House and Senate row.
    tools.export(parse_all())
Example #10
0
            except err: pass
                
        try: out.gini = gini_est(out.brackets)
        except MissingData: pass
        
        yield out
        
        loc += 8

def parse_soi(verbose=False):
    """Yield SOI rows for every state, relabeling each state's 'Total' row.

    verbose: when True, writes progress to stderr (Python 2 print syntax).
    """
    import sys

    states = ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 
    'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 
    'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 
    'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 
    'WA', 'WI', 'WV', 'WY']

    if verbose: print>>sys.stderr
    for state in states:
        if verbose: print>>sys.stderr, "\rParsing", state + '...',
        for x in parse_state(state):
            # The per-state 'Total' row gets the state code as its location.
            if x.loc.strip() == 'Total':
                x.loc = state
            yield x
    # Trailing spaces overwrite the last progress message on the terminal.
    if verbose: print >>sys.stderr, '\r                     '

if __name__ == "__main__":
    import tools
    # Export all SOI rows, showing per-state progress on stderr.
    tools.export(parse_soi(verbose=True))
Example #11
0
    out.population = x['Population']
    if 'Suppressed' in x['Crude Rate']:
        out.crude_rate_suppressed = True
    else:
        out.crude_rate_suppressed = False
        crude_rate = re.search(r'(?P<rate>\d*(\.\d*))', x['Crude Rate'])
        if crude_rate:
            out.crude_rate = crude_rate.group('rate')
        unreliable = 'Unreliable' in x['Crude Rate']
        out.crude_rate_reliable = not unreliable
    return out


def parse_file(fn):
    """Yield parsed records from the tab-separated file *fn*.

    Yields nothing when the file does not exist.  Skips the "Total"
    summary row and trailing footer lines (which have no Count value).
    """
    if not glob(fn):
        return
    parser = csv.DictReader(file(fn), delimiter="\t")
    for x in parser:
        # Ignore the Total row and footer lines.
        # (Comparison to None uses `is`, not `==`, per PEP 8.)
        if x['Notes'] == "Total" or x['Count'] is None:
            continue
        yield parse_line(x)


if __name__ == "__main__":
    import sys
    import tools
    # Each command-line argument names a file to parse and export.
    for i in sys.argv[1:]:
        tools.export(parse_file(i))
Example #12
0
        ("transmission_time", string),  # @@
        ("record_count", integer),
    ],
}


def parse_line(s):
    """Parse one pipe-delimited record, dispatching on its first character."""
    record_type = s[0]
    if record_type not in def_polorgs:
        warnings.warn("Don't recognize: " + record_type)
        return
    return parse_line_type(s.strip(), def_polorgs[record_type])


def parse_line_type(line, def4type):
    """Split *line* on '|' and convert each field with its declared parser.

    def4type is a sequence of (name, converter) pairs; extra fields on
    either side are silently dropped by zip().
    """
    out = web.storage()
    for (name, kind), val in zip(def4type, line.split("|")):
        out[name] = kind(val)
        print name, val, kind(val)  # NOTE(review): looks like leftover debug output
    return out


def parse_doc(doc):
    """Yield the parsed record for every line of *doc*."""
    for raw_line in doc:
        yield parse_line(raw_line)


if __name__ == "__main__":
    import tools

    # Parse the IRS political-organizations dump and export the records.
    tools.export(parse_doc(file("../data/crawl/irs/pol/FullDataFile.txt")))
Example #13
0
    amendment = parse_report(headers, amendment)
    report['committee'] = amendment['committee']
    report['candidate'] = amendment['candidate']
    for k, v in amendment['schedules'].items():
        report['schedules'][k] = v

def apply_amendments(headers, report, amendments):
    """Apply any amendments filed against *report*, then flatten its schedules.

    amendments maps report_id -> list of amendment filings.  After applying
    the (possibly empty) list, report['schedules'] is replaced by the plain
    values of its dict.  Returns the mutated report.
    """
    report_id = report['report_id']
    # dict.has_key() is deprecated (and gone in Python 3); use `in`.
    if report_id in amendments:
        for a in amendments[report_id]:
            apply_amendment(headers, report, a)
    report['schedules'] = report['schedules'].values()
    return report

def parse_filings(headers, reports, amendments):
    """Yield each report fully parsed and with its amendments applied."""
    for r in reports:
        print r['zfn']  # progress output (Python 2 print statement)
        r = parse_report(headers, r)
        r = apply_amendments(headers, r, amendments)
        yield r

def parse_efilings(filepattern = EFILINGS_PATH + '*.zip'):
    """Parse every e-filing archive matching *filepattern* into reports."""
    header_defs = parse_headers()
    report_list, amendment_map = file_index(filepattern)
    return parse_filings(header_defs, report_list, amendment_map)

if __name__ == "__main__":
    # Parse all e-filings and export the resulting reports.
    tools.export(parse_efilings())

Example #14
0
    for row in parse_file(def_zip4, fh):
        if row['_type'] != 'ZIP+4 Detail': continue
        if row['congress_dist'] == 'AL':
            row['congress_dist'] = '00'
        if row['zip4_lo'] == row['zip4_hi']:
            zip4s = [row['zip4_lo']]
        else:
            zip4s = [str(x).zfill(4) for x in xrange(int(row['zip4_lo']), int(row['zip4_hi']) + 1)]
        for zip4 in zip4s:
            yield row['zip'] + '-' + zip4, row['state_abbrev'] + '-' + row['congress_dist']

if __name__ == "__main__":
    import sys, glob, tools
    
    # CLI: the first argument selects a record layout, the second is a path
    # prefix that is globbed for input files.
    def_map = {
      '--ctystate': def_ctystate, 
      '--5digit': def_5digit, 
      '--zip4': def_zip4, 
      '--delstat': def_delstat
    }
    
    if sys.argv[1] in def_map:
        for fn in glob.glob(sys.argv[2] + '*.txt'):
            tools.export(parse_file(def_map[sys.argv[1]], file(fn)))
    elif sys.argv[1] == '--tiger':
        for fn in glob.glob(sys.argv[2] + '*/*.txt'):
            tools.export(parse_tigerzip(file(fn)))
    elif sys.argv[1] == '--tigerdat':
        for fn in glob.glob(sys.argv[2] + '*/TIGER.DAT'):
            tools.export(parse_tigerdat(file(fn)))
Example #15
0

def parse_stats(metrics=METRICS):
    """Yield a web.storage of attributes for each <representative> element.

    Stats files that fail to open are skipped rather than aborting.
    """
    for metric in metrics:
        for fn in glob.glob(STATS_XML % metric):
            try:
                dom = pulldom.parse(fn)
            except IOError:
                continue
            for event, node in dom:
                if event == "START_ELEMENT" and node.tagName == 'representative':
                    yield web.storage(node.attributes.items())


def parse_fec():
    """Yield {'fecid', 'bioguideid'} mappings from the FEC candidate XML."""
    events = pulldom.parse(FEC_XML)
    for kind, node in events:
        if kind != "START_ELEMENT" or node.tagName != 'candidate':
            continue
        events.expandNode(node)
        fec_id = node.getElementsByTagName('id')[0].firstChild.nodeValue
        uri = node.getElementsByTagName('uri')[0].firstChild.nodeValue
        if fec_id not in uri:
            yield {'fecid': fec_id, 'bioguideid': uri.split('/')[-1]}


if __name__ == "__main__":
    # Export basics and stats always; the FEC id mapping only when the
    # configured session is the 110th.
    tools.export(parse_basics())
    tools.export(parse_stats())
    if current_session == 110: tools.export(parse_fec())
Example #16
0
  ('src_occupation', 35, get_data_str),
  ('date', 8, get_data_date),
  ('amount', 7, get_data_int),
  ('src_id', 9, get_data_str),
  ('fec_record_id', 7, get_data_str)
]

def parse_candidates():
    """Parse the 2008 all-candidates summary file (weball.dat)."""
    return read_fec_file(file("../data/crawl/fec/2008/weball.dat"), WEB_ROW_DEF)
def parse_committees():
    """Parse the 2008 committee master file (cm.dat)."""
    return read_fec_file(file("../data/crawl/fec/2008/cm.dat"), CM_DEF)
def parse_transfers():
    """Parse the 2008 committee-to-candidate transfers file (pas2.dat)."""
    return read_fec_file(file("../data/crawl/fec/2008/pas2.dat"), PAS2_DEF)
def parse_contributions():
    """Parse the gzipped 2008 individual contributions file (indiv.dat.gz)."""
    return read_fec_file(gzip.open("../data/crawl/fec/2008/indiv.dat.gz"), INDIV_ROW_DEF)

if __name__ == "__main__":
    import tools
    # Export each dataset exactly once.  (Previously parse_transfers() was
    # exported twice, duplicating every transfer record in the output.)
    tools.export(parse_committees())
    tools.export(parse_transfers())
    tools.export(parse_contributions())
    tools.export(parse_candidates())

#result = read_fec_file("WEBL08.DAT",WEB_ROW_DEF,WEB_ROW_DEF_SIZE)
#result = read_fec_file("cansum04.txt",CANSUM_ROW_DEF,CANSUM_ROW_DEF_SIZE)
#result = read_fec_file("CANSUM94.DAT",CANSUM_94_ROW_DEF,CANSUM_94_ROW_DEF_SIZE)
#result = read_fec_file("CANSUM90.DAT",CANSUM_90_ROW_DEF,CANSUM_90_ROW_DEF_SIZE)
#result = read_fec_file("cansum88.dat",CANSUM_88_ROW_DEF,CANSUM_88_ROW_DEF_SIZE)
#print result
Example #17
0
        ('transmission_date', date),
        ('transmission_time', string),  #@@
        ('record_count', integer)
    ]
}


def parse_line(s):
    """Parse one pipe-delimited record, dispatching on its first character.

    Unrecognized record types produce a warning and return None.
    """
    if s[0] in def_polorgs:
        return parse_line_type(s.strip(), def_polorgs[s[0]])
    else:
        warnings.warn("Don't recognize: " + s[0])


def parse_line_type(line, def4type):
    """Split *line* on '|' and convert each field with its declared parser.

    def4type is a sequence of (name, converter) pairs; extra fields on
    either side are silently dropped by zip().
    """
    out = web.storage()
    for (name, kind), val in zip(def4type, line.split('|')):
        out[name] = kind(val)
        print name, val, kind(val)  # NOTE(review): looks like leftover debug output
    return out


def parse_doc(doc):
    """Yield the parsed record for every line of *doc* (an iterable of lines)."""
    for line in doc:
        yield parse_line(line)


if __name__ == "__main__":
    import tools
    # Parse the IRS political-organizations dump and export the records.
    tools.export(parse_doc(file('../data/crawl/irs/pol/FullDataFile.txt')))
Example #18
0
                num, fipscode, distname, distdesc, ignore, ignore2 = \
                        [x.strip().strip('"') for x in lines]
                distnum = None
            if not fipscode.strip(): continue
            shapeid2district[num] = (fipscode, distnum)
        
        out = {}
        for line in file(DATA_DIR + '/'+filename+'.dat'):
            nums = line.strip().split()
            if len(nums) == 3:
                shapeid = nums[0] # other points are the center
                if shapeid in shapeid2district:
                    SKIPME = False
                    district = shapeid2district[shapeid]
                    out.setdefault(district, [])
                    out[district].append([])
                else:
                    SKIPME = True
            elif len(nums) == 2 and not SKIPME:
                out[district][-1].append((float(nums[0]), float(nums[1])))
        
        for (fipscode, distnum), shapes in out.iteritems():
            yield {
              '_type': 'district', 
              'state_fipscode': fipscode, 
              'district': distnum,
              'shapes': shapes
            }

if __name__ == "__main__":
    # Parse the district shape files and export the records.
    tools.export(parse())
Example #19
0
            
            if 'current-committee-assignment' in [
              hasattr(x, 'tagName') and x.tagName for x in node.childNodes
            ]:
                out.active = True

            yield out

def parse_stats(metrics=METRICS):
    """Yield a web.storage of attributes for each <representative> element.

    A stats file that cannot be opened is skipped instead of aborting the
    whole export with an uncaught IOError.
    """
    for metric in metrics:
        try:
            dom = pulldom.parse(STATS_XML % metric)
        except IOError:
            continue
        for event, node in dom:
            if event == "START_ELEMENT" and node.tagName == 'representative':
                yield web.storage(node.attributes.items())

def parse_fec():
    """Yield {'fecid', 'bioguideid'} mappings from the FEC candidate XML.

    Candidates whose fec_id already appears inside their uri are skipped.
    NOTE(review): the condition reads `in` — confirm against the data that
    this was not meant to be `not in`.
    """
    dom = pulldom.parse(FEC_XML)
    for event, node in dom:
        if event == "START_ELEMENT" and node.tagName == 'candidate':
            dom.expandNode(node)  # materialize the subtree so child lookups work
            fec_id = node.getElementsByTagName('id')[0].firstChild.nodeValue
            uri = node.getElementsByTagName('uri')[0].firstChild.nodeValue
            if fec_id in uri: continue
            bioguide_id = uri.split('/')[-1]  # last path segment of the uri
            yield {'fecid': fec_id, 'bioguideid': bioguide_id}

if __name__ == "__main__":
    # Export all three datasets in turn.
    tools.export(parse_basics())
    tools.export(parse_stats())
    tools.export(parse_fec())
Example #20
0
    ('tax_period', 227 - 221, string),
    ('asset_code', 1, string),
    ('income_code', 1, string),
    ('filing_requirement_code', 3, string),
    (None, 3, filler),
    ('accounting_period', 2, string),
    ('asset_amt', 250 - 237, integer),
    ('income_amt', 264 - 250, integer2),
    ('form_990_revenue_amt', 278 - 264, integer2),
    ('ntee_code', 282 - 278, string),
    ('sort_name', 318 - 282, string),
    (None, 2, filler('\r\n'))
]


def parse():
    """Chain the parsed records of every IRS exempt-org .LST file.

    Uses chain.from_iterable with a generator expression so each file is
    opened only when its records are actually consumed, instead of opening
    every file up front as chain(*[...]) did.
    """
    return itertools.chain.from_iterable(
        parse_file(def_eo, file(fn))
        for fn in glob.glob('../data/crawl/irs/eo/*.LST'))


if __name__ == "__main__":
    import sys
    # 'load' inserts straight into the exempt_org table; otherwise export.
    if 'load' in sys.argv:
        from settings import db
        db.multiple_insert("exempt_org", parse(), seqname=False)
    else:
        import tools
        tools.export(parse())
Example #21
0
def parse_row(row):
    """Convert a raw xls row into a web.storage keyed by the fmt column names.

    house_member / senate_member hold semicolon-separated name lists in the
    sheet; the `(x or []) and [...]` idiom maps an empty/missing cell to []
    and a non-empty string to a list of stripped names.
    """
    out = web.storage()
    for n, item in enumerate(fmt):
        out[item] = row[n]
    out.house_member = (out.house_member or []) and [
        x.strip() for x in out.house_member.split(';')
    ]
    out.senate_member = (out.senate_member or []) and [
        x.strip() for x in out.senate_member.split(';')
    ]
    #out.state = (out.state or []) and [x.strip() for x in out.state.split(';')]
    return out


def parse_file(fn):
    """Break down the xls into a 2d data array, stripping off first rows which do not have data."""
    rows = xls2list.xls2list(fn)
    for idx, raw in enumerate(rows[3:]):
        record = parse_row(raw)
        # Every real earmark has a description; a blank one marks the end
        # of the data, so stop there.
        if not record.description:
            break
        # The file's own ids aren't remotely unique; renumber from 1.
        record.id = idx + 1
        yield record


if __name__ == "__main__":
    import tools
    # Parse the earmark spreadsheet and export the records.
    tools.export(parse_file(EARMARK_FILE))
Example #22
0
  'house_member', 'house_party', 'house_state', 'district',
  'senate_member', 'senate_party', 'senate_state',
  'presidential', 'undisclosed', 'intended_recipient',
  'notes'
)

def parse_row(row):
    """Map one raw spreadsheet row onto a web.storage keyed by the fmt names."""
    rec = web.storage()
    for pos, field in enumerate(fmt):
        rec[field] = row[pos]
    # Semicolon-separated member cells become lists; empty cells become [].
    rec.house_member = (
        [name.strip() for name in rec.house_member.split(';')]
        if rec.house_member else [])
    rec.senate_member = (
        [name.strip() for name in rec.senate_member.split(';')]
        if rec.senate_member else [])
    #rec.state = [x.strip() for x in rec.state.split(';')] if rec.state else []
    return rec

def parse_file(fn):
    """Break down the xls into a 2d data array, stripping off first rows which do not have data.

    Yields one parse_row() record per data row, renumbered from 1.
    """
    data = xls2list.xls2list(fn)
    for n, row in enumerate(data[3:]):
        r = parse_row(row)
        # All of the earmarks have a description, stop when we finish all
        # earmarks
        if not r.description: break 
        # The id's aren't remotely uniq, map to something that is
        r.id=n+1 # Lets start at 1 instead of 0
        yield r

if __name__ == "__main__":
    import tools
    # Parse the earmark spreadsheet and export the records.
    tools.export(parse_file(EARMARK_FILE))
Example #23
0
    for k, v in amendment['schedules'].items():
        report['schedules'][k] = v


def apply_amendments(headers, report, amendments):
    """Apply any amendments filed against *report*, then flatten its schedules.

    amendments maps report_id -> list of amendment filings.  After applying
    the (possibly empty) list, report['schedules'] is replaced by the plain
    values of its dict.  Returns the mutated report.
    """
    report_id = report['report_id']
    # dict.has_key() is deprecated (and gone in Python 3); use `in`.
    if report_id in amendments:
        for a in amendments[report_id]:
            apply_amendment(headers, report, a)
    report['schedules'] = report['schedules'].values()
    return report


def parse_filings(headers, reports, amendments):
    """Yield each report fully parsed and with its amendments applied."""
    for r in reports:
        print r['zfn']  # progress output (Python 2 print statement)
        r = parse_report(headers, r)
        r = apply_amendments(headers, r, amendments)
        yield r


def parse_efilings(filepattern=EFILINGS_PATH + '*.zip'):
    """Return an iterator of parsed, amendment-applied e-filing reports."""
    hdrs = parse_headers()
    rpts, amds = file_index(filepattern)
    return parse_filings(hdrs, rpts, amds)


if __name__ == "__main__":
    # Parse all e-filings and export the resulting reports.
    tools.export(parse_efilings())
Example #24
0
            pacs = pacs.replace(',', '')
            indivs = indivs.replace(',', '')
            fhout.write(
                '\t'.join([opensecretsid, sector, total, pacs, indivs]) + '\n')

    fhout.close()


def parse_all():
    """Yield per-candidate money summaries, combining 2008 and 2006 cycles."""
    for fn in glob.glob(CANSUM % (2008, '*')):
        opensecretsid = fn.split('/')[-1].split('.')[0]
        try:
            s8 = parse_can(opensecretsid, 2008)
            try:
                s6 = parse_can(opensecretsid, 2006)
            except:
                # No usable 2006 data: fall back to the 2008 numbers alone.
                yield s8
            else:
                s = web.storage()
                s.badmoney = s8.badmoney + s6.badmoney
                s.total = s8.total + s6.total
                s.business_pac = s8.business_pac + s6.business_pac
                yield s
        except:
            # NOTE(review): bare except silently skips any failing candidate;
            # consider narrowing to the exceptions parse_can actually raises.
            print "Could not read", opensecretsid


if __name__ == "__main__":
    import tools
    # Export the combined candidate money summaries.
    tools.export(parse_all())
Example #25
0
        state_codes[line[0]] = line[1].strip().title()
    return state_codes

def parse_historical_voting():
    """
    Parse county-level data. The data is in the format:
    STATE_CODE  COUNTY_NAME DEMOCRAT_COUNT REPUBLICAN_COUNT OTHER_COUNT
    """
    state_codes = read_state_codes()
    files = glob.glob(DATA_PATH + '*')
    
    for fname in files[:-1]: # skip junk file
        for line in file(fname).readlines():
            # The county name is quoted in the raw line, so splitting on '"'
            # yields (state code, county name, the three vote counts).
            code, county_name, numbers = line.split('"')
            dem_count, rep_count, other_count = numbers.split()
            state = state_codes[code.strip()]
        
            yield {
              'n_democrats': dem_count,
              'n_republicans': rep_count,
              'n_other': other_count,
              'state_name': state,
              'state_fips': code.strip(),
              'county_name': county_name,
              'year': fname.split('/')[-1]  # the filename itself is the year label
            }

if __name__ == "__main__":
    import tools
    # Export the county-level historical voting records.
    tools.export(parse_historical_voting())
Example #26
0
    """Take rows (like those returned from getEarmarks) and hash them by representative. If no representative is listed
    then the earmark will be hashed under the "noname" key."""
    marks = getEarmarks(xlsFilename)
    byPerson = {}
    byPerson["noname"] = []
    for row in marks:
        people = row.houseMembers+row.senateMembers
        if len(people) == 0:
            byPerson["noname"].append(row)
        for person in people:
            if person in byPerson:
                byPerson[person].append(row)
            else:
                byPerson[person] = [row]
    return byPerson

def printEarmarks(rows):
    """Takes in an array of rows (like those produced from getEarmarks) and pretty prints them."""
    for row in rows:
        print row  # Python 2 print statement; relies on each row's repr

#Examples of how to use, assuming the path to the earmarks file is passed in as the first arg.

#marks = getEarmarksByName(sys.argv[1])
#printEarmarks(marks["noname"])
#printEarmarks(marks["Edwards"])

if __name__ == "__main__":
    import tools
    # Export each earmark row's attribute dict from the master spreadsheet.
    tools.export(x.__dict__ for x in getEarmarks('../data/crawl/taxpayer/bigkahuna.xls'))
Example #27
0
    for fn in fns:
        print >> sys.stderr, fn
        fh = gzip.open(fn)
        if '1980' in fn:
            cur_def = def_indiv_80
            fh = fix80(cur_def, fh)
        if '1990' in fn: cur_def = def_indiv_90
        if '1996' in fn: cur_def = def_indiv_96
        for elt in parse_file(cur_def, fh):
            yield elt


def parse_others():
    """Parse every FEC oth.dat file, switching record layouts by cycle year."""
    cur_def = def_oth_86
    for fn in sorted(glob.glob('../data/crawl/fec/*/oth.dat')):
        print >> sys.stderr, fn  # progress output (Python 2 print syntax)
        fh = file(fn)
        # The layout changed in the 1990 and 1996 cycles; since the paths are
        # sorted, cur_def tracks the newest layout seen so far.
        if '1990' in fn: cur_def = def_oth_90
        if '1996' in fn: cur_def = def_oth_96
        for elt in parse_file(cur_def, fh):
            yield elt


if __name__ == "__main__":
    import tools
    # Export each FEC dataset in turn.
    tools.export(parse_candidates())
    tools.export(parse_committees())
    tools.export(parse_transfers())
    tools.export(parse_contributions())
    tools.export(parse_others())
Example #28
0
        yield out

        loc += 8


def parse_soi(verbose=False):
    """Yield SOI rows for every state, relabeling each state's 'Total' row.

    verbose: when True, writes progress to stderr (Python 2 print syntax).
    """
    import sys

    states = [
        'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI',
        'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN',
        'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH',
        'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA',
        'WI', 'WV', 'WY'
    ]

    if verbose: print >> sys.stderr
    for state in states:
        if verbose: print >> sys.stderr, "\rParsing", state + '...',
        for x in parse_state(state):
            # The per-state 'Total' row gets the state code as its location.
            if x.loc.strip() == 'Total':
                x.loc = state
            yield x
    # Trailing spaces overwrite the last progress message on the terminal.
    if verbose: print >> sys.stderr, '\r                     '


if __name__ == "__main__":
    import tools
    # Export all SOI rows, showing per-state progress on stderr.
    tools.export(parse_soi(verbose=True))
Example #29
0
# Fixed-width record layout for the IRS Form 5500 file:
# (field_name, byte_width, parser).  A name of None marks filler bytes.
def_5500 = [
    ('unk1_digits', 26, string),
    ('unk2', 8, date),
    ('unk3', 8, date),
    # NOTE(review): 'unk4' is repeated eight times; if the consumer keys
    # fields by name, later entries overwrite earlier ones — confirm intended.
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('unk4', 1, integer),
    ('plan_name', 140, string),
    ('unk5', 8, date),
    ('corp_name', 141, string),
    ('street1', 35, string),
    ('street2', 108, string),
    ('city', 22, string),
    ('state', 2, state),
    ('zip', 5, digits),
    ('zip4', 4, digits),
    ('unk6', 3, string),
    (None, 792, filler),  # unparsed
    (None, 2, filler('\r\n'))  # record terminator
]

if __name__ == "__main__":
    import tools
    # Parse the 2006 Form 5500 extract and hand the records to tools.export.
    tools.export(
        parse_file(def_5500, file('../data/crawl/irs/5500/F_5500_2006.txt')))
Example #30
0
       for line in file(fn):
           out = web.storage()
           out.congress = int(line[0:4])
           out.icpsr_id = int(line[4:10])
           out.icpsr_state = int(line[10:13])
           out.district = int(line[13:15])        
           out.state_name = line[15:23].strip()
           out.party_code = int(line[23:28])
           out.last_name = line[28:41].strip()
           out.dim1 = float(line[41:47])
           out.dim2 = float(line[47:54])
           out.std1 = float(line[54:61])
           out.std2 = float(line[61:68])
           out.corr = float(line[68:75])
           out.loglike = float(line[75:87])
           out.n_votes = int(line[87:92])
           out.n_errs = int(line[92:97])
           out.n_geomeanprob = float(line[97:104])
           
           if out.icpsr_state in state_map:
               out.state_code = state_map[out.icpsr_state]
               if out.district:
                   out.district_id = out.state_code + '-' + str(out.district).zfill(2)
               else:
                   out.district_id = out.state_code 
           
           yield out

if __name__ == "__main__":
    # Run the parser and hand its records to tools.export.
    tools.export(parse())
Example #31
0
  ('subsection_code', 189-187, string),
  ('affiliation', 1, enum),
  ('classification_code', 194-190, string),
  ('ruling_date', 200-194, date),
  ('deductibility_code', 1, string),  
  ('foundation_code', 2, string),  
  ('activity_code', 212-203, string),  
  ('organization_code', 1, string),  
  ('exempt_org_status_code', 2, string),  
  ('advance_ruling_expiration', 221-215, date),  
  ('tax_period', 227-221, string),  
  ('asset_code', 1, string),  
  ('income_code', 1, string),  
  ('filing_requirement_code', 3, string),  
  (None, 3, filler),  
  ('accounting_period', 2, string),  
  ('asset_amt', 250-237, integer),  
  ('income_amt', 264-250, integer2),  
  ('form_990_revenue_amt', 278-264, integer2),  
  ('ntee_code', 282-278, string),  
  ('sort_name', 318-282, string),
  (None, 2, filler('\r\n'))

]

if __name__ == "__main__":
    import glob
    import tools
    # Export every IRS exempt-organization listing file.
    for fn in glob.glob('../data/crawl/irs/eo/*.LST'):
        tools.export(parse_file(def_eo, file(fn)))