def main():
    """Scrape every almanac person page and update the `district` table.

    Globs ALMANAC_DIR for `rep_*.htm` and `*s[12].htm` files one directory
    level below each top-level entry, processes them in sorted path order,
    and for each one:

    * derives the district name from the file name,
    * passes the scraped record to load_election_results(),
    * for files whose path contains ALMANAC_DIR + '2008', collects
      demographics and the almanac URL and writes them with db.update().

    Raises:
        AssertionError: if ALMANAC_DIR does not exist.
    """
    assert os.path.exists(ALMANAC_DIR), ALMANAC_DIR
    files = glob.glob(ALMANAC_DIR + '*/people/*/rep_*.htm') + \
            glob.glob(ALMANAC_DIR + '*/people/*/*s[12].htm')
    # Sorted so files are handled in a deterministic, path-ordered sequence.
    files.sort()
    for fn in files:
        district = web.storage()
        demog = None
        # File name without the '.htm' suffix and 'rep_' prefix; the first
        # two characters are used as the state and the last two as the
        # district number.
        dist = web.lstrips(web.rstrips(fn.split('/')[-1], '.htm'), 'rep_')
        diststate = dist[0:2].upper()
        distnum = dist[-2:]
        distname = tools.fixdist(diststate + '-' + distnum)
        d = almanac.scrape_person(fn)
        load_election_results(d, distname)
        # NOTE(review): everything below is assumed to be nested under this
        # 2008 guard (demographics, almanac URL and the db.update call) --
        # confirm against the intended indentation.
        if ALMANAC_DIR + '2008' in fn:
            if 'demographics' in d:
                demog = d['demographics']
            elif distname[-2:] == '00' or '-' not in distname:
                # if -00 then this district is the same as the state.
                #print "Using state file for:", distname
                statefile = ALMANAC_DIR + '2008/states/%s/index.html' % diststate.lower()
                demog = almanac.scrape_state(statefile).get('state')
            # demog is still None here when neither branch above applied.
            demog_to_dist(demog, district)
            # Keep the part of the scraped filename starting at the site's
            # host name. NOTE(review): find() returns -1 when the host name
            # is absent, which would leave only the last character.
            district.almanac = 'http://' + d['filename'][d['filename'].find('nationaljournal.com'):]
            #print 'district:', distname, pformat(district)
            # `$distname` in the where clause is resolved from vars=locals(),
            # so the local variable must keep that exact name.
            db.update('district', where='name=$distname', vars=locals(), **district)
def main():
    """Load election results, demographics and almanac links per district.

    Every file under ALMANAC_DIR matching 'people/*/rep_*.htm' or
    'people/*/*s[12].htm' is scraped in sorted path order. The scraped
    record is handed to load_election_results(), and the matching row of
    the `district` table is updated with whatever demog_to_dist() stores
    plus the almanac URL.

    Raises:
        AssertionError: if ALMANAC_DIR does not exist.
    """
    assert os.path.exists(ALMANAC_DIR), ALMANAC_DIR

    person_pages = sorted(
        glob.glob(ALMANAC_DIR + 'people/*/rep_*.htm')
        + glob.glob(ALMANAC_DIR + 'people/*/*s[12].htm'))

    for page in person_pages:
        # Strip the '.htm' suffix and the 'rep_' prefix from the file name;
        # the state is read from the front and the number from the back.
        basename = page.split('/')[-1]
        code = web.lstrips(web.rstrips(basename, '.htm'), 'rep_')
        state = code[:2].upper()
        number = code[-2:]
        # Must stay named `distname`: the where clause below refers to it
        # as $distname and is resolved through vars=locals().
        distname = tools.fixdist(state + '-' + number)

        person = almanac.scrape_person(page)
        load_election_results(person, distname)

        if 'demographics' in person:
            demographics = person['demographics']
        elif distname[-2:] == '00' or '-' not in distname:
            # A -00 (or un-numbered) district is the same as the state, so
            # the state page supplies the demographics.
            state_page = ALMANAC_DIR + 'states/%s/index.html' % state.lower()
            demographics = almanac.scrape_state(state_page).get('state')
        else:
            demographics = None

        district = web.storage()
        demog_to_dist(demographics, district)

        # Keep the scraped filename from the site's host name onwards.
        source_path = person['filename']
        host_at = source_path.find('nationaljournal.com')
        district.almanac = 'http://' + source_path[host_at:]

        db.update('district', where='name=$distname', vars=locals(), **district)
def main():
    """Scrape the almanac representative pages into per-district records.

    Loads the district index from DATA_DIR + '/load/districts/index.json',
    then scrapes every file matching ALMANAC_DIR + 'people/*/rep*'.

    Returns:
        dict mapping '<STATE>-<NN>' to a web.storage holding `almanac` and,
        when available, `cook_index`, `area_sqmi`, `poverty_pct`,
        `median_income`, `est_population_year`, `est_population` and
        `interest_group_rating`.

    Raises:
        AssertionError: if ALMANAC_DIR does not exist.
        KeyError: if a non-empty demographics record lacks 'Area size'.
    """
    # open() rather than the Python-2-only file() builtin, closed explicitly
    # so the handle is not left for the garbage collector. try/finally is
    # used instead of `with` to stay valid on older interpreters.
    index_file = open(DATA_DIR + '/load/districts/index.json')
    try:
        districts = simplejson.load(index_file)
    finally:
        index_file.close()

    assert os.path.exists(ALMANAC_DIR), ALMANAC_DIR

    out = {}
    for fn in glob.glob(ALMANAC_DIR + 'people/*/rep*'):
        district = web.storage()

        # File name without the '.htm' suffix and 'rep_' prefix: state code
        # in the first two characters, district number in the last two.
        dist = web.lstrips(web.rstrips(fn.split('/')[-1], '.htm'), 'rep_')
        diststate = dist[0:2].upper()
        distnum = dist[-2:]

        d = almanac.scrape_person(fn)
        if 'demographics' in d:
            demog = d['demographics']
        else:
            #@@ maybe only when diststate + '-00' in districts?
            # Fall back to the state page's demographics.
            statefile = ALMANAC_DIR + 'states/%s/index.html' % diststate.lower()
            demog = almanac.scrape_state(statefile).get('state')

        if demog:
            district.cook_index = get_int(demog, 'Cook Partisan Voting Index')
            district.area_sqmi = cleanint(web.rstrips(demog['Area size'], ' sq. mi.'))
            district.poverty_pct = get_int(demog, 'Poverty status')
            district.median_income = get_int(demog, 'Median income')
            # Population candidates are listed newest estimate first.
            (district.est_population_year, district.est_population) = coalesce_population(demog, [
                (2006, 'Pop. 2006 (est)'),
                (2005, 'Pop. 2005 (est)'),
                (2000, 'Pop. 2000'),
            ])

        if 'interest_group_rating' in d:
            district.interest_group_rating = d['interest_group_rating']

        # Keep the scraped filename from the site's host name onwards.
        district.almanac = 'http://' + d['filename'][d['filename'].find('nationaljournal.com'):]

        # Nationaljournal numbers districts of congressmen-at-large
        # and territorial delegates '01' in its URLs, but our
        # districts file numbers them '00'.
        if distnum == '01' and diststate + '-00' in districts:
            distnum = '00'

        out[diststate + '-' + distnum] = district
    return out