def get_clean_csv():
    """Merge the per-year OMB earmark CSVs into one cleaned ``all.csv``.

    Reads ``<year>.csv`` for 2010, 2009, 2008 and 2005 from the directory
    returned by ``configuration.get_path_to_omb_data()``, lower-cases the
    column headers and replaces spaces with underscores, keeps a fixed set
    of columns under shorter names, post-processes the rows with
    ``get_recipient``, ``shorten_full_description`` and ``convert``, and
    writes the result (with header, without index) to ``all.csv`` in the
    same directory.
    """
    years = ['2010', '2009', '2008', '2005']
    # Source columns to keep, in output order.
    keep = [
        'earmark_id',
        'earmark_code',
        'agency_title',
        'bureau_title',
        'account_title',
        'program',
        'enacted_year',
        'short_description',
        'earmark_description',
        'earmark_type_name',
        'spendcom',
        'recipient',
    ]
    # Output names, positionally matching ``keep``.
    new_index = [
        'earmark_id',
        'earmark_code',
        'agency',
        'bureau',
        'account',
        'program',
        'enacted_year',
        'short_description',
        'full_description',
        'earmark_type',
        'spendcom',
        'recipient',
    ]
    omb_dir = configuration.get_path_to_omb_data()

    frames = []
    for year in years:
        # os.path.join keeps this working whether or not omb_dir ends with
        # a path separator, matching how the output path is built below.
        frame = pd.read_csv(os.path.join(omb_dir, year + '.csv'),
                            low_memory=False)
        frame.columns = [h.lower().replace(" ", "_") for h in frame.columns]
        if year == '2005':
            # The 2005 file gets a sequential earmark_id and its
            # 'earmark_short_description' column copied to
            # 'short_description' (presumably it lacks both -- confirm).
            # NOTE(review): these ids start at 0 and rows are later grouped
            # by earmark_id; confirm they cannot collide with other years.
            frame['earmark_id'] = range(frame.shape[0])
            frame['short_description'] = frame['earmark_short_description']
        frames.append(frame)

    ear = pd.concat(frames)[keep]
    ear.columns = new_index
    ear = ear.groupby('earmark_id').apply(get_recipient)
    ear['full_description'] = ear.full_description.map(
        shorten_full_description)
    ear.apply(convert).to_csv(
        os.path.join(omb_dir, 'all.csv'), header=True, index=False)
def import_to_db():
    """Replace the contents of the ``earmarks`` table with rows from ``all.csv``.

    Reads ``all.csv`` from the directory returned by
    ``configuration.get_path_to_omb_data()``, skips its header row, prints
    the row count and the generated INSERT statement, then deletes every
    row from ``earmarks`` and inserts the CSV rows over a psycopg2
    connection opened with ``CONN_STRING``.

    NOTE(review): ``new_index`` and ``CONN_STRING`` are not defined in this
    function and must exist at module level -- confirm.
    """
    csv_path = os.path.join(configuration.get_path_to_omb_data(), 'all.csv')
    # 'rb' is the open() mode (binary, as the Python 2 csv module expects),
    # not a path component.
    with open(csv_path, 'rb') as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        rows = list(reader)
    print(len(rows))

    conn = psycopg2.connect(CONN_STRING)
    try:
        cmd = ("insert into earmarks (" + ", ".join(new_index) +
               ") values (" + ", ".join(["%s"] * len(new_index)) + ")")
        print(cmd)
        cur = conn.cursor()
        cur.execute("delete from earmarks")
        cur.executemany(cmd, rows)
        # NOTE(review): the commit is disabled, so the delete and inserts
        # are never committed before the connection is closed -- confirm
        # whether this dry-run behavior is intentional.
        #conn.commit()
    finally:
        # Always release the connection, even if a statement fails.
        conn.close()
import os, sys, inspect sys.path.insert( 0, os.path.realpath( os.path.abspath( os.path.join( os.path.split(inspect.getfile(inspect.currentframe()))[0], "..")))) import urllib import zipfile from util import configuration omb_path = configuration.get_path_to_omb_data() def download_and_extract(url): path_to_new_zip = os.path.join(omb_path, url.split("/")[-1]) urllib.urlretrieve(url, path_to_new_zip) zfile = zipfile.ZipFile(path_to_new_zip) zfile.extractall(omb_path) def get_2010_earmarks(): url = "http://earmarks.omb.gov/earmarks-public/resources/downloads/2010-appropriation-earmark-extract.zip" download_and_extract(url) os.rename( os.path.join(omb_path, "2010-appropriations-earmark-extract.csv"), os.path.join(omb_path, "2010.csv")) def get_2009_earmarks():
import os, sys, inspect

# Prepend the parent of this file's directory to sys.path (presumably so the
# `util` package imported below resolves -- confirm).
sys.path.insert(0, os.path.realpath(os.path.abspath(os.path.join(
    os.path.split(inspect.getfile(inspect.currentframe()))[0], ".."))))

import urllib
import zipfile
from util import configuration

# Directory that downloads are saved to and extracted into.
omb_path = configuration.get_path_to_omb_data()


def download_and_extract(url):
    """Download the zip archive at *url* into ``omb_path`` and extract it there.

    The archive is saved under the last path segment of *url*, and all of
    its members are extracted into ``omb_path``.
    """
    path_to_new_zip = os.path.join(omb_path, url.split("/")[-1])
    urllib.urlretrieve(url, path_to_new_zip)
    zfile = zipfile.ZipFile(path_to_new_zip)
    try:
        zfile.extractall(omb_path)
    finally:
        # Release the archive's file handle even if extraction fails.
        zfile.close()


def get_2010_earmarks():
    """Download the 2010 extract and rename its CSV to ``2010.csv``."""
    # NOTE(review): the URL says "appropriation" while the extracted CSV name
    # says "appropriations" -- confirm both match the published archive.
    url = "http://earmarks.omb.gov/earmarks-public/resources/downloads/2010-appropriation-earmark-extract.zip"
    download_and_extract(url)
    os.rename(os.path.join(omb_path, "2010-appropriations-earmark-extract.csv"),
              os.path.join(omb_path, "2010.csv"))


def get_2009_earmarks():
    """Download the 2009 extract and rename its CSV to ``2009.csv``."""
    url = "http://earmarks.omb.gov/earmarks-public/resources/downloads/2009-appropriations-earmark-extract.zip"
    download_and_extract(url)
    os.rename(os.path.join(omb_path, "2009-appropriations-earmark-extract.csv"),
              os.path.join(omb_path, "2009.csv"))


def get_2008_earmarks():
    """Download the 2008 extract and rename ``database.csv`` to ``2008.csv``."""
    url = "http://earmarks.omb.gov/earmarks-public/resources/downloads/2008-appropriation-earmark-extract.zip"
    download_and_extract(url)
    os.rename(os.path.join(omb_path, "database.csv"),
              os.path.join(omb_path, "2008.csv"))


def get_2005_earmarks():
    url= "http://earmarks.omb.gov/earmarks-public/resources/downloads/appropriation-earmark-extract.zip"