Example #1
0
def get_clean_csv():
    """Merge the per-year OMB earmark CSVs into one cleaned ``all.csv``.

    Reads ``<year>.csv`` for 2010, 2009, 2008 and 2005 from the directory
    returned by ``configuration.get_path_to_omb_data()``, normalizes the
    column headers (lower case, spaces replaced by underscores), keeps a
    fixed set of columns under shorter names, post-processes the rows and
    writes the result to ``all.csv`` in that same directory.

    Returns nothing; the output is the CSV file.
    """
    years = ['2010', '2009', '2008', '2005']

    # Columns retained from the raw extracts (names after normalization).
    keep = [
            'earmark_id',
            'earmark_code',
            'agency_title',
            'bureau_title',
            'account_title',
            'program',
            'enacted_year',
            'short_description',
            'earmark_description',
            'earmark_type_name',
            'spendcom',
            'recipient'
            ]

    # Output names for the retained columns, in the same order as ``keep``.
    new_index = [
            'earmark_id',
            'earmark_code',
            'agency',
            'bureau',
            'account',
            'program',
            'enacted_year',
            'short_description',
            'full_description',
            'earmark_type',
            'spendcom',
            'recipient'
            ]

    data_dir = configuration.get_path_to_omb_data()

    ds = []
    for year in years:
        # Build the path with os.path.join, the same way all.csv is written
        # below, so the directory does not need a trailing separator.
        fname = os.path.join(data_dir, year + '.csv')
        d = pd.read_csv(fname, low_memory=False)
        d.columns = [h.lower().replace(" ", "_") for h in d.columns]
        if year == '2005':
            # The 2005 extract gets a row-number earmark_id and takes its
            # short description from 'earmark_short_description'.
            # NOTE(review): these synthetic ids start at 0 and may coincide
            # with ids from other years in the groupby below -- confirm.
            d['earmark_id'] = range(d.shape[0])
            d['short_description'] = d['earmark_short_description']
        ds.append(d)

    # Concatenate once; only the ``keep`` columns are used from here on.
    ear = pd.concat(ds)[keep]
    ear.columns = new_index

    # get_recipient, shorten_full_description and convert are defined
    # outside this function.
    ear = ear.groupby('earmark_id').apply(get_recipient)
    ear['full_description'] = ear.full_description.map(shorten_full_description)
    ear.apply(convert).to_csv(os.path.join(data_dir, 'all.csv'), header=True, index=False)
Example #2
0
def import_to_db():
    """Replace the contents of the ``earmarks`` table with rows from ``all.csv``.

    Reads ``all.csv`` from the directory returned by
    ``configuration.get_path_to_omb_data()``, skips its header row, deletes
    every row of ``earmarks`` and inserts the CSV rows with a parameterized
    statement whose column list comes from ``new_index`` (defined outside
    this function).

    NOTE(review): the commit is commented out, as it was originally, so the
    delete and the inserts are presumably discarded when the connection is
    closed -- confirm whether this dry-run behavior is intended.
    """
    csv_path = os.path.join(configuration.get_path_to_omb_data(), 'all.csv')

    # 'rb' is the mode argument of open(); it was previously passed to
    # os.path.join by mistake, producing the path ".../all.csv/rb".
    with open(csv_path, 'rb') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row; tolerate an empty file
        rows = list(reader)

    print(len(rows))

    cmd = "insert into earmarks ("+", ".join(new_index)+") values ("+", ".join(["%s"]*len(new_index))+")"
    print(cmd)

    conn = psycopg2.connect(CONN_STRING)
    try:
        cur = conn.cursor()
        cur.execute("delete from earmarks")
        cur.executemany(cmd, rows)
        #conn.commit()
    finally:
        # Close the connection even when one of the statements fails.
        conn.close()
Example #3
0
import os, sys, inspect
# Put the parent of this file's directory at the front of sys.path,
# presumably so that the `util` package imported below can be found.
sys.path.insert(
    0,
    os.path.realpath(
        os.path.abspath(
            os.path.join(
                os.path.split(inspect.getfile(inspect.currentframe()))[0],
                ".."))))
import urllib
import zipfile
from util import configuration

# Directory the functions below download into, extract into and rename within.
omb_path = configuration.get_path_to_omb_data()


def download_and_extract(url):
    """Download the zip archive at ``url`` and extract it into ``omb_path``.

    The archive is saved in ``omb_path`` under the last ``/``-separated
    component of ``url``, and all of its members are extracted into
    ``omb_path`` as well.
    """
    path_to_new_zip = os.path.join(omb_path, url.split("/")[-1])
    urllib.urlretrieve(url, path_to_new_zip)
    zfile = zipfile.ZipFile(path_to_new_zip)
    # try/finally releases the archive handle even if extraction fails, and
    # does not rely on ZipFile supporting the ``with`` statement.
    try:
        zfile.extractall(omb_path)
    finally:
        zfile.close()


def get_2010_earmarks():
    """Download the 2010 earmark archive and rename its CSV to ``2010.csv``."""
    download_and_extract(
        "http://earmarks.omb.gov/earmarks-public/resources/downloads/2010-appropriation-earmark-extract.zip")
    # Both the extracted file and its new name live in omb_path.
    extracted_csv = os.path.join(omb_path, "2010-appropriations-earmark-extract.csv")
    yearly_csv = os.path.join(omb_path, "2010.csv")
    os.rename(extracted_csv, yearly_csv)


def get_2009_earmarks():
import os, sys, inspect
# Put the parent of this file's directory at the front of sys.path,
# presumably so that the `util` package imported below can be found.
sys.path.insert(0, os.path.realpath(os.path.abspath(os.path.join(os.path.split(inspect.getfile( inspect.currentframe() ))[0],".."))))
import urllib
import zipfile
from util import configuration

# Directory the functions below download into, extract into and rename within.
omb_path = configuration.get_path_to_omb_data()

def download_and_extract(url):
    """Download the zip archive at ``url`` and extract it into ``omb_path``.

    The archive is saved in ``omb_path`` under the last ``/``-separated
    component of ``url``, and all of its members are extracted into
    ``omb_path`` as well.
    """
    path_to_new_zip = os.path.join(omb_path,url.split("/")[-1])
    urllib.urlretrieve(url, path_to_new_zip)
    zfile = zipfile.ZipFile(path_to_new_zip)
    # try/finally releases the archive handle even if extraction fails, and
    # does not rely on ZipFile supporting the ``with`` statement.
    try:
        zfile.extractall(omb_path)
    finally:
        zfile.close()

def get_2010_earmarks():
    """Download the 2010 earmark archive and rename its CSV to ``2010.csv``."""
    download_and_extract(
        "http://earmarks.omb.gov/earmarks-public/resources/downloads/2010-appropriation-earmark-extract.zip")
    # Both the extracted file and its new name live in omb_path.
    extracted_csv = os.path.join(omb_path, "2010-appropriations-earmark-extract.csv")
    yearly_csv = os.path.join(omb_path, "2010.csv")
    os.rename(extracted_csv, yearly_csv)

def get_2009_earmarks():
    """Download the 2009 earmark archive and rename its CSV to ``2009.csv``."""
    download_and_extract(
        "http://earmarks.omb.gov/earmarks-public/resources/downloads/2009-appropriations-earmark-extract.zip")
    # Both the extracted file and its new name live in omb_path.
    extracted_csv = os.path.join(omb_path, "2009-appropriations-earmark-extract.csv")
    yearly_csv = os.path.join(omb_path, "2009.csv")
    os.rename(extracted_csv, yearly_csv)

def get_2008_earmarks():
    """Download the 2008 earmark archive and rename ``database.csv`` to ``2008.csv``."""
    download_and_extract(
        "http://earmarks.omb.gov/earmarks-public/resources/downloads/2008-appropriation-earmark-extract.zip")
    # Both the extracted file and its new name live in omb_path.
    extracted_csv = os.path.join(omb_path, "database.csv")
    yearly_csv = os.path.join(omb_path, "2008.csv")
    os.rename(extracted_csv, yearly_csv)

def get_2005_earmarks():
    url= "http://earmarks.omb.gov/earmarks-public/resources/downloads/appropriation-earmark-extract.zip"