line = line.strip().split('|')
        line_dict = {k: v for k, v in zip(mapping, line)}
        out_file_obj.write(dumps(line_dict))
        out_file_obj.write("\n")


# For each data element, pull the data itself and the header. Combine to form JSON then upload.

if __name__ == "__main__":
    import sys

    # Uploading is opt-in: it only happens when the first command-line
    # argument is exactly "azure".
    azure = len(sys.argv) > 1 and sys.argv[1] == "azure"
    blob_upload = None
    if azure:
        print("Upload to azure")
        # Imported only on this branch, so a local-only run never imports
        # the azureblob package.
        from azureblob.upload_file_to_blob import BlobUploader
        blob_upload = BlobUploader(make_container_public=True)
    else:
        print("No azure upload requested. Pulling locally only.")

    # `sources` maps a dataset key to a dict with at least the entries
    # 'data' and 'mapping' (both are read below).
    for key, files in sources.items():
        print("Downloading " + key)
        local_zip_file = "{0}/{1}.zip".format(tmp_data_folder, key)
        local_unzip_dir = "{0}/{1}".format(tmp_data_folder, key)
        pull_ftp_data(files['data'], local_zip_file)

        # The mapping text is a single comma-separated line; split it into a
        # list (presumably the column header for the data -- verify against
        # the code that consumes `mapping`).
        mapping = pull_http_data(files['mapping']).strip().split(',')

        # Unzip the data. The context manager closes the archive handle at
        # the end of every iteration instead of leaking one per dataset.
        with ZipFile(local_zip_file) as z:
            z.extractall(local_unzip_dir)
Beispiel #2
0
"""
Run the program.
"""

# Command-line layout (taken from the usage text below):
#   argv[1]  definitions file (not read in this block)
#   argv[2]  first date to process, formatted YYYYMMDD
#   argv[3]  last date to process, formatted YYYYMMDD
#   argv[4]  optional literal 'azure' to enable uploading
# argv[3] is read unconditionally, so at least four entries are required.
if len(sys.argv) < 4:
    print("Usage: python download_fec_filings.py fecdefs.json 20151103 20151104 [azure]")
    print("This would download and process all FEC data for Nov 3 and Nov 4.")
    # Stop here; continuing would only fail with an IndexError on the
    # argv lookups below.
    sys.exit(1)

mindate_s = sys.argv[2]
maxdate_s = sys.argv[3]

blob_upload = None
if len(sys.argv) > 4 and sys.argv[4] == 'azure':
    print("Will upload to Azure")
    # Imported only when uploading was requested.
    from azureblob.upload_file_to_blob import BlobUploader
    blob_upload = BlobUploader(make_container_public=True)
else:
    print("No azure upload requested.")

print("Running from {0} to {1}".format(mindate_s, maxdate_s))

# Parse the date strings; strptime raises ValueError if either one does not
# match the YYYYMMDD format.
mindate = datetime.strptime(mindate_s, "%Y%m%d")
maxdate = datetime.strptime(maxdate_s, "%Y%m%d")


# Working directories, relative to the current working directory.
tmp_data_folder = "./tmp_data"
clean_data_folder = "./data"


try:
    os.mkdir(tmp_data_folder)
Beispiel #3
0
import pandas as pd

# Remote input files, both read directly by pandas from their URLs.
zipcodes = 'http://www2.census.gov/geo/relfiles/cdsld13/natl/natl_zccd_delim.txt'
districts = 'http://www2.census.gov/geo/docs/reference/codes/files/national_cd113.txt'

# Get data
df_zip = pd.read_csv(zipcodes, skiprows=1)
df_zip.columns = ['State', 'ZCTA', 'District']
# The districts file is split on runs of two or more whitespace characters.
# The pattern is a raw string so "\s" is not treated as a (invalid) string
# escape; regex separators require the python parsing engine.
df_districts = pd.read_table(districts, delimiter=r'\s\s+', header=None, skiprows=1, engine='python')
df_districts.columns = ['STATE', 'STATEKEY', 'DISTRICT', 'DISTRICTNAME']

# get just state bits from districts: one row per STATE with its largest STATEKEY.
states = df_districts.groupby('STATE').agg({'STATEKEY': max}).reset_index()

# Attach the state row to every zip row by matching df_zip's 'State' against
# 'STATEKEY'; a left join keeps zip rows that have no match.
df_zip_withstates = df_zip.merge(states, left_on='State', right_on='STATEKEY', how='left')
# Keep the first four columns (State, ZCTA, District, STATE). DataFrame.icol
# no longer exists in pandas, so positional selection goes through .iloc.
df_zip_withstates = df_zip_withstates.iloc[:, :4]
df_zip_withstates.columns = ['state_code', "zcta", "district", "state"]
df_zip_withstates.to_csv("data/zip_to_district.csv", index=False)

# sys is not imported alongside pandas above, so bring it into scope here
# before reading the command line.
import sys

azure = len(sys.argv) > 1 and sys.argv[1] == 'azure'

if azure:
    print("Uploading to azure")
    from azureblob.upload_file_to_blob import BlobUploader
    blob_upload = BlobUploader(make_container_public=True)

    # Close the file once the upload call returns.
    # NOTE(review): assumes put_json_file finishes reading the handle before
    # it returns -- confirm against BlobUploader.
    with open("data/zip_to_district.csv", 'r') as csv_file:
        blob_upload.put_json_file(csv_file, "raw_mappings/zip_to_district.csv")


Beispiel #4
0
"""
Download the zcta to county file (many to many relationship)
"""

import requests
import sys

# Uploading is opt-in: only when the first command-line argument is 'azure'.
use_azure = len(sys.argv) > 1 and sys.argv[1] == 'azure'

file_loc = "http://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt"

# Get file locally
r = requests.get(file_loc)
# Fail on an HTTP error status instead of saving an error page as the CSV.
r.raise_for_status()

# The context manager closes the file even if the write raises.
with open("data/zcta_to_county.csv", 'w') as local_file:
    local_file.write(r.text)

if use_azure:
    print("Uploading to azure")
    from azureblob.upload_file_to_blob import BlobUploader
    blob_upload = BlobUploader(make_container_public=True)

    # Close the file once the upload call returns.
    # NOTE(review): assumes put_json_file finishes reading the handle before
    # it returns -- confirm against BlobUploader.
    with open("data/zcta_to_county.csv", 'r') as csv_file:
        blob_upload.put_json_file(csv_file, "raw_mappings/zcta_to_county.csv")