def geoclient_intersection(streetNumber, streetName, boroughName, client=None):
    '''
    Look up an address and return its street together with its cross streets.

    Args:
        streetNumber: house number of the address to look up.
        streetName: street name of the address.
        boroughName: borough of the address.
        client: optional object exposing ``address(number, street, borough)``
            and returning a mapping. Pass an existing Geoclient here to reuse
            it across calls or to supply different credentials. When omitted,
            a Geoclient is created with the credentials embedded below.

    Returns:
        dict with the keys 'sideOfStreet', 'fromStreet', 'toStreet' and
        'borough', filled from the response fields
        'firstStreetNameNormalized', 'highCrossStreetName1',
        'lowCrossStreetName1' and 'firstBoroughName' respectively.

    Raises:
        KeyError: if the response lacks any of the four fields read here.
    '''
    if client is None:
        # NOTE(review): these credentials are committed in source; prefer
        # passing a pre-built client so they can live outside the code.
        client = Geoclient('799db7eb', '02b0bed977c344cb27b77e549eb69ed8')

    dataGeo = client.address(streetNumber, streetName, boroughName)
    return {
        'sideOfStreet': dataGeo['firstStreetNameNormalized'],
        'fromStreet': dataGeo['highCrossStreetName1'],
        'toStreet': dataGeo['lowCrossStreetName1'],
        'borough': dataGeo['firstBoroughName'],
    }
Example #2
0
def geoclient_intersection(streetNumber, streetName, boroughName, client=None):
    '''
    Look up an address and return its street together with its cross streets.

    Args:
        streetNumber: house number of the address to look up.
        streetName: street name of the address.
        boroughName: borough of the address.
        client: optional object exposing ``address(number, street, borough)``
            and returning a mapping. Pass an existing Geoclient here to reuse
            it across calls or to supply different credentials. When omitted,
            a Geoclient is created with the credentials embedded below.

    Returns:
        dict with the keys 'sideOfStreet', 'fromStreet', 'toStreet' and
        'borough', filled from the response fields
        'firstStreetNameNormalized', 'highCrossStreetName1',
        'lowCrossStreetName1' and 'firstBoroughName' respectively.

    Raises:
        KeyError: if the response lacks any of the four fields read here.
    '''
    if client is None:
        # NOTE(review): these credentials are committed in source; prefer
        # passing a pre-built client so they can live outside the code.
        client = Geoclient('799db7eb', '02b0bed977c344cb27b77e549eb69ed8')

    dataGeo = client.address(streetNumber, streetName, boroughName)
    return {
        'sideOfStreet': dataGeo['firstStreetNameNormalized'],
        'fromStreet': dataGeo['highCrossStreetName1'],
        'toStreet': dataGeo['lowCrossStreetName1'],
        'borough': dataGeo['firstBoroughName'],
    }
Example #3
0
def geoclientBatch(df, houseNo='houseNo', street='street', boro='boro',
                   client=None):
    '''
    Uses DOITT's GeoClient (the web API to DCP's GeoSupport)
    via the python wrapper https://github.com/talos/nyc-geoclient
    to geocode every row of the dataframe df.

    Args:
        df: dataframe holding one address per row; it is modified in place.
        houseNo: name of the column holding the house number.
        street: name of the column holding the street name.
        boro: name of the column holding the borough.
        client: optional object exposing ``address(number, street, borough)``.
            When omitted, a Geoclient is created with the credentials
            embedded below.

    Returns:
        The same dataframe df with two additional columns: geocodedBBL and
        geocodedBIN. For a row whose lookup fails, both columns hold the
        string "Error: <exception class>".
    '''
    if client is None:
        # NOTE(review): these credentials are committed in source; prefer
        # passing a pre-built client so they can live outside the code.
        geoID = 'fb9ad04a'
        geoKey = '051f93e4125df4bae4f7c57517e62344'
        client = Geoclient(geoID, geoKey)
    warnings.filterwarnings('ignore')  # do not display warnings

    def hitGeoC(row):
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop a
        # long-running batch instead of being recorded as a row error.
        try:
            x = client.address(row[houseNo], row[street], row[boro])
            return x['bbl'], x['buildingIdentificationNumber']
        except Exception as exc:
            message = "Error: %s" % type(exc)
            return message, message

    # Plain lists assigned per column also work for an empty dataframe.
    results = [hitGeoC(row) for _, row in df.iterrows()]
    df['geocodedBBL'] = [pair[0] for pair in results]
    df['geocodedBIN'] = [pair[1] for pair in results]
    return df
Example #4
0
def geoclientBatch(df, houseNo='houseNo', street='street', boro='boro',
                   client=None):
    '''
    Uses DOITT's GeoClient (the web API to DCP's GeoSupport)
    via the python wrapper https://github.com/talos/nyc-geoclient
    to geocode every row of the dataframe df.

    Args:
        df: dataframe holding one address per row; it is modified in place.
        houseNo: name of the column holding the house number.
        street: name of the column holding the street name.
        boro: name of the column holding the borough.
        client: optional object exposing ``address(number, street, borough)``.
            When omitted, a Geoclient is created with the credentials
            embedded below.

    Returns:
        The same dataframe df with two additional columns: geoBBL and geoBIN.
        Each holds the value from the response, '' when the response lacks
        that field, '---ProxyError---' when the lookup raised ProxyError, or
        '---InvalidAddress---' when the lookup raised any other exception.
    '''
    if client is None:
        # NOTE(review): these credentials are committed in source; prefer
        # passing a pre-built client so they can live outside the code.
        geoID = 'fb9ad04a'
        geoKey = '051f93e4125df4bae4f7c57517e62344'
        client = Geoclient(geoID, geoKey)
    warnings.filterwarnings('ignore')  # do not display warnings

    def _field(result, key):
        # Reading a field from the response cannot hit the network, so the
        # only failure handled here is "field unavailable" -> ''.
        try:
            return result[key]
        except Exception:
            return ''

    def hitGeoC(row):
        # query the Geoclient API; only this call can raise ProxyError
        try:
            x = client.address(row[houseNo], row[street], row[boro])
        except ProxyError:
            return '---ProxyError---', '---ProxyError---'
        except Exception:
            # Exception rather than a bare except, so Ctrl-C / SystemExit
            # still interrupt a long-running batch.
            return '---InvalidAddress---', '---InvalidAddress---'
        return _field(x, 'bbl'), _field(x, 'buildingIdentificationNumber')

    # applies "hitGeoC" to every row; plain lists assigned per column also
    # work for an empty dataframe
    results = [hitGeoC(row) for _, row in df.iterrows()]
    df['geoBBL'] = [pair[0] for pair in results]
    df['geoBIN'] = [pair[1] for pair in results]

    return df
Example #5
0
def main():

    ###Instantiate geoclient wrapper
    g = Geoclient('7cb56bda', '51f262e341572a09e73aa32eb1dda793')

    ###Read in data
    bbl_to_nta = pd.read_csv('./../data/BBL_to_NTA.csv', dtype=str)
    file_names = make_file_names()

    bad_api_calls = {}

    for file in file_names:
        bad_api_calls[file] = {}

        if file[0:4] == '2010':
            skiprows_n = 3
        else:
            skiprows_n = 4
        boro_year_data = pd.read_excel('./../data/sales_data/{}'.format(file),
                                       sheetname=0,
                                       skiprows=range(skiprows_n))

        ###Clean and make new features
        boro_year_data = clean_sales_dataframe(boro_year_data)
        boro_year_data = make_new_sales_features(boro_year_data)

        ###First merge on BBL
        print 'Merging for year = {}, borough = {}'.format(
            *file.replace('.xls', '').split('_'))
        merged = pd.merge(boro_year_data, bbl_to_nta, on='BBL', how='left')

        ###Then use API to fill missed NTA_strings

        print 'Number missing NTA_strings after merge: ', sum(
            merged['NTA_string'].isnull())
        for index, row in merged.iterrows():
            if pd.isnull(row['NTA_string']):
                query_results = get_nta_name_through_api(row, g)
                merged.loc[index, 'NTA_string'] = query_results[0]
                if query_results[1] != 'No error':
                    bad_api_calls[file][index] = query_results[1]

        print 'Number missing NTA_strings after API call: ', sum(
            merged['NTA_string'].isnull())
        print 'Number of bad API calls: ', len(bad_api_calls[file]), '\n'

        ###Finally save NTA tagged data
        merged.to_csv('./../data/sales_data_nta_tagged/{}.csv'.format(
            file.replace('.xls', '')),
                      index=True)

    with open('./../data/sales_data_nta_tagged/bad_api_calls.json',
              'w') as outfile:
        json.dump(bad_api_calls, outfile)
# All settings below are read from the JSON file rfs.config.json
with open('rfs.config.json') as conf:
    config = json.load(conf)

# Postgres database name and user
DBNAME, DBUSER = config['DBNAME'], config['DBUSER']
# Geoclient application ID and key
app_id, app_key = config['GEOCLIENT_APP_ID'], config['GEOCLIENT_APP_KEY']

# connect to the postgres db on localhost:5432
engine = sql.create_engine(
    'postgresql://{}@localhost:5432/{}'.format(DBUSER, DBNAME))

# Geoclient wrapper used for the address lookups
g = Geoclient(app_id, app_key)


def get_loc(num, street, borough):
    """Geocode an address and pull the latitude and longitude out of the result.

    Calls the module-level Geoclient ``g`` with the house number, street and
    borough, then reads 'latitude' and 'longitude' from the response. A field
    missing from the response is replaced by the string 'none'.

    NOTE(review): no return statement is visible in this excerpt, so as
    shown the function returns None; the body may continue beyond what is
    shown here -- confirm against the full file.
    """
    geo = g.address(num, street, borough)
    # One lookup per field: retrying the identical key after a KeyError can
    # never succeed, so the former nested retry was dead code.
    try:
        lat = geo['latitude']
    except KeyError:
        lat = 'none'
    try:
        lon = geo['longitude']
    except KeyError:
        lon = 'none'
Example #7
0
You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
from nyc_geoclient import Geoclient
import os
import sys
import csv
import time

# Path of the input file, taken from the first command-line argument
GEOCODE_FILE = sys.argv[1]

# Geoclient credentials come from the environment; a missing variable
# raises KeyError at import time
APP_ID, APP_KEY = (os.environ['GEOCLIENT_APP_ID'],
                   os.environ['GEOCLIENT_APP_KEY'])


g = Geoclient(APP_ID, APP_KEY)

# Module-level counters, both starting at zero
ERRORS = PROCESSED = 0

def geocode():
    """Read address rows from GEOCODE_FILE and look each one up by zip code.

    Opens ``GEOCODE_FILE + '-geocoded.csv'`` in append mode with a
    pipe-delimited csv writer, then iterates over the pipe-delimited rows of
    GEOCODE_FILE. Columns 0-3 of each row are read as id, house number,
    street and zip code, and the last three are passed to ``g.address_zip``.

    NOTE(review): the visible body stops right after the ``address_zip``
    call, so nothing is written through ``writer`` here; the remainder
    appears to be cut off -- confirm against the full file.
    """
    # Output is opened in append mode, so repeated runs add to the same file
    with open(GEOCODE_FILE + '-geocoded.csv', 'a') as geocode_file:
        writer = csv.writer(geocode_file, delimiter="|")
        with open(GEOCODE_FILE, 'r') as f:
            csv_file = csv.reader(f, delimiter="|")
            for row in csv_file:
                # Positional columns: id | house number | street | zip code
                id = row[0]
                house_number = row[1]
                street = row[2]
                zipcode = row[3]
                # Geoclient lookup by house number, street and zip code
                info = g.address_zip(house_number, street, zipcode)
import json
import os
import glob
import sys
from time import sleep

# For each row in the TSV:
	# Geocode address
	# Write full json contents to a file
	# write the original data + school, lat and long to a second file

# Exactly four arguments are required after the script name:
# App ID, App Key, borough, and sold/listed status
if len(sys.argv) != 5:
    print("Error: %s Need App ID, App Key, borough, and sold/listed status to run" % sys.argv[0])
    sys.exit(1)

g = Geoclient(sys.argv[1], sys.argv[2])
borough = sys.argv[3]
status = sys.argv[4].lower()  # compared case-insensitively below

if status not in ('sold', 'listed'):
    print("Error: %s Stats must be written as sold or listed" % sys.argv[4])
    sys.exit(1)


# Output file names are derived from the borough and the sold/listed status
path = borough + '_' + status + '.csv'
path2 = borough + '_' + status + '.json'

# For every file the directory
for file in glob.glob(os.getcwd() + "/" + borough + "/" + "*_" + status +".tsv"):
	# Use to skip unnecessary lines in file
import csv
from nyc_geoclient import Geoclient

# Read csv with ID and key.
# csv must be in same folder and be like the following
# "appID","appKey"
# "309245e","c45458765e3h8560erg898160"

# Load the credentials file into a list of row tuples
with open('app_id_nyc.csv', 'rb') as f:
    reader = csv.reader(f)
    id_key = [tuple(record) for record in reader]

# Row 0 is the header line; row 1 carries the app ID and the app key
my_app_ID, my_app_key = id_key[1][0], id_key[1][1]

g = Geoclient(my_app_ID, my_app_key)

# Load the cleaned sales data into a list of row tuples
with open('../out/nyc_sales_clean.csv', 'rb') as f:
    reader = csv.reader(f)
    nyc_sales = [tuple(record) for record in reader]

nrow_nyc = len(nyc_sales)

with open("../out/coords_nyc_api.csv", "wb") as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    # Header row of the output file
    writer.writerow([
        'id_sale', 'lat', 'long', 'returned_street_name', 'returned_zip_code'
    ])
    # Start at 1 so that row 0 of the sales data is skipped
    for i in range(1, nrow_nyc):
        progress = str(i) + ' / ' + str(nrow_nyc)
        print(progress)
        # address(houseNumber, street, borough)
Example #10
0
 def __init__(self, app_id, key):
     """Create a Geoclient from ``app_id`` and ``key`` and keep it on ``self._g``."""
     self._g = Geoclient(app_id, key)
Example #11
0
class GeoHelper:
    '''
    Helper around a Geoclient instance that geocodes dataframe rows from a
    single identifier column holding a BIN, a BBL or a street address.
    '''

    def __init__(self, app_id, key):
        '''Create the underlying Geoclient from the given app ID and key.'''
        self._g = Geoclient(app_id, key)

    def _find_id_column(self, col_list):
        '''
        Return the column of col_list to use as identifier.

        Column names are compared case-insensitively against a fixed
        preference list (BIN first, ADDRESS last); the original spelling of
        the first match is returned. Raises StopIteration when no column
        matches.
        '''
        ids_by_pref = [
            'BIN', 'BUILDINGIDENTIFICATIONNUMBER', 'GEOCODEDBIN', 'BBL',
            'GEOCODEDBBL', 'ADDRESS'
        ]
        col_map = dict(zip(map(str.upper, col_list), col_list))
        return next(col_map[i] for i in ids_by_pref if i in col_map)

    def _inferredGeocoder(self, input_str):
        '''
        Attempts to infer the format of the input provided for geocoding, either BIN, BBL, or Street Address (very rudimentary). Will return the complete geoclient object, or None when the input cannot be geocoded.
        '''
        input_str = str(input_str)
        # Numeric identifiers read as floats arrive as e.g. '1000010001.0';
        # drop only that trailing suffix, not every '.0' inside the text.
        if input_str.endswith('.0'):
            input_str = input_str[:-2]

        if input_str.isdigit():
            if len(input_str) == 10:  # BBL: 1 boro digit, 5 block, 4 lot
                # int() accepts leading zeros, so an all-zero block or lot
                # parses to 0 instead of failing on an empty string.
                return self._g.bbl(int(input_str[0]), int(input_str[1:6]),
                                   int(input_str[6:]))
            if len(input_str) == 7:  # BIN
                return self._g.bin(input_str)
            # Unrecognized number of digits, no ID possible
            return None

        # Otherwise try to split the text into
        # house number / street name / borough.
        split = input_str.split()
        if not split:
            print('Format not recognized')
            return None
        house_num = split[0]
        street_name = " ".join(split[1:-1])
        boro_name = split[-1]
        try:
            return self._g.address(house_num, street_name, boro_name)
        except Exception:
            print('Format not recognized')
            return None

    def _checkGeoclientValidity(self, geoclient_output):
        '''
        Return True when the geoclient output reports success (its
        'returnCode1a' starts with '0'); otherwise return an error string.
        A missing output (None) is reported as an error, not raised.
        '''
        no_message = 'error returned with no message'
        if not geoclient_output or 'returnCode1a' not in geoclient_output:
            return no_message
        return_code = geoclient_output['returnCode1a']
        if return_code and str(return_code[0]) == '0':
            return True
        message = geoclient_output.get('message')
        if message:
            return 'Error Code: ' + str(message)
        return no_message

    def _addressGeocoder(self, df, house_num='houseNo', street='street',
                         boro='boro'):
        '''
        private function to make a generic call to NYC geoclientBatch.

        df is a single row; house_num, street and boro name the fields that
        hold the address parts. Returns (BBL, BIN), or the same
        "Error: <exception class>" string twice when the lookup fails.
        '''
        try:
            x = self._g.address(df[house_num], df[street], df[boro])
            return x['bbl'], x['buildingIdentificationNumber']
        except Exception as exc:
            message = "Error: %s" % type(exc)
            return message, message

    def _resolve_id_column(self, df, identifier_col):
        '''Return the identifier column to use, announcing how it was chosen.'''
        if identifier_col:
            print('using provided ID column: ' + identifier_col)
            return identifier_col
        identifier_col = self._find_id_column(df.columns)
        print('found ID column: ' + identifier_col)
        return identifier_col

    def _geocode_into(self, df, identifier_col, source_keys, target_cols):
        '''
        Geocode every row and store the requested output fields in df.

        For each row the value of identifier_col is geocoded; on success the
        fields named in source_keys are copied into target_cols, otherwise
        every target column receives the error string.
        '''
        def wrapper_func(row):
            out = self._inferredGeocoder(row[identifier_col])
            log = self._checkGeoclientValidity(out)
            if log is True:
                return tuple(out[key] for key in source_keys)
            return (log,) * len(source_keys)

        # Plain lists assigned per column also work for an empty dataframe.
        results = [wrapper_func(row) for _, row in df.iterrows()]
        for position, column in enumerate(target_cols):
            df[column] = [result[position] for result in results]
        return df

    def get_BINandBBL(self, df, identifier_col=None):
        '''
        Uses DOITT's GeoClient (the web API to DCP's GeoSupport)
        via the python wrapper https://github.com/talos/nyc-geoclient
        to geocode a dataframe df from one identifier column (BIN, BBL or
        address). When identifier_col is not given, the column is inferred
        from the dataframe's column names.

        Returns the dataframe df with two additional columns: geocodedBBL and geocodedBIN
        '''
        identifier_col = self._resolve_id_column(df, identifier_col)
        return self._geocode_into(
            df, identifier_col,
            ('bbl', 'buildingIdentificationNumber'),
            ('geocodedBBL', 'geocodedBIN'))

    def GetLatLong(self, df, identifier_col=None):
        '''
        Geocode df from one identifier column (BIN, BBL or address) and
        return it with two additional columns: Latitude and Longitude.
        Rows that cannot be geocoded hold the error string in both columns.
        '''
        identifier_col = self._resolve_id_column(df, identifier_col)
        return self._geocode_into(
            df, identifier_col,
            ('latitudeInternalLabel', 'longitudeInternalLabel'),
            ('Latitude', 'Longitude'))
# coding: utf-8
# written for python 2 using nyc_geoclient
# https://github.com/talos/nyc-geoclient

import pandas as pd
import numpy as np
import os, re, time
import sqlite3 as lite
from nyc_geoclient import Geoclient

# read-in NYC Geoclient API token: the first two lines of the file are passed,
# in order, as the two Geoclient arguments. The with-block closes the file
# handle once both lines have been read.
with open('NYC_Geoclient_token.txt') as token:
    g=Geoclient(token.readline().strip('\n'), token.readline().strip('\n'))

def clean_strings(x):
    """Return str(x) with surrounding whitespace removed.

    If converting x to a string raises ValueError, np.nan is returned
    instead.
    """
    try:
        cleaned = str(x).strip()
    except ValueError:
        cleaned = np.nan
    return cleaned
    
def parse_address(address):
    """Strip apartment information from an address string.

    For each separator ('Apt', 'APT', '#') the address is cut at the first
    occurrence of that separator; the result is then split on a trailing
    ", <digits>" or ", <digits><one word character>" suffix and the part
    before it is kept in ``street``.

    NOTE(review): ``no_apt`` is recomputed from the full ``address`` on every
    iteration, so a cut made for an earlier separator is not carried over to
    the next one. No return statement is visible in this excerpt; the body
    appears to be cut off -- confirm against the full file.
    """
    separators=['Apt','APT','#']
    # gets rid of the apartments in the address
    for separator in separators:
        if address.find(separator)!=-1:
            # keep only the text before the first occurrence of the separator
            no_apt=address.split(separator,1)[0]
        else:
            no_apt=address
        #apartments can also be indicated by comma followed by number with optional letter (ex: , 503C)
        street=re.split(r'(,\s\d+$|,\s\d+\w{1}$)', no_apt)
        street=street[0] # grab what came before the apartment
Example #13
0
# Read CSV of rent stabilized properties and grab BBL from NYC's GeoClient API
# takes an input CSV file name and output CSV file name as argv
# first two columns of input csv must be address number and address name
# hardcoded for manhattan only, will update in the future
# run script by doing: python geo-client-api-test.py input.csv output.csv

from sys import argv
from nyc_geoclient import Geoclient
import csv
import json

# Exactly two command-line arguments are required after the script name
# (input CSV, output CSV); any other count makes this unpacking raise
# ValueError.
script, infile, outfile = argv

# NOTE(review): the Geoclient credentials are hard-coded here -- consider
# loading them from outside the source.
g = Geoclient('9cd0a15f', '54dc84bcaca9ff4877da771750033275')

#test = g.address('140-154', 'West 72nd Street', 'Manhattan')
#print json.dumps(test, sort_keys=True)

print "opening file: %s" % infile
with open(infile, 'r') as i:

    reader = csv.reader(i)

    print "opening file: %s" % outfile
    with open(outfile, 'w') as o:

        writer = csv.writer(o, lineterminator='\n')
        all = []
        row = next(reader, None)
        row.append('bbl')
        all.append(row)