コード例 #1
0
ファイル: __init__.py プロジェクト: ninjapanther/tz-trout
def tz_ids_for_address(country, state=None, city=None, zipcode=None, **kwargs):
    """ Get the TZ identifiers for a given address, e.g.:

    >>> tztrout.tz_ids_for_address('US', state='CA', city='Palo Alto')
    [u'America/Los_Angeles']
    >>> tztrout.tz_ids_for_address('PL')
    [u'Europe/Warsaw']
    >>> tztrout.tz_ids_for_address('CN')
    [
        u'Asia/Shanghai',
        u'Asia/Harbin',
        u'Asia/Chongqing',
        u'Asia/Urumqi',
        u'Asia/Kashgar'
    ]

    Lookup order:

    * US + zipcode: the zip (first part of a ZIP+4) is looked up directly.
    * US + state and/or city: a city-level data exception wins; otherwise a
      zip matching city/state is searched, then (only if a state was given)
      any zip in that state.
    * CA / AU + state: the per-state table is used.
    * Anything else, or a US lookup that found no zip: the country-wide list
      from ``pytz.country_timezones``.

    ``**kwargs`` is accepted and ignored. The result of a failed ``.get``
    lookup is returned as-is (``None`` for a plain dict).
    """

    if country == 'US':
        if zipcode:
            if not isinstance(zipcode, basestring):
                zipcode = str(zipcode)

            # If an extended zipcode in a form of XXXXX-XXXX is provided,
            # only use the first part
            zipcode = zipcode.split('-')[0]

            return td.us_zip_to_tz_ids.get(zipcode)
        elif state or city:
            if city:
                city_key = 'city:%s' % city.lower()
                if city_key in data_exceptions:
                    return data_exceptions[city_key]['include']

            # ``state`` is None when only a city was supplied; calling len()
            # on it used to raise TypeError.
            if state and len(state) != 2:
                state = td.normalized_states['US'].get(state.lower(), state)

            zcdb = ZipCodeDatabase()
            zipcodes = zcdb.find_zip(city=city,
                                     state=state,
                                     exact=True,
                                     limit=1)
            if zipcodes:
                return td.us_zip_to_tz_ids.get(zipcodes[0].zip)
            elif city is not None and state:
                # The city was not found: fall back to any zip in the state.
                # Without a state this would pick an unrelated zip, so the
                # country-wide answer below is used instead.
                zipcodes = zcdb.find_zip(state=state, exact=True, limit=1)
                if zipcodes:
                    return td.us_zip_to_tz_ids.get(zipcodes[0].zip)
    elif country == 'CA' and state:
        if len(state) != 2:
            state = td.normalized_states['CA'].get(state.lower(), state)
        return td.ca_state_to_tz_ids.get(state)
    elif country == 'AU' and state:
        if len(state) != 2:
            state = td.normalized_states['AU'].get(state.lower(), state)
        return td.au_state_to_tz_ids.get(state)

    return pytz.country_timezones.get(country)
コード例 #2
0
ファイル: LC_loading.py プロジェクト: jmmcfarl/loan-picker
def load_location_data(data_dir,group_by='zip3'):
    """Load lat/long coordinates for US zip codes from pyzipcode.

    Parameters
    ----------
    data_dir : passed through to ``make_zip_to_fips_dict`` to build the
        zip -> FIPS mapping.
    group_by : 'zip3' averages latitude/longitude per 3-digit zip prefix,
        'fips' averages them per FIPS code, 'zip' indexes the per-zip rows
        by 5-digit zip code. Any other value returns the per-zip frame
        unindexed and ungrouped.

    Returns
    -------
    pandas.DataFrame
    """
    import us
    # Register the local pyzipcode checkout only once; appending on every
    # call would keep growing sys.path with duplicates.
    pyzipcode_dir = '/Users/james/data_science/pyzipcode'
    if pyzipcode_dir not in sys.path:
        sys.path.append(pyzipcode_dir)
    from pyzipcode import ZipCodeDatabase

    # Collect one row per zip code, state by state.
    zcdb = ZipCodeDatabase()
    zip_dict = {'zip_code': [], 'latitude': [], 'longitude': [], 'state': []}
    for state in us.STATES:
        for cur_zip in zcdb.find_zip(state='%s' % state.abbr):
            zip_dict['zip_code'].append(cur_zip.zip)
            zip_dict['latitude'].append(cur_zip.latitude)
            zip_dict['longitude'].append(cur_zip.longitude)
            zip_dict['state'].append(cur_zip.state)

    zip_data = pd.DataFrame(zip_dict)
    # The 3-digit prefix must be taken while zip_code is still a string,
    # before the integer cast below.
    zip_data['zip3'] = zip_data['zip_code'].apply(lambda x: int(x[:3]))
    zip_data['zip_code'] = zip_data['zip_code'].astype(int)
    zip_to_fips = make_zip_to_fips_dict(data_dir)
    zip_data['fips'] = zip_data['zip_code'].map(zip_to_fips)

    if group_by == 'zip3':
        zip_data = zip_data.groupby('zip3').agg({'latitude': np.mean,'longitude':np.mean})
    elif group_by == 'zip':
        zip_data.set_index('zip_code', inplace=True)
    elif group_by == 'fips':
        zip_data = zip_data.groupby('fips').agg({'latitude': np.mean,'longitude':np.mean})

    return zip_data
コード例 #3
0
ファイル: YelpCity.py プロジェクト: guan-terry/yelp-city
    def __init__(self, key, city='San Francisco'):
        """
        Set up a YelpCity: query Yelp once for the city's region and gather
        the businesses found for each of the city's zip codes.

        Parameters
        ----------
        key : str
            The yelp API key needed to call Yelp Fusion API.
        city : str
            The city to search over.
        """

        zip_database = ZipCodeDatabase()
        search_url = 'https://api.yelp.com' + '/v3/businesses/search'
        auth_header = {'Authorization': 'Bearer %s' % key}

        # A single-result search is enough: only its 'region' is kept.
        region_query = {
            'location': city,
            'limit': 1,
            'offset': 0,
            'term': 'restaurants',
            'sort_by': 'best_match',
        }
        region_response = requests.get(
            url=search_url, headers=auth_header, params=region_query)

        # Both containers exist before find_businesses is first called.
        self.business_id_set = set()
        self.business_list = []
        for zip_record in zip_database.find_zip(city=city):
            self.business_list += self.find_businesses(
                zip_record, search_url, auth_header)

        self.region = region_response.json()['region']
コード例 #4
0
 def _get_zipcode_list(self, location):
     """Resolve ``location`` into a de-duplicated list of "city,state" strings.

     ``location`` may be:

     * ``"all"`` (any case): the whitespace-separated entries of
       ``yelp-city-USA-list.txt`` are returned as-is;
     * a comma-separated list of zip codes (digits only);
     * a city name (no comma, more than 2 characters);
     * a 2-character state code;
     * ``"city,state"``.

     The previous check treated any string containing a comma as a zip
     list, which made the ``"city,state"`` form unreachable.
     """
     if location.lower() == "all":
         with open("yelp-city-USA-list.txt") as f:
             return f.read().split()
     zip_db = ZipDB()
     if re.match(r'^\s*[0-9]+(?:\s*,\s*[0-9]+)*\s*$', location):
         # Only digits separated by commas: an explicit list of zip codes.
         zips = [z.strip() for z in location.split(',')]
     elif "," not in location and len(location) > 2:
         print("search city ...")
         zips = [z.zip for z in zip_db.find_zip(city=location) or []]
     elif len(location) == 2:
         print("search state ...")
         zips = [z.zip for z in zip_db.find_zip(state=location) or []]
     else:
         # partition() always yields both parts, so extra commas or a
         # missing state cannot break the message formatting below.
         city, _, state = location.partition(",")
         city, state = city.strip(), state.strip()
         print("search city: %s state: %s" % (city, state))
         zips = [z.zip for z in zip_db.find_zip(city=city, state=state) or []]
     return list(
         set(["%s,%s" % (zip_db[i].city, zip_db[i].state) for i in zips]))
コード例 #5
0
ファイル: data.py プロジェクト: ninjapanther/tz-trout
    def generate_zip_to_tz_id_map(self):
        """ Generate the map of US zip codes to time zone identifiers. The
        method finds all the possible time zone identifiers for each zip code
        based on a UTC offset stored for that zip in pyzipcode.ZipCodeDatabase.

        For every zip the identifiers from
        ``_get_tz_identifiers_for_us_zipcode`` are adjusted with the
        ``include``/``exclude`` lists of the matching ``zip:`` entry (or,
        failing that, the ``state:`` entry) of ``data_exceptions``, combined
        with those of the optional ``all`` entry. Zips sharing the same
        identifiers are grouped, and the resulting
        ``{tuple_of_zips: json_ids}`` dict is pickled to
        ``US_ZIPS_TO_TZ_IDS_MAP_PATH``. Progress is written to stdout.
        """
        zcdb = ZipCodeDatabase()
        zips = list(zcdb.find_zip())
        zips_len = len(zips)
        # Exceptions that apply to every zip code, when configured.
        global_exceptions = data_exceptions.get('all') or {}
        tz_ids_to_zips = defaultdict(list)
        for cnt, zipcode in enumerate(zips, 1):
            ids = set(self._get_tz_identifiers_for_us_zipcode(zipcode))

            # apply the data exceptions
            specific = (data_exceptions.get('zip:' + zipcode.zip)
                        or data_exceptions.get('state:' + zipcode.state)
                        or {})
            # Build fresh lists instead of assigning into ``specific``: it is
            # the shared entry of data_exceptions. The old
            # ``a + b if cond else []`` form also dropped the specific
            # entries whenever 'all' was missing.
            include = (list(specific.get('include', []))
                       + list(global_exceptions.get('include', [])))
            exclude = (list(specific.get('exclude', []))
                       + list(global_exceptions.get('exclude', [])))
            ids = tuple((ids - set(exclude)) | set(include))

            tz_ids_to_zips[ids].append(zipcode.zip)

            stdout.write('\r%d/%d' % (cnt, zips_len))
            stdout.flush()

        zips_to_tz_ids = {
            tuple(grouped_zips): json.dumps(ids)
            for ids, grouped_zips in tz_ids_to_zips.items()
        }

        # The context manager closes the file even if pickling fails.
        with open(US_ZIPS_TO_TZ_IDS_MAP_PATH, 'w') as out:
            out.write(pickle.dumps(zips_to_tz_ids))
コード例 #6
0
def load_location_data(data_dir, group_by='zip3'):
    """Load lat/long coordinates for US zip codes from pyzipcode.

    ``data_dir`` is handed to ``make_zip_to_fips_dict`` to obtain the
    zip -> FIPS mapping. ``group_by`` selects the shape of the result:

    * ``'zip3'``: mean latitude/longitude per 3-digit zip prefix;
    * ``'fips'``: mean latitude/longitude per FIPS code;
    * ``'zip'``: the per-zip rows, indexed by 5-digit zip code;
    * anything else: the per-zip rows, unindexed and ungrouped.

    Returns a pandas DataFrame.
    """
    import us
    # Append the local pyzipcode checkout a single time; the unconditional
    # append grew sys.path by one duplicate entry per call.
    pyzipcode_dir = '/Users/james/data_science/pyzipcode'
    if pyzipcode_dir not in sys.path:
        sys.path.append(pyzipcode_dir)
    from pyzipcode import ZipCodeDatabase

    # Build pandas zip code database with lat/long coordinates
    zcdb = ZipCodeDatabase()
    columns = ('zip_code', 'latitude', 'longitude', 'state')
    zip_dict = {column: [] for column in columns}
    for state in us.STATES:
        for cur_zip in zcdb.find_zip(state='%s' % state.abbr):
            zip_dict['zip_code'].append(cur_zip.zip)
            zip_dict['latitude'].append(cur_zip.latitude)
            zip_dict['longitude'].append(cur_zip.longitude)
            zip_dict['state'].append(cur_zip.state)

    zip_data = pd.DataFrame(zip_dict)
    # Derive the 3-digit prefix from the string form, then cast to int.
    zip_data['zip3'] = zip_data['zip_code'].apply(lambda x: int(x[:3]))
    zip_data['zip_code'] = zip_data['zip_code'].astype(int)
    zip_to_fips = make_zip_to_fips_dict(data_dir)
    zip_data['fips'] = zip_data['zip_code'].map(zip_to_fips)

    mean_coordinates = {
        'latitude': np.mean,
        'longitude': np.mean
    }
    if group_by == 'zip3':
        zip_data = zip_data.groupby('zip3').agg(mean_coordinates)
    elif group_by == 'zip':
        zip_data.set_index('zip_code', inplace=True)
    elif group_by == 'fips':
        zip_data = zip_data.groupby('fips').agg(mean_coordinates)

    return zip_data
コード例 #7
0
ファイル: data.py プロジェクト: closeio/tz-trout
    def generate_zip_to_tz_id_map(self):
        """ Generate the map of US zip codes to time zone identifiers. The
        method finds all the possible time zone identifiers for each zip code
        based on a UTC offset stored for that zip in pyzipcode.ZipCodeDatabase.

        The identifiers of each zip are adjusted with the ``include`` and
        ``exclude`` lists of its ``zip:`` entry (or, failing that, its
        ``state:`` entry) in ``data_exceptions``, combined with those of the
        optional ``all`` entry. The resulting ``{zip: ids}`` dict is handed to
        ``_dump_json_data`` together with ``US_ZIPS_TO_TZ_IDS_MAP_PATH``.
        """
        zcdb = ZipCodeDatabase()
        zips = list(zcdb.find_zip())
        # Exceptions that apply to every zip code, when configured.
        global_exceptions = data_exceptions.get('all') or {}
        tz_ids_to_zips = defaultdict(list)
        for zipcode in _progressbar(zips):
            ids = set(self._get_tz_identifiers_for_us_zipcode(zipcode))

            # apply the data exceptions
            specific = (
                data_exceptions.get('zip:' + zipcode.zip)
                or data_exceptions.get('state:' + zipcode.state)
                or {}
            )
            # New lists are built rather than assigning into ``specific``,
            # which is the shared data_exceptions entry. The previous
            # ``a + b if cond else []`` form also discarded the specific
            # entries whenever 'all' was absent.
            include = list(specific.get('include', [])) + list(global_exceptions.get('include', []))
            exclude = list(specific.get('exclude', [])) + list(global_exceptions.get('exclude', []))
            ids = tuple((ids - set(exclude)) | set(include))

            tz_ids_to_zips[ids].append(zipcode.zip)

        zips_to_tz_ids = {
            zipcode: ids
            for ids, grouped_zips in tz_ids_to_zips.items()
            for zipcode in grouped_zips
        }

        _dump_json_data(US_ZIPS_TO_TZ_IDS_MAP_PATH, zips_to_tz_ids)
コード例 #8
0
# Copy every CSV in ``dir`` to "<name>_edit.csv", dropping its last 31 rows.
for file in os.listdir(dir):
    if file.endswith(".csv"):
        # os.listdir() yields bare names; join them with ``dir`` so the
        # files are found even when ``dir`` is not the working directory.
        src_path = os.path.join(dir, file)
        dst_path = os.path.join(dir, file.replace('.csv', '_edit.csv'))
        file_length = file_len(src_path)
        rows_to_keep = file_length - 31
        with open(src_path, 'r', newline='') as inp, open(dst_path,
                                                          'w',
                                                          newline='') as out:
            writer = csv.writer(out)
            for curr_line, row in enumerate(csv.reader(inp)):
                if curr_line >= rows_to_keep:
                    # Everything from here on belongs to the dropped tail.
                    break
                writer.writerow(row)

# getting NJ zip codes
zc = ZipCodeDatabase()
zipped_codes_nj = zc.find_zip(state='NJ')
uz_codes_nj = [z.zip for z in zipped_codes_nj]
# number of zip codes
num_zips = len(uz_codes_nj)
print(num_zips)
# want to use random zip codes to determine trends for the whole state - roughly want 70%
num_rand = math.floor(num_zips * .7)
# shuffle the zip codes (fixed seed keeps the sample reproducible)
random.seed(10)
random.shuffle(uz_codes_nj)
# extract the first 70% zips -- these are now the random zip codes we will collect info on
good_zips = uz_codes_nj[0:num_rand]
# these zip codes gave problems when trying to get data - drop them if sampled.
# Filtering (instead of list.remove) does not raise ValueError when one of
# them did not make it into the sample.
problem_zips = {'08017', '08370', '07427'}
good_zips = [z for z in good_zips if z not in problem_zips]
コード例 #9
0
zcdb = ZipCodeDatabase()
geolocator = Nominatim()
for area_code in area_code_and_place:
    state = area_code.split("-")[1].split("(")[0].strip()
    if "DC" in state:
        state = us.states.lookup("DC").abbr
    else:
        state = us.states.lookup(state).abbr
    city = area_code.split("-")[1].split("(")[1].rstrip(")")
    city = city.strip()
    if "," in city:
        city = city.split(",")[0]
    if " " in city:
        if [prefix for prefix in prefixes if prefix in city] == []:
            city = city.split(" ")[0]
    if isinstance(zcdb.find_zip(city=city, state=state), list):
        zip_code = zcdb.find_zip(city=city, state=state)[0]
    else:
        zip_code = zcdb.find_zip(city=city, state=state)
        if zip_code is None:
            try:
                zip_code = zcdb.find_zip(state=state)[0]
            except:
                if state == "MP":
                    zip_code = edict({
                        "latitude": 15.200755,
                        "longitude": 145.756952
                    })
                elif state == "GU":
                    zip_code = edict({
                        "latitude": 13.463345,
コード例 #10
0
zcdb = ZipCodeDatabase()
geolocator = Nominatim()
for area_code in area_code_and_place:
    state = area_code.split("-")[1].split("(")[0].strip()
    if "DC" in state:
        state = us.states.lookup("DC").abbr
    else:
        state = us.states.lookup(state).abbr
    city = area_code.split("-")[1].split("(")[1].rstrip(")")
    city = city.strip()
    if "," in city:
        city = city.split(",")[0]
    if " " in city:
        if [prefix for prefix in prefixes if prefix in city] == []:
            city = city.split(" ")[0]
    if isinstance(zcdb.find_zip(city=city,state=state),list):
        zip_code = zcdb.find_zip(city=city,state=state)[0]
    else:
        zip_code = zcdb.find_zip(city=city,state=state)
        if zip_code is None:
            try:
                zip_code = zcdb.find_zip(state=state)[0]
            except:
                if state == "MP":
                    zip_code = edict({
                        "latitude":15.200755,
                        "longitude":145.756952
                    })
                elif state == "GU":
                    zip_code = edict({
                        "latitude":13.463345,
コード例 #11
0
from session import db_session
from models import Zip

from pyzipcode import ZipCodeDatabase

zcdb = ZipCodeDatabase()

# Stage one Zip row per zip code known to pyzipcode, then persist them all
# with a single commit. The row is not bound to a name, so the builtin
# ``zip`` is no longer shadowed at module level.
for z in zcdb.find_zip():
    db_session.add(Zip(zip_code=z.zip))

db_session.commit()
コード例 #12
0
ファイル: geo_tag.py プロジェクト: ettlinger/TwitterSense
# NOTE(review): ``tfile`` is sliced out of the printed argv list but never
# used below; the input and output paths are hard-coded.
flist=str(sys.argv)
tfile= flist[12:len(flist)-2]

zcdb = ZipCodeDatabase()
c=0
# Each input line is split on "|": the first field holds "[x, y]"
# coordinates and the last field is echoed into the output. For US locations
# a line "<last field>,<zip>,<state>" is written, with the zip picked at
# random among the zips found for the reverse-geocoded city.
# The context manager closes both files even if a line fails to parse.
with open("sent9.txt", 'r') as f, open("t9.txt", 'w') as g:
    for line in f:
        tweet = line.split("|")
        coords = re.search(r"\[(.*)\]", tweet[0]).group(1)
        x, y = map(float, re.findall(r'[+-]?[0-9.]+', coords))
        location = rg.search([x,y])
        if location[0]['cc'] == "US":
            state = location[0]['admin1']
            city = location[0]['name']
            zlist=zcdb.find_zip(city=city)
            # Truthiness covers both "no result" shapes (None or empty);
            # the old ``zlist>0`` compared a list with an int and let an
            # empty result reach random.choice().
            if zlist:
                zipcode = random.choice(zlist)
                s = tweet[-1].strip('\n')+","+zipcode.zip+","+state+"\n"
                g.write(s.encode('utf8'))
        # Flush after every 101 input lines so partial output survives.
        c+=1
        if c>100:
            g.flush()
            c=0
コード例 #13
0
def get_dispensary_list(location):
    """Scrape the dispensary listing pages for ``location``.

    ``location`` must contain a "/" and the part after the first "/" is used
    as the city name for the zip code lookup. Pages are requested from
    ``dispensaries_site_url + <location> + "?page=N"`` starting at page 1
    until a page has no listing wrapper.

    Returns a list of dicts with the keys "title", "rating", "helper",
    "location" and "zip code", sorted by title. "zip code" is the first zip
    found for the city, or "" when none is found.
    """
    # Lower-case and hyphenate the location for the URL. The hyphenated
    # value used to be stored in a misspelled variable and never used.
    url_location = location.lower().replace(' ', '-')

    city = location.split('/')[1]
    zip_list = ZipCodeDatabase().find_zip(city=city)
    # Truthiness also covers a None result, on which len() would fail.
    if zip_list:
        zip_code = zip_list[0].zip
    else:
        print("  Can't find zip code of given city!")
        zip_code = ""

    print(
        f"---------------- {location} -  start --------------------------------"
    )
    dispensary_list = []

    driver = webdriver.Chrome(driverpath, options=chrome_options)
    try:
        page_index = 1
        while True:
            page_url = dispensaries_site_url + url_location + "?page=" + str(
                page_index)
            driver.get(page_url)
            # Give the page a moment to render its listings.
            time.sleep(1)
            containers = driver.find_elements_by_class_name(
                'map-listings-list__ListWrapper-sc-1ynfzzj-0')
            if not containers:
                print("  Sorry, can't find list in this URL ", page_url)
                break

            data_lists = containers[0].find_elements_by_class_name(
                'styled-components__Main-sc-1e5myvf-6')
            for every_data in data_lists:
                title = every_data.find_element_by_class_name(
                    "base-card__Title-sc-1fhygl1-4").text
                # Some cards carry no rating; leave the rating empty then.
                try:
                    rating_element = every_data.find_element_by_class_name(
                        "rating__RatingValue-sc-12pds58-1")
                    counting_element = every_data.find_element_by_class_name(
                        "rating__Count-sc-12pds58-2")
                except Exception:
                    rating = ""
                else:
                    rating = rating_element.text + counting_element.text
                helper = every_data.find_element_by_class_name(
                    "base-card__Helper-sc-1fhygl1-5").text

                if title != "":
                    data_json = {
                        "title": title,
                        "rating": rating,
                        "helper": helper,
                        "location": location,
                        "zip code": zip_code
                    }
                    dispensary_list.append(data_json)
                    print("     ", data_json)

            page_index += 1
    finally:
        # Always shut the browser down; it used to be left running.
        driver.quit()

    newlist = sorted(dispensary_list, key=itemgetter('title'))
    print(
        f"---------------- {location} -  End--------------------------------")
    return newlist
コード例 #14
0
def _artist_or_error(query, error):
    """Return the artist result for ``query``, or ``error`` if no such artist exists."""
    if jambase.artistExists(query) == False:
        return error
    return [None, jambase.eventsHelp(None, query, 0), query]


def search(input):
    """Search events by zip code, by place name, or by artist name.

    Returns one of:

    * ``[events, None, input]`` for a 5-character numeric zip code, or for a
      place in the United States whose city has a known zip code (events
      within a radius of 50 around the middle zip of that city);
    * ``[None, artists, input]`` when the text is not a usable location but
      an artist of that name exists;
    * ``[message, flag]`` on errors (unknown text, non-US location without a
      matching artist, or an HTTP error from a remote API).

    The place branches used to be an exhaustive ``elif``/``elif`` pair, so
    the location search in the trailing ``else`` could never run and US
    places returned ``None``. Numeric input that is not 5 characters long
    also returned ``None``; it is now handled like any other text.
    """
    not_found = [
        "Error: Not found. Please search an artist OR a location, not both.",
        True
    ]
    try:
        # A zip code is numeric and exactly 5 characters long.
        try:
            int(input)
            looks_like_zip = len(input) == 5
        except ValueError:
            looks_like_zip = False
        if looks_like_zip:
            # event list should look like [[eventname1, eventid1, eventlocation1], ...]
            return [jambase.eventsHelp(input, None, 0), None, input]

        # Ask the Yahoo geo.places API whether the text names a place.
        # NOTE(review): the text is embedded in the query without escaping.
        parsedInput = input.replace(" ", "+")
        rawData = urllib2.urlopen(
            "http://query.yahooapis.com/v1/public/yql?q=select%20%2a%20from%20geo.places%20where%20text='"
            + parsedInput + "'&format=json").read()
        results = json.loads(rawData)['query']['results']
        if results is None:
            # No place with this name: it can only be an artist.
            return _artist_or_error(input, not_found)

        # 'place' is either a single record or a list of candidates.
        place = results['place']
        if isinstance(place, list):
            place = place[0]

        if place['country']['content'] != "United States":
            return _artist_or_error(
                input,
                ["All locations must be within the United States.", False])

        # Use the city of the place and search around its middle zip code.
        locality = place.get('locality1') or {}
        placeCity = locality.get('content')
        zips = ZipCodeDatabase().find_zip(city=placeCity) if placeCity else None
        if not zips:
            # No city or no zip for it: treat the text as a possible artist.
            return _artist_or_error(input, not_found)
        # Floor division keeps the index an int under true division too.
        events = jambase.eventsHelp(zips[len(zips) // 2].zip, None, 50)
        return [events, None, input]
    except urllib2.HTTPError:
        return ["Too many API requests, please try again in a day.", True]
コード例 #15
0
from pyzipcode import ZipCodeDatabase
from scipy.spatial import distance
import pandas as pd

# Module-level pyzipcode handle, created once at import time.
zcdb = ZipCodeDatabase()
# Zip code records matching city "San Francisco", state "CA"; iterated by
# long_lat_to_zipcode, which reads their latitude/longitude attributes.
sf_zipcodes = zcdb.find_zip(city="San Francisco", state="CA")
# print(len(sf_zipcodes))

def long_lat_to_zipcode(input_longitude,input_latitude):
    """
    Convert a longitude/latitude pair to the closest zip code in sf_zipcodes.

    Stated intent: compute the Euclidean distance between the given
    coordinates and the coordinates of every record in ``sf_zipcodes``,
    pick the record with the minimum distance and return its zip code.

    NOTE(review): the code visible here only computes the distance for each
    record and stores the one of the first record in ``euc_dist``; no
    minimum comparison, no assignment to ``closest_zip`` and no ``return``
    are visible, so the function appears to be truncated at this point.

    :param input_longitude,input_latitude: latitude and longitude that you want to convert to closest zipcode
    :return closest_zip: zip code (per the stated intent; see the note above)
    """

    # Best candidate so far and its distance; both start unset.
    closest_zip = None
    euc_dist = None

    for i, sf_zipcode in enumerate(sf_zipcodes):
        # extract latitude and longitude from each row in sf_zipcode
        lat = sf_zipcode.latitude
        long = sf_zipcode.longitude
        # Euclidean distance between this record's (long, lat) and the input
        # (longitude, latitude), rounded to 4 decimal places
        euclidean_dist = round( distance.euclidean((long,lat), (input_longitude,input_latitude)), 4)
        # the distance of the first record seeds euc_dist
        if i == 0:
            euc_dist = euclidean_dist
コード例 #16
0
ファイル: url_scraper.py プロジェクト: geogeek1911/rf_scraper
import csv
import redfin_scraper as rs
from pyzipcode import ZipCodeDatabase
from datetime import datetime as dt
from selenium.webdriver.chrome.options import Options
import os

noBrowserUI = True
os.environ["DBUS_SESSION_BUS_ADDRESS"] = '/dev/null'
zcdb = ZipCodeDatabase()
zips = [zc.zip for zc in zcdb.find_zip()]
# NOTE(review): this replaces the full list built on the previous line with
# a single zip code - presumably a debugging override; confirm before a
# full run.
zips = ['01510']
dataDir = './data'
chrome_options = Options()
chrome_options.add_extension("./proxy.zip")
# chrome_options.add_argument("--load-extension=./proxy.zip")
chrome_options.add_argument("--ignore-certificate-errors")
# chrome_options.add_argument("--window-size=1024,768")
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-infobars")
# Start timestamp of this run. It used to be assigned twice, and only this
# later assignment ever took effect.
sttm = dt.now().strftime('%Y%m%d-%H%M%S')

# Every cell of not_listed.csv, flattened into one list.
with open('not_listed.csv', 'rb') as f:
    reader = csv.reader(f)
    not_listed = [zc for zclist in reader for zc in zclist]

# First column of processed_zips.csv; blank rows are skipped because
# indexing them raised IndexError.
with open('./processed_zips.csv', 'rb') as f:
    reader = csv.reader(f)
    processed = [row[0] for row in reader if row]