def generate_data():
    """Generate a synthetic 1000-row credit-scoring dataset.

    Each row holds random integer-coded features (balances, durations,
    demographics, …) plus a zip code drawn from the first ten entries of
    the ``zipcodes`` package's full listing.

    Returns:
        pd.DataFrame: 1000 rows, one column per feature below.
    """
    some_zipcodes = [elem["zip_code"] for elem in zipcodes.list_all()][:10]
    # Collect rows in a plain list and build the frame once at the end:
    # DataFrame.append() was removed in pandas 2.0 and was O(n^2) anyway.
    rows = []
    for _ in range(1000):
        rows.append({
            "checking_balance": random.randint(1, 4),
            "months_loan_duration": random.randint(1, 75),
            "credit_history": random.randint(1, 5),
            "purpose": random.randint(1, 5),
            "amount": random.randint(1, 10000),
            "savings_balance": random.randint(1, 5),
            "employment_duration": random.randint(1, 5),
            "percent_of_income": random.randint(1, 25),
            "years_at_residence": random.randint(1, 25),
            "age": random.randint(18, 85),
            "other_credit": random.randint(1, 3),
            "housing": random.randint(1, 3),
            "job": random.randint(1, 4),
            "existing_loans_count": random.randint(0, 3),
            "dependents": random.randint(1, 3),
            "phone": random.randint(1, 2),
            "default": random.randint(1, 2),
            "gender": random.randint(1, 2),
            "status": random.randint(1, 3),
            "zipcode": random.choice(some_zipcodes),
            "race": random.randint(1, 4),
        })
    # NOTE(review): the old append() path coerced ints to float64; this
    # construction keeps int64 columns — confirm downstream code is dtype-agnostic.
    return pd.DataFrame(rows)
def find_near_zips(zipc, city, state):
    """Return zip codes near/similar to *zipc* within a given city and state.

    Args:
        zipc: zip code string; only its FIRST character is used as the
            ``similar_to`` prefix. NOTE(review): ``zipc[0]`` looks like a
            deliberate one-character prefix search — confirm with callers.
        city: city name passed to ``zipcodes.filter_by``.
        state: state abbreviation passed to ``zipcodes.filter_by``.

    Returns:
        list[str]: the ``zip_code`` field of every match.
    """
    candidates = zipcodes.filter_by(
        zipcodes.list_all(), active=True, city=city, state=state)
    matches = zipcodes.similar_to(zipc[0], zips=candidates)
    # Comprehension replaces the original append loop (same order, same values).
    return [m['zip_code'] for m in matches]
def start_requests(self):
    """Yield one search request per (active zip code, category) pair."""
    active_zips = zipcodes.filter_by(zipcodes.list_all(), active=True)
    for record in active_zips:
        # Slugs are per-zip, so compute them once outside the category loop.
        city_slug = record['city'].lower().replace(' ', '-')
        state_slug = record['state'].lower()
        for category in self.categories:
            target = self.search_url.format(city_slug, state_slug, category)
            yield scrapy.Request(target, callback=self.parse)
def start_requests(self):
    """Yield a search request for every active zip code in the target states."""
    wanted_states = {entry['abbr'] for entry in states}
    for record in zipcodes.filter_by(zipcodes.list_all(), active=True):
        if record['state'] not in wanted_states:
            continue
        target = self.search_url.format(record['zip_code'])
        yield scrapy.Request(target,
                             callback=self.parse,
                             meta={'state': record['state']})
def chicago_zip_codes():
    """Build ``ZipCode`` objects (pk, zip_code, session-builder URL) for Chicago.

    Returns:
        list[ZipCode]: one entry per active zip code matching city='CHICAGO'.
    """
    # NOTE(review): the zipcodes package stores city names in title case
    # ("Chicago"); an all-caps 'CHICAGO' filter may match nothing — verify
    # against the installed data set before relying on this.
    chicago_zips = zipcodes.filter_by(
        zipcodes.list_all(), active=True, city='CHICAGO')
    # Comprehension replaces the original enumerate+append loop.
    return [
        ZipCode(
            pk=index,
            zip_code=entry['zip_code'],
            url=f"{settings.V1_URL}/url-mediator/session-builder?zip_code={entry['zip_code']}",
        )
        for index, entry in enumerate(chicago_zips)
    ]
def start_requests(self):
    """Yield one search request per active US zip code.

    The zip code rides along in ``meta['search_keyword']`` so ``parse``
    knows which query produced each response.
    """
    # Dead commented-out request variants that previously lived inside the
    # docstring have been removed; only the live path below remains.
    for zipcode in zipcodes.filter_by(zipcodes.list_all(), active=True):
        url = self.search_url.format(zipcode['zip_code'])
        yield scrapy.Request(url,
                             callback=self.parse,
                             meta={'search_keyword': zipcode['zip_code']})
def start_requests(self):
    """Write the CSV header for the broker output file, then yield one
    form-POST search request per active zip code.

    Note: because this is a generator, the ``with`` block (and the open
    file) lives for as long as the generator is being consumed.
    """
    output_path = (os.path.dirname(os.path.realpath(__file__))
                   + "/../external_data/output/findamortgagebrokercom.csv")
    with open(output_path, 'w') as csvfile:
        columns = [
            'Organization', 'Full Name', 'Zipcode', 'Address', 'Email',
            'NMLS', 'Website', 'Phone'
        ]
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()
        for record in zipcodes.filter_by(zipcodes.list_all(), active=True):
            code = record['zip_code']
            print(code)
            yield scrapy.FormRequest(self.search_url,
                                     callback=self.parse,
                                     formdata={"Criteria": code})
def start_requests(self):
    """Yield "most active", "most sales", and "most listings" search
    requests (in that order) for every active zip code in the target states.
    """
    target_states = [state['abbr'] for state in states]
    # The three URL templates take identical (city-slug, state-slug, zip)
    # arguments; loop over them instead of triplicating the format calls.
    templates = (
        self.most_active_search_url,
        self.most_sales_search_url,
        self.most_listings_search_url,
    )
    for record in zipcodes.filter_by(zipcodes.list_all(), active=True):
        if record['state'] not in target_states:
            continue
        city_slug = '-'.join(record['city'].split()).lower()
        state_slug = record['state'].lower()
        for template in templates:
            url = template.format(city_slug, state_slug, record['zip_code'])
            yield scrapy.Request(url,
                                 callback=self.parse,
                                 meta={'dont_cache': True})
def start_requests(self):
    """Yield one state/city search request per zip-code record (all records,
    active or not)."""
    for entry in zipcodes.list_all():
        target = self.search_url.format(entry['state'], entry['city'])
        yield scrapy.Request(target, callback=self.parse)
################################## Main code ################################### if __name__ == '__main__': part = sys.argv[1] if part == 'search': print('Working on getting property ids by zip code.') time.sleep(3) # Construct zip codes print('Getting zip codes.') all_zip_codes = sorted( list({z['zip_code'] for z in zipcodes.list_all()})) print(f'{len(all_zip_codes)} zip codes in total.') time.sleep(5) print('Running async requests...') start_time = time.time() loop = asyncio.get_event_loop() search_results = loop.run_until_complete(run(get_search, all_zip_codes)) print('--- %s seconds ---' % (time.time() - start_time)) # Construct the results summary table print('Exporting summary table.') search_results = pd.DataFrame(search_results) search_results.to_csv(os.path.join(
sem = asyncio.Semaphore(50) async with aiohttp.ClientSession() as session: async with sem: for zip_code in tasks: results.append( asyncio.ensure_future(get_results(session, sem, zip_code))) return await asyncio.gather(*results) ################################################################################ ################################## Main code ################################### if __name__ == '__main__': # Get zip codes print('Constructing zip code list...') all_zip_codes = sorted(list({z['zip_code'] for z in zipcodes.list_all()})) print(f'List of {len(all_zip_codes)} constructed.') print(f'Writing to {output_folder_path}.') time.sleep(5.5) print('Running async requests...') start_time = time.time() loop = asyncio.get_event_loop() results = loop.run_until_complete(run(all_zip_codes)) print('--- %s seconds ---' % (time.time() - start_time)) ################################################################################
def test_list_all(self):
    """list_all() must return the module's full internal zip table."""
    actual = zipcodes.list_all()
    expected = zipcodes._zips
    self.assertEqual(actual, expected)
def start_requests(self):
    """Yield one search request per active zip code, tagging each with
    ``meta['zip']`` so the parser knows which code it belongs to."""
    active_records = zipcodes.filter_by(zipcodes.list_all(), active=True)
    for record in active_records:
        code = record['zip_code']
        yield scrapy.Request(self.search_url.format(code),
                             callback=self.parse,
                             meta={'zip': code})
from sqlite3 import connect from typing import Tuple import numpy as np import zipcodes as zp from geopy import Nominatim from numba import jit from requests import Session from cars.util import CAR_DB R_MEAN_EARTH_MI = 3_958.7613 LATLONG_BY_ZIP: dict[str, Tuple[float, float]] = { z["zip_code"]: (float(z["lat"]), float(z["long"])) for z in zp.list_all() if z["zip_code_type"] == "STANDARD" } @jit(nopython=True) # type: ignore def great_circle_miles(p0: np.ndarray, lon1: float, lat1: float) -> np.ndarray: """ Vectorized great-circle distance calculation. Args: p0: array, shape [n, 2]: lon/lat of first point lon1: lon of second point, scalar lat1: lat of second point, scalar Returns:
def main():
    """Declare a table-driven unittest schema for the zipcodes package,
    generate the tests from it, then run unittest's CLI entry point.
    """
    # Schema fields:
    # name of this stage, typically a name to reference the assertion
    # assertion: lambda which returns unittest callable with self's (testcase's) context
    # predicates: lambda or sequence of lambdas to call and pass to the assertion
    unittests_schema = [
        {
            "name": "true",
            "assertion": lambda self: self.assertTrue,
            "predicates": [
                lambda: zipcodes.is_real("06905"),
                lambda: zipcodes._contains_nondigits("1234a"),
                # bad length
                lambda: callable_raise_exc(
                    lambda: zipcodes._clean("000000"), ValueError
                ),
                # bad characters
                lambda: callable_raise_exc(
                    lambda: zipcodes._clean("0000a"), ValueError
                ),
                # ensure zips argument works
                lambda: len(
                    zipcodes.similar_to(
                        "2", zips=zipcodes.filter_by(active=True, city="Windsor")
                    )
                )
                == 3,
            ],
        },
        {
            "name": "false",
            "assertion": lambda self: self.assertFalse,
            "predicates": [
                lambda: zipcodes.is_real("91239"),
                # digits and "-" are acceptable
                lambda: zipcodes._contains_nondigits("12345"),
                lambda: zipcodes._contains_nondigits("1234-"),
            ],
        },
        {
            # "equal" predicates are (actual_fn, expected_fn) pairs.
            "name": "equal",
            "assertion": lambda self: self.assertEqual,
            "predicates": [
                # valid_zipcode_length parameter
                (lambda: zipcodes._clean("0646", 4), lambda: "0646"),
                # default behavior
                (lambda: zipcodes._clean("06469"), lambda: "06469"),
                (lambda: zipcodes.list_all(), lambda: zipcodes._zips),
                (
                    lambda: zipcodes.filter_by(city="Old Saybrook"),
                    lambda: [
                        {
                            "zip_code": "06475",
                            "zip_code_type": "STANDARD",
                            "active": True,
                            "city": "Old Saybrook",
                            "acceptable_cities": [],
                            "unacceptable_cities": ["Fenwick"],
                            "state": "CT",
                            "county": "Middlesex County",
                            "timezone": "America/New_York",
                            "area_codes": ["860"],
                            "world_region": "NA",
                            "country": "US",
                            "lat": "41.3015",
                            "long": "-72.3879",
                        }
                    ],
                ),
                (
                    lambda: zipcodes.similar_to("1018"),
                    lambda: [
                        {
                            "acceptable_cities": [],
                            "active": False,
                            "area_codes": ["212"],
                            "city": "New York",
                            "country": "US",
                            "county": "New York County",
                            "lat": "40.71",
                            "long": "-74",
                            "state": "NY",
                            "timezone": "America/New_York",
                            "unacceptable_cities": ["J C Penney"],
                            "world_region": "NA",
                            "zip_code": "10184",
                            "zip_code_type": "UNIQUE",
                        },
                        {
                            "acceptable_cities": [],
                            "active": True,
                            "area_codes": ["212"],
                            "city": "New York",
                            "country": "US",
                            "county": "New York County",
                            "lat": "40.7143",
                            "long": "-74.0067",
                            "state": "NY",
                            "timezone": "America/New_York",
                            "unacceptable_cities": [],
                            "world_region": "NA",
                            "zip_code": "10185",
                            "zip_code_type": "PO BOX",
                        },
                    ],
                ),
                (
                    lambda: zipcodes.similar_to("1005"),
                    lambda: [
                        {
                            "zip_code": "10055",
                            "zip_code_type": "STANDARD",
                            "active": True,
                            "city": "New York",
                            "acceptable_cities": [],
                            "unacceptable_cities": ["Manhattan"],
                            "state": "NY",
                            "county": "New York County",
                            "timezone": "America/New_York",
                            "area_codes": ["212"],
                            "world_region": "NA",
                            "country": "US",
                            "lat": "40.7579",
                            "long": "-73.9743",
                        }
                    ],
                ),
                (
                    lambda: zipcodes.similar_to("10001"),
                    lambda: [
                        {
                            "zip_code": "10001",
                            "zip_code_type": "STANDARD",
                            "active": True,
                            "city": "New York",
                            "acceptable_cities": [],
                            "unacceptable_cities": [
                                "Empire State",
                                "G P O",
                                "Greeley Square",
                                "Macys Finance",
                                "Manhattan",
                            ],
                            "state": "NY",
                            "county": "New York County",
                            "timezone": "America/New_York",
                            "area_codes": ["718", "917", "347", "646"],
                            "world_region": "NA",
                            "country": "US",
                            "lat": "40.7508",
                            "long": "-73.9961",
                        }
                    ],
                ),
            ],
        },
    ]
    generate_unittests(unittests_schema)
    logger.info("Zipcodes version: {}".format(zipcodes.__version__))
    unittest.main()
import sys import requests from lxml import html import zipcodes import json import sys reload(sys) sys.setdefaultencoding('utf8') realtor_home_url = "https://www.realtor.com" search_url = "https://www.realtor.com/realestateandhomes-search/{0}/type-single-family-home/price-150000-550000/nc-hide" new_zipcodes_list = [] for zipcode in zipcodes.filter_by(zipcodes.list_all(), active=True): if zipcode['state'] == "FL" and \ not os.path.isfile(os.path.dirname(os.path.realpath(__file__)) + "/../external_data/output/listing_searches_by_zip_codes/florida_{0}_listings_list.csv".format(zipcode['zip_code'])): new_zipcodes_list.append(zipcode['zip_code']) for zipcode in new_zipcodes_list: retry_limit = 3 while retry_limit > 0: try: from six.moves.urllib import request opener = request.build_opener( request.ProxyHandler({'https': 'http://127.0.0.1:24000'})) # html_content = opener.open( # 'https://www.realtor.com/realestateandhomes-search/32615/type-single-family-home/price-150000-550000/nc-hide').read()
################################################################################
################################## Main code ###################################
if __name__ == '__main__':
    # Get zip codes already present in the database, then request the rest.
    print('Executing SQL query...')
    # BUG FIX: the original passed the literal string
    # 'postgresql://{user}:{user_pass}@{host}/{dataname2}' with no .format()
    # call, so the DSN placeholders were never substituted. An f-string fills
    # them in (assumes user/user_pass/host/dataname2 are defined above —
    # confirm).
    engine = db.create_engine(
        f'postgresql://{user}:{user_pass}@{host}/{dataname2}')
    # FIX: removed `connection = engine.connect()` — it was never used or
    # closed, leaking a connection; pd.read_sql takes the engine directly.
    zip_codes_table = pd.read_sql(get_zips_statement, engine)
    engine.dispose()

    print('Constructing zip code list...')
    rest_zips = set(zip_codes_table['zip_code'].tolist())
    all_zip_codes = {z['zip_code'] for z in zipcodes.list_all()}
    zips_to_request = list(all_zip_codes - rest_zips)
    print(f'List of {len(zips_to_request)} constructed.')
    time.sleep(5.5)

    print('Running async requests...')
    start_time = time.time()
    loop = asyncio.get_event_loop()
    results = loop.run_until_complete(run(zips_to_request))
    print('--- %s seconds ---' % (time.time() - start_time))
################################################################################