Exemple #1
0
def generate_data():
    """Build a DataFrame of 1000 randomly generated records.

    Every column except ``zipcode`` holds an integer drawn with
    ``random.randint`` (bounds inclusive); ``zipcode`` is chosen at random
    from the first ten entries returned by ``zipcodes.list_all()``.

    Rows are collected in a list and the frame is built once at the end.
    ``DataFrame.append`` (used previously) was removed in pandas 2.0 and
    copied the whole frame on every call. Column dtypes are whatever pandas
    infers from the collected values.

    Returns:
        pandas.DataFrame: 1000 rows, 21 columns, default integer index.
    """
    some_zipcodes = [elem["zip_code"] for elem in zipcodes.list_all()][:10]

    rows = []
    for _ in range(1000):
        # Dict literals evaluate left to right, so the sequence of random
        # draws (and therefore seeded output) matches the original order.
        rows.append({
            "checking_balance": random.randint(1, 4),
            "months_loan_duration": random.randint(1, 75),
            "credit_history": random.randint(1, 5),
            "purpose": random.randint(1, 5),
            "amount": random.randint(1, 10000),
            "savings_balance": random.randint(1, 5),
            "employment_duration": random.randint(1, 5),
            "percent_of_income": random.randint(1, 25),
            "years_at_residence": random.randint(1, 25),
            "age": random.randint(18, 85),
            "other_credit": random.randint(1, 3),
            "housing": random.randint(1, 3),
            "job": random.randint(1, 4),
            "existing_loans_count": random.randint(0, 3),
            "dependents": random.randint(1, 3),
            "phone": random.randint(1, 2),
            "default": random.randint(1, 2),
            "gender": random.randint(1, 2),
            "status": random.randint(1, 3),
            "zipcode": random.choice(some_zipcodes),
            "race": random.randint(1, 4),
        })
    return pd.DataFrame(rows)
def find_near_zips(zipc, city, state):
    """Return the zip codes in a city/state that resemble ``zipc[0]``.

    Args:
        zipc: indexable value; only its first element is passed to
            ``zipcodes.similar_to``.
        city: city used to narrow the candidate zip codes.
        state: state used to narrow the candidate zip codes.

    Returns:
        list: the ``'zip_code'`` value of every match, in the order returned
        by ``zipcodes.similar_to``.
    """
    prefix = zipc[0]
    # Only active zip codes of the requested city/state are candidates.
    candidates = zipcodes.filter_by(
        zipcodes.list_all(), active=True, city=city, state=state)
    matches = zipcodes.similar_to(prefix, zips=candidates)
    return [match['zip_code'] for match in matches]
 def start_requests(self):
     """Yield one request per (active zip code, category) pair.

     The URL is ``self.search_url`` formatted with the lower-cased city
     (spaces replaced by hyphens), the lower-cased state and the category.
     Responses are handled by ``self.parse``.
     """
     active_zipcodes = zipcodes.filter_by(zipcodes.list_all(), active=True)
     for entry in active_zipcodes:
         for category in self.categories:
             city_slug = entry['city'].lower().replace(' ', '-')
             state_slug = entry['state'].lower()
             target = self.search_url.format(city_slug, state_slug, category)
             yield scrapy.Request(target, callback=self.parse)
Exemple #4
0
 def start_requests(self):
     """Yield one search request per active zip code in a target state.

     Target states are the ``'abbr'`` values of the module-level ``states``
     collection. Each request URL is ``self.search_url`` formatted with the
     zip code; the state is forwarded to ``self.parse`` via ``meta['state']``.
     """
     # A set gives constant-time membership checks; the previous list was
     # scanned once for every zip code.
     target_states = {state['abbr'] for state in states}
     for zipcode in zipcodes.filter_by(zipcodes.list_all(), active=True):
         if zipcode['state'] not in target_states:
             continue
         url = self.search_url.format(zipcode['zip_code'])
         yield scrapy.Request(url,
                              callback=self.parse,
                              meta={'state': zipcode['state']})
Exemple #5
0
def chicago_zip_codes():
    """Build a ``ZipCode`` object for every active zip code of CHICAGO.

    Each object gets its position in the filtered list as ``pk``, the zip
    code string, and a session-builder URL rooted at ``settings.V1_URL``.

    Returns:
        list: ``ZipCode`` instances in the order returned by the filter.
    """
    chicago_entries = zipcodes.filter_by(
        zipcodes.list_all(), active=True, city='CHICAGO')
    return [
        ZipCode(
            pk=position,
            zip_code=entry['zip_code'],
            url=f"{settings.V1_URL}/url-mediator/session-builder?zip_code={entry['zip_code']}",
        )
        for position, entry in enumerate(chicago_entries)
    ]
    def start_requests(self):
        """Yield one search request per active zip code.

        The URL is ``self.search_url`` formatted with the zip code. The zip
        code is also passed to ``self.parse`` as ``meta['search_keyword']``.
        """
        active_zipcodes = zipcodes.filter_by(zipcodes.list_all(), active=True)
        for entry in active_zipcodes:
            target = self.search_url.format(entry['zip_code'])
            yield scrapy.Request(
                target,
                callback=self.parse,
                meta={'search_keyword': entry['zip_code']},
            )
Exemple #7
0
    def start_requests(self):
        """Reset the output CSV, then yield one form request per active zip.

        The CSV next to this module (under ``../external_data/output``) is
        truncated and rewritten with only its header row. Afterwards each
        active zip code is printed and posted to ``self.search_url`` as the
        ``Criteria`` form field; responses go to ``self.parse``.
        """
        output_path = (os.path.dirname(os.path.realpath(__file__)) +
                       "/../external_data/output/findamortgagebrokercom.csv")
        header = [
            'Organization', 'Full Name', 'Zipcode', 'Address', 'Email',
            'NMLS', 'Website', 'Phone'
        ]
        # Opening in 'w' mode discards any previous contents of the file.
        with open(output_path, 'w') as csvfile:
            csv.DictWriter(csvfile, fieldnames=header).writeheader()

        active_zipcodes = zipcodes.filter_by(zipcodes.list_all(), active=True)
        for entry in active_zipcodes:
            payload = {"Criteria": entry['zip_code']}
            print(entry['zip_code'])
            yield scrapy.FormRequest(self.search_url,
                                     callback=self.parse,
                                     formdata=payload)
 def start_requests(self):
     """Yield three uncached search requests per active target-state zip.

     Target states are the ``'abbr'`` values of the module-level ``states``
     collection. For every matching zip code a request is issued, in this
     order, for ``self.most_active_search_url``,
     ``self.most_sales_search_url`` and ``self.most_listings_search_url``.
     Each template is formatted with the hyphen-joined lower-cased city, the
     lower-cased state and the zip code. All requests carry
     ``meta={'dont_cache': True}`` and are handled by ``self.parse``.
     """
     # Set membership is constant-time; the list was scanned per zip code.
     target_states = {state['abbr'] for state in states}
     # The three searches differ only in their URL template.
     url_templates = (
         self.most_active_search_url,
         self.most_sales_search_url,
         self.most_listings_search_url,
     )
     for zipcode in zipcodes.filter_by(zipcodes.list_all(), active=True):
         if zipcode['state'] not in target_states:
             continue
         # Computed once per zip code instead of once per template.
         city_slug = '-'.join(zipcode['city'].split()).lower()
         state_slug = zipcode['state'].lower()
         for url_template in url_templates:
             url = url_template.format(city_slug, state_slug,
                                       zipcode['zip_code'])
             yield scrapy.Request(url,
                                  callback=self.parse,
                                  meta={'dont_cache': True})
 def start_requests(self):
     """Yield one search request per entry of ``zipcodes.list_all()``.

     The URL is ``self.search_url`` formatted with the entry's state and
     city, in that order. Responses are handled by ``self.parse``.
     """
     for entry in zipcodes.list_all():
         state, city = entry['state'], entry['city']
         target = self.search_url.format(state, city)
         yield scrapy.Request(target, callback=self.parse)
################################## Main code ###################################

if __name__ == '__main__':

    part = sys.argv[1]

    if part == 'search':
        print('Working on getting property ids by zip code.')
        time.sleep(3)

        # Construct zip codes
        print('Getting zip codes.')
        all_zip_codes = sorted(
            list({z['zip_code']
                  for z in zipcodes.list_all()}))
        print(f'{len(all_zip_codes)} zip codes in total.')

        time.sleep(5)

        print('Running async requests...')
        start_time = time.time()
        loop = asyncio.get_event_loop()
        search_results = loop.run_until_complete(run(get_search,
                                                     all_zip_codes))
        print('--- %s seconds ---' % (time.time() - start_time))

        # Construct the results summary table
        print('Exporting summary table.')
        search_results = pd.DataFrame(search_results)
        search_results.to_csv(os.path.join(
Exemple #11
0
    sem = asyncio.Semaphore(50)
    async with aiohttp.ClientSession() as session:
        async with sem:
            for zip_code in tasks:
                results.append(
                    asyncio.ensure_future(get_results(session, sem, zip_code)))
            return await asyncio.gather(*results)


################################################################################

################################## Main code ###################################

if __name__ == '__main__':

    # Collect the distinct zip codes in sorted order.
    print('Constructing zip code list...')
    all_zip_codes = sorted({entry['zip_code']
                            for entry in zipcodes.list_all()})
    print(f'List of {len(all_zip_codes)} constructed.')
    print(f'Writing to {output_folder_path}.')

    # Short pause before the requests start.
    time.sleep(5.5)

    # Drive the async crawl to completion and report the wall-clock time.
    print('Running async requests...')
    start_time = time.time()
    loop = asyncio.get_event_loop()
    results = loop.run_until_complete(run(all_zip_codes))
    print('--- %s seconds ---' % (time.time() - start_time))

################################################################################
Exemple #12
0
 def test_list_all(self):
     """``list_all()`` must equal the module's internal ``_zips`` data."""
     actual = zipcodes.list_all()
     expected = zipcodes._zips
     self.assertEqual(actual, expected)
Exemple #13
0
 def start_requests(self):
     """Yield one search request per active zip code.

     The URL is ``self.search_url`` formatted with the zip code, which is
     also passed to ``self.parse`` as ``meta['zip']``.
     """
     active_zipcodes = zipcodes.filter_by(zipcodes.list_all(), active=True)
     for entry in active_zipcodes:
         target = self.search_url.format(entry['zip_code'])
         request_meta = {'zip': entry['zip_code']}
         yield scrapy.Request(target, callback=self.parse, meta=request_meta)
Exemple #14
0
from sqlite3 import connect
from typing import Tuple

import numpy as np
import zipcodes as zp
from geopy import Nominatim
from numba import jit
from requests import Session

from cars.util import CAR_DB

# Radius constant; by its name, the mean Earth radius in miles.
R_MEAN_EARTH_MI = 3_958.7613

# Lookup of zip code -> (latitude, longitude) as floats, restricted to
# entries whose zip_code_type is "STANDARD". Built once at import time.
LATLONG_BY_ZIP: dict[str, Tuple[float, float]] = {
    entry["zip_code"]: (float(entry["lat"]), float(entry["long"]))
    for entry in zp.list_all()
    if entry["zip_code_type"] == "STANDARD"
}


@jit(nopython=True)  # type: ignore
def great_circle_miles(p0: np.ndarray, lon1: float, lat1: float) -> np.ndarray:
    """
    Vectorized great-circle distance calculation.

    Args:
        p0: array, shape [n, 2]: lon/lat of first point
        lon1: lon of second point, scalar
        lat1: lat of second point, scalar

    Returns:
Exemple #15
0
def main():
    """Declare the test schema, generate the tests, and run unittest.

    The schema is a list of stages. It is handed to ``generate_unittests``
    (defined elsewhere; presumably it attaches the generated test methods to
    a TestCase - confirm against its definition). The ``zipcodes`` version is
    then logged and ``unittest.main()`` is invoked.
    """
    # Each stage is a dict with three keys:
    # name: name of this stage, typically a name to reference the assertion
    # assertion: lambda which returns unittest callable with self's (testcase's) context
    # predicates: lambda or sequence of lambdas to call and pass to the assertion
    unittests_schema = [
        {
            # Every predicate in this stage is expected to be truthy.
            "name": "true",
            "assertion": lambda self: self.assertTrue,
            "predicates": [
                lambda: zipcodes.is_real("06905"),
                lambda: zipcodes._contains_nondigits("1234a"),
                # bad length
                lambda: callable_raise_exc(
                    lambda: zipcodes._clean("000000"), ValueError
                ),
                # bad characters
                lambda: callable_raise_exc(
                    lambda: zipcodes._clean("0000a"), ValueError
                ),
                # ensure zips argument works
                lambda: len(
                    zipcodes.similar_to(
                        "2", zips=zipcodes.filter_by(active=True, city="Windsor")
                    )
                )
                == 3,
            ],
        },
        {
            # Every predicate in this stage is expected to be falsy.
            "name": "false",
            "assertion": lambda self: self.assertFalse,
            "predicates": [
                lambda: zipcodes.is_real("91239"),
                # digits and "-" are acceptable
                lambda: zipcodes._contains_nondigits("12345"),
                lambda: zipcodes._contains_nondigits("1234-"),
            ],
        },
        {
            # Predicates here are (actual, expected) pairs of lambdas.
            "name": "equal",
            "assertion": lambda self: self.assertEqual,
            "predicates": [
                # valid_zipcode_length parameter
                (lambda: zipcodes._clean("0646", 4), lambda: "0646"),
                # default behavior
                (lambda: zipcodes._clean("06469"), lambda: "06469"),
                (lambda: zipcodes.list_all(), lambda: zipcodes._zips),
                # filter_by on city alone: expects exactly this one record
                (
                    lambda: zipcodes.filter_by(city="Old Saybrook"),
                    lambda: [
                        {
                            "zip_code": "06475",
                            "zip_code_type": "STANDARD",
                            "active": True,
                            "city": "Old Saybrook",
                            "acceptable_cities": [],
                            "unacceptable_cities": ["Fenwick"],
                            "state": "CT",
                            "county": "Middlesex County",
                            "timezone": "America/New_York",
                            "area_codes": ["860"],
                            "world_region": "NA",
                            "country": "US",
                            "lat": "41.3015",
                            "long": "-72.3879",
                        }
                    ],
                ),
                # similar_to with a 4-character input: expects two records
                (
                    lambda: zipcodes.similar_to("1018"),
                    lambda: [
                        {
                            "acceptable_cities": [],
                            "active": False,
                            "area_codes": ["212"],
                            "city": "New York",
                            "country": "US",
                            "county": "New York County",
                            "lat": "40.71",
                            "long": "-74",
                            "state": "NY",
                            "timezone": "America/New_York",
                            "unacceptable_cities": ["J C Penney"],
                            "world_region": "NA",
                            "zip_code": "10184",
                            "zip_code_type": "UNIQUE",
                        },
                        {
                            "acceptable_cities": [],
                            "active": True,
                            "area_codes": ["212"],
                            "city": "New York",
                            "country": "US",
                            "county": "New York County",
                            "lat": "40.7143",
                            "long": "-74.0067",
                            "state": "NY",
                            "timezone": "America/New_York",
                            "unacceptable_cities": [],
                            "world_region": "NA",
                            "zip_code": "10185",
                            "zip_code_type": "PO BOX",
                        },
                    ],
                ),
                # similar_to with a 4-character input: expects one record
                (
                    lambda: zipcodes.similar_to("1005"),
                    lambda: [
                        {
                            "zip_code": "10055",
                            "zip_code_type": "STANDARD",
                            "active": True,
                            "city": "New York",
                            "acceptable_cities": [],
                            "unacceptable_cities": ["Manhattan"],
                            "state": "NY",
                            "county": "New York County",
                            "timezone": "America/New_York",
                            "area_codes": ["212"],
                            "world_region": "NA",
                            "country": "US",
                            "lat": "40.7579",
                            "long": "-73.9743",
                        }
                    ],
                ),
                # similar_to with a full 5-digit input: expects that record
                (
                    lambda: zipcodes.similar_to("10001"),
                    lambda: [
                        {
                            "zip_code": "10001",
                            "zip_code_type": "STANDARD",
                            "active": True,
                            "city": "New York",
                            "acceptable_cities": [],
                            "unacceptable_cities": [
                                "Empire State",
                                "G P O",
                                "Greeley Square",
                                "Macys Finance",
                                "Manhattan",
                            ],
                            "state": "NY",
                            "county": "New York County",
                            "timezone": "America/New_York",
                            "area_codes": ["718", "917", "347", "646"],
                            "world_region": "NA",
                            "country": "US",
                            "lat": "40.7508",
                            "long": "-73.9961",
                        }
                    ],
                ),
            ],
        },
    ]

    # Turn the schema into test cases, log the library version, run them.
    generate_unittests(unittests_schema)
    logger.info("Zipcodes version: {}".format(zipcodes.__version__))
    unittest.main()
Exemple #16
0
import sys
import requests
from lxml import html
import zipcodes
import json

import sys

reload(sys)
sys.setdefaultencoding('utf8')

# ``os.path`` is used below, but ``os`` is not among the imports at the top
# of this script.
import os

realtor_home_url = "https://www.realtor.com"
search_url = "https://www.realtor.com/realestateandhomes-search/{0}/type-single-family-home/price-150000-550000/nc-hide"

# Directory that holds one listings CSV per zip code already processed.
# It does not change per zip code, so it is resolved once.
_listings_dir = os.path.dirname(os.path.realpath(__file__)) + "/../external_data/output/listing_searches_by_zip_codes"

# Active Florida zip codes that do not yet have a listings CSV on disk.
new_zipcodes_list = []
for zipcode in zipcodes.filter_by(zipcodes.list_all(), active=True):
    if zipcode['state'] != "FL":
        continue
    listings_csv = _listings_dir + "/florida_{0}_listings_list.csv".format(zipcode['zip_code'])
    if not os.path.isfile(listings_csv):
        new_zipcodes_list.append(zipcode['zip_code'])

for zipcode in new_zipcodes_list:
    retry_limit = 3

    while retry_limit > 0:
        try:
            from six.moves.urllib import request

            opener = request.build_opener(
                request.ProxyHandler({'https': 'http://127.0.0.1:24000'}))
            #            html_content = opener.open(
            #                'https://www.realtor.com/realestateandhomes-search/32615/type-single-family-home/price-150000-550000/nc-hide').read()
################################################################################

################################## Main code ###################################

if __name__ == '__main__':

    # Fetch the zip codes that are already stored in the database.
    print('Executing SQL query...')
    # NOTE(review): the URL below contains literal '{user}'-style
    # placeholders and is never formatted - confirm how the credentials are
    # meant to be substituted.
    engine = db.create_engine(
        'postgresql://{user}:{user_pass}@{host}/{dataname2}')
    try:
        # The connection was previously opened but never used or closed;
        # the query now runs on it and the context manager closes it.
        with engine.connect() as connection:
            zip_codes_table = pd.read_sql(get_zips_statement, connection)
    finally:
        # Release the connection pool even if the query fails.
        engine.dispose()

    # Request only the zip codes that are not in the database yet.
    print('Constructing zip code list...')
    rest_zips = set(zip_codes_table['zip_code'].tolist())
    all_zip_codes = {z['zip_code'] for z in zipcodes.list_all()}
    zips_to_request = list(all_zip_codes - rest_zips)
    print(f'List of {len(zips_to_request)} constructed.')

    time.sleep(5.5)

    print('Running async requests...')
    start_time = time.time()
    loop = asyncio.get_event_loop()
    results = loop.run_until_complete(run(zips_to_request))
    print('--- %s seconds ---' % (time.time() - start_time))

################################################################################