# begins with "606", as a list object.
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ["Chicago", "New Haven, CT", "77005", "Jacksonville, FL"]
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode
# string.
#st = zl.zipcodes_list(st_items = ["100", "770"])
st = ["48104"]

# Initialize the webdriver.
driver = zl.init_driver(chromedriver_binary.chromedriver_filename)

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Get total number of search terms.
num_search_terms = len(st)

# Initialize list obj that will house all scraped data.
output_data = []

# Start the scraping.
for idx, term in enumerate(st):
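Example #2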
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ["Chicago", "New Haven, CT", "77005", "Jacksonville, FL"]
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode
# string.

#st = zl.zipcodes_list(st_items = ["48103"])
st = ["Tuscon, AZ"]

# Initialize the webdriver.
driver = zl.init_driver("/Applications/chromedriver")
#os.environ["webdriver.chrome.driver"] = driver

# Go to the Zillow search results page for Tucson, AZ.
#zl.navigate_to_website(driver, "http://www.zillow.com/homes/recently_sold/house_type/")
zl.navigate_to_website(driver, 'https://www.zillow.com/homes/tuscon-az_rb/')

# Click the "buy" button.
#zl.click_buy_button(driver)

# Get total number of search terms.
num_search_terms = len(st)

# Initialize list obj that will house all scraped data.
output_data = []
Example #3
# "91319", "91320", "930", "9062", "9063", "90680"

import time
import pandas as pd
import zillow_functions as zl
from bs4 import BeautifulSoup

# Enter zipcode term here
st = zl.zipcodes_list(st_items=["91320"])
nm = str(st[0])

# Initialize the webdriver.
# Use the location of the chromedriver file on your machine;
# on a PC it would be something like
# driver = zl.init_driver("C:/Users/username/chromedriver.exe")
driver = zl.init_driver("/Users/bjaimes/Desktop/nu/chromedriver")

# Go to https://www.zillow.com/homes/recently_sold
zl.navigate_to_website(driver, "https://www.zillow.com/homes/recently_sold")

# Create 10 variables from the scraped HTML data.
# These variables will make up the final output dataframe.
df = pd.DataFrame({
    'address': [],
    'city': [],
    'state': [],
    'zip': [],
    'sqft': [],
    'price_sqft': [],
    'calculated_price': [],
    'bathrooms': [],
Example #4
ap.add_argument("-r",
                "--resume",
                required=False,
                default=0,
                type=int,
                help="start line of zipcode table")
args = ap.parse_args()

zipcodes = pd.read_csv(args.zipcode, dtype={
    "zipcode": object
}).iloc[args.resume:]
st = zipcodes.zipcode.values.tolist()
num_search_terms = len(st)
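
# Hypothetical invocation (the script and CSV names are assumptions, and a
# --zipcode argument is presumed to be defined above this excerpt; --resume
# skips that many leading rows of the zip code table):
#   python zillow_scraper.py --zipcode zipcodes.csv --resume 120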

# Initialize the webdriver.
driver = zl.init_driver("/Users/mlangberg/venv3/bin/chromedriver")

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Get total number of search terms.
num_search_terms = len(st)

columns = [
    "address", "city", "state", "zip", "price", "sqft", "bedrooms",
    "bathrooms", "days_on_zillow", "sale_type", "realtor_phone", "url"
]
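Example #5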
st_items = []
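# Read search zip codes from z_ct.dat (assumed to hold one zip code per line).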
with open('z_ct.dat', 'r') as zfile:
    for z in zfile:
        st_items.append(z.strip())
st = zl.zipcodes_list(st_items[246:])
nm = str(st[0])

#print (nm)

# Initialize the webdriver.
# Use the location of the chromedriver file on your machine;
# on a PC it would be something like
#driver = zl.init_driver("C:/Users/username/chromedriver.exe")

driver = zl.init_driver("/Users/X/chromedriver")

# Go to https://www.zillow.com/homes/recently_sold
zl.navigate_to_website(driver, "https://www.zillow.com/homes/recently_sold")

# Create 10 variables from the scraped HTML data.
# These variables will make up the final output dataframe.
df = pd.DataFrame({
    'address': [],
    'city': [],
    'state': [],
    'zip': [],
    'sqft': [],
    'price_sqft': [],
    'calculated_price': [],
    'bathrooms': [],
Example #6
    "92127",
    "92128",
    "92129",
    "92130",
    "92131",  #"92132","92134",
    "92135",
    "92139",
    "92140",
    "92145",
    "92147",
    "92154",
    "92173"
]  # zl.zipcodes_list(st_items = ["100", "770"])

# Initialize the webdriver.
driver = zl.init_driver("chromedriver")

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Create 11 variables from the scraped HTML data.
# These variables will make up the final output dataframe.
df = pd.DataFrame({
    'address': [],
    'bathrooms': [],
    'bedrooms': [],
    'city': [],
    'days_on_zillow': [],
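Example #7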
# will yield every US zip code that begins with '10', begins with "11", or
# begins with "606" as a single list.
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ['Chicago', 'New Haven, CT', '77005', 'Jacksonville, FL']
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.

#st = zl.zipcodes_list(st_items = ['Novi','48377','MI'])
st = ['Novi', '48377', 'MI']

# Initialize the webdriver.
driver = zl.init_driver()

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Create 11 variables from the scraped HTML data.
# These variables will make up the final output dataframe.
df = pd.DataFrame({
    'address': [],
    'city': [],
    'state': [],
    'zip': [],
    'price': [],
Example #8
# Use function zipcodes_list() to create a list of US zip codes that will be
# passed to the scraper. For example, st = zipcodes_list(['10', '11', '606'])
# will yield every US zip code that begins with '10', begins with "11", or
# begins with "606" as a single list.
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ['Chicago', 'New Haven, CT', '77005', 'Jacksonville, FL']
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
st = zl.zipcodes_list(st_items=['10', '11', '606'])
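
# For illustration only: a hedged sketch of what zipcodes_list() might look
# like internally. This is an assumption, not the actual zillow_functions
# implementation; it supposes a prefix-matching helper such as
# zipcodes.similar_to() from the `zipcodes` PyPI package.
#
#   import zipcodes
#
#   def zipcodes_list(st_items):
#       if isinstance(st_items, str):  # a single prefix string is allowed
#           st_items = [st_items]
#       return [z["zip_code"] for prefix in st_items
#               for z in zipcodes.similar_to(str(prefix))]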

# Initialize the webdriver.
driver = zl.init_driver('C:/Users/username/My Documents/chromedriver')

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Create 11 variables from the scraped HTML data.
# These variables will make up the final output dataframe.
df = pd.DataFrame({
    'address': [],
    'city': [],
    'state': [],
    'zip': [],
    'price': [],
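Example #9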
import time
import zillow_functions as zl
from bs4 import BeautifulSoup
import boto
import pandas as pd

# List of zip codes
df = pd.read_csv('~/Programming/zillow/work_zip_codes1.csv')

# CHANGE THE S3 FOLDER NAME!!!
# state = df[df['zip'] == 95138]
temp = df.loc[:, 'zip'].tolist()
# zipcodes = [temp[(i + 1) * 20: (i + 2) * 20] for i in range(int(len(temp) / 20))]

# Initialize the webdriver.
driver = zl.init_driver("/anaconda/bin/chromedriver")

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)


def scrape_data(zc):
    st = zc

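    # connect_s3() reads AWS credentials from the environment or boto config;
    # 'zillowstreamjk' is a bucket name specific to this example.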
    conn = boto.connect_s3()
    bucket = conn.get_bucket('zillowstreamjk')

    # Create 11 variables from the scraped HTML data.
Example #10
# will yield every US zip code that begins with "10", begins with "11", or
# begins with "606", as a list object.
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ["Chicago", "New Haven, CT", "77005", "Jacksonville, FL"]
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode
# string.
st = zl.zipcodes_list(st_items=["98296", "98201"])

# Initialize the webdriver.
driver = zl.init_driver("/Users/gilliangoodman/Downloads/chromedriver")

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Get total number of search terms.
num_search_terms = len(st)

# Initialize list obj that will house all scraped data.
output_data = []

# Start the scraping.
for idx, term in enumerate(st):
Example #11
# begins with "606" as a single list.
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ['Chicago', 'New Haven, CT', '77005', 'Jacksonville, FL']
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode
# string.
st = zl.zipcodes_list(st_items=['900', '901', '902'])

# Initialize the webdriver.
driver = zl.init_driver(
    "/Users/rossi/PycharmProjects/Introduction_to_Linear_Regression_Analysis/chromedriver"
)

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Create 11 variables from the scraped HTML data.
# These variables will make up the final output dataframe.
df = pd.DataFrame({
    'bathrooms': [],
    'bedrooms': [],
    'city': [],
    'price': [],
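Example #12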
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ["Chicago", "New Haven, CT", "77005", "Jacksonville, FL"]
# Keep in mind that, for each search term, the number of listings scraped is 
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode 
# string.
st = zl.zipcodes_list(st_items = ["02108", "02109","02110","02111","02113","02114","02115","02116",
                                  "02118","02119","02120","02121","02122","02124","02125","02126","02127",
                                  "02128","02129","02130","02131","02132","02134","02135","02136","02151","02152",
                                  "02163","02199","02203","02210","02215","02467"])
# st = "Boston, MA"
# Initialize the webdriver.

driver = zl.init_driver("/Users/maria/_CMU/_S18/15388/15388project/Zillow/chromedriver.exe")

# Go to www.zillow.com/homes/recently_sold/
zl.navigate_to_website(driver, "http://www.zillow.com/homes/recently_sold/")

# Click the "buy" button.
#zl.click_buy_button(driver)

# Get total number of search terms.
num_search_terms = len(st)
print(num_search_terms)

# Initialize list obj that will house all scraped data.
output_data = []

# Start the scraping.
Example #13
# will yield every US zip code that begins with "10", begins with "11", or
# begins with "606", as a list object.
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ["Chicago", "New Haven, CT", "77005", "Jacksonville, FL"]
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode
# string.
st = zl.zipcodes_list(st_items=["100", "770"])

# Initialize the webdriver.
driver = zl.init_driver("C:/Users/username/chromedriver.exe")

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Get total number of search terms.
num_search_terms = len(st)

# Initialize list obj that will house all scraped data.
output_data = []

# Start the scraping.
for idx, term in enumerate(st):
Example #14
import time
import pandas as pd
from bs4 import BeautifulSoup
import zillow_functions as zl


st = zl.zipcodes_list(st_items = ["93727"])


# Initialize the webdriver.
driver = zl.init_driver('/usr/local/share/chromedriver')

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")


# Click the "buy" button.
zl.click_buy_button(driver)


# Get total number of search terms.
num_search_terms = len(st)


# Initialize list obj that will house all scraped data.
output_data = []



# Start the scraping.
for idx, term in enumerate(st):
Example #15
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ['Chicago', 'New Haven, CT', '77005', 'Jacksonville, FL']
# Keep in mind that, for each search term, the number of listings scraped is 
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode 
# string.
st = zl.zipcodes_list(st_items = ["15232"])


#connection to MySQL
engine = create_engine('mysql://*****:*****@localhost:3306/zillowproject', echo=False)

# Initialize the webdriver.
driver = zl.init_driver("E:\Programs\ChromeWebDriver\chromedriver.exe")

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)


# Create 11 variables from the scraped HTML data.
# These variables will make up the final output dataframe.
df = pd.DataFrame({'zpid': [],
                   'url': [],
                   'zip': [],
                   'address': [],
                   'bathrooms': [],
Example #16
# string.
'''
01741 = Carlisle
01776 = Sudbury
01778 = Wayland
01890 = Winchester
02090 = Westwood
02493 = Weston
02420/1 = Lexington
'''
st = zl.zipcodes_list(st_items=[
    "01778", "02493", "02090", "01776", "01741", "02420", "02421", "01890"
])

# Initialize the webdriver.
driver = zl.init_driver("/Users/clchang/chromedriver")

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Get total number of search terms.
num_search_terms = len(st)

# Initialize list obj that will house all scraped data.
output_data = []

# Recently Sold Homes
zl.click_recently_sold_button(driver)
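Example #17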
# begins with "606", as a list object.
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other 
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ["Chicago", "New Haven, CT", "77005", "Jacksonville, FL"]
# Keep in mind that, for each search term, the number of listings scraped is 
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode 
# string.

#st = ["85201"]

# Initialize the webdriver.
driver = zl.init_driver("C:\webdrivers\chromedriver.exe")

# Go to www.google.com, then pause so a zip code can be entered manually.
zl.navigate_to_website(driver, "http://www.google.com")
print("Enter a zip code")
time.sleep(30.091)


# Initialize list obj that will house all scraped data.
output_data = []

raw_data = zl.get_html(driver)
print("%s pages of listings found" % str(len(raw_data)))

# Take the extracted HTML and split it up by individual home listings.
listings = zl.get_listings(raw_data)
Example #18
def search(event):
    # Create list of search terms.
    # Function zipcodes_list() creates a list of US zip codes that will be
    # passed to the scraper. For example, st = zipcodes_list(['10', '11', '606'])
    # will yield every US zip code that begins with '10', begins with "11", or
    # begins with "606" as a single list.
    # I recommend using zip codes, as they seem to be the best option for catching
    # as many house listings as possible. If you want to use search terms other
    # than zip codes, simply skip running the zipcodes_list() function below, and add
    # a line of code to manually assign values to object st, for example:
    # st = ['Chicago', 'New Haven, CT', '77005', 'Jacksonville, FL']
    # Keep in mind that, for each search term, the number of listings scraped is
    # capped at 520, so when using a search term like "Chicago" the scraper would
    # end up missing most of the results.
    # Param st_items can be either a list of zipcode strings, or a single zipcode
    # string.
    global list_of_zipcodes

    if len(list_of_zipcodes) == 0:
        print("No inputs given")
        zipcodes_label['text'] = "No Inputs Given"
        return

    st = zl.zipcodes_list(st_items=list(list_of_zipcodes))

    # Initialize the webdriver.
    driver = zl.init_driver(
        "/Users/jasontu/Projects/Real_Estate_Aggregator/Zillow/chromedriver")

    # Go to www.zillow.com/homes
    zl.navigate_to_website(driver, "http://www.zillow.com/homes")

    # Click the "buy" button.
    zl.click_buy_button(driver)

    # Create 11 variables from the scraped HTML data.
    # These variables will make up the final output dataframe.
    df = pd.DataFrame({
        'address': [],
        'bathrooms': [],
        'bedrooms': [],
        'city': [],
        'days_on_zillow': [],
        'price': [],
        'sale_type': [],
        'state': [],
        'sqft': [],
        'url': [],
        'zip': []
    })

    # Get total number of search terms.
    num_search_terms = len(st)

    # Start the scraping.
    for k in range(num_search_terms):
        # Define search term (must be str object).
        search_term = st[k]

        # Enter search term and execute search.
        if zl.enter_search_term(driver, search_term):
            print("Entering search term number " + str(k + 1) + ": '" +
                  search_term + "' " + " out of " + str(num_search_terms))

        else:
            print("Search term " + str(k + 1) + ": '" + search_term + "' " +
                  " failed, moving onto next search term\n***")
            continue

        # Check to see if any results were returned from the search.
        # If there were none, move onto the next search.
        if zl.results_test(driver):
            print("Search " + str(search_term) +
                  " returned zero results. Moving onto the next search\n***")
            continue

        # Pull the html for each page of search results. Zillow caps results at
        # 20 pages, each page can contain 26 home listings, thus the cap on home
        # listings per search is 520.
        raw_data = zl.get_html(driver)
        print(str(len(raw_data)) + " pages of listings found")

        # Take the extracted HTML and split it up by individual home listings.
        listings = zl.get_listings(raw_data)

        # For each home listing, extract the 11 variables that will populate that
        # specific observation within the output dataframe.
        for n in range(len(listings)):
            soup = BeautifulSoup(listings[n], "lxml")
            new_obs = []

            # List that contains number of beds, baths, and total sqft (and
            # sometimes price as well).
            card_info = zl.get_card_info(soup)

            # Street Address
            new_obs.append(zl.get_street_address(soup))

            # Bathrooms
            new_obs.append(zl.get_bathrooms(card_info))

            # Bedrooms
            new_obs.append(zl.get_bedrooms(card_info))

            # City
            new_obs.append(zl.get_city(soup))

            # Days on the Market/Zillow
            new_obs.append(zl.get_days_on_market(soup))

            # Price
            new_obs.append(zl.get_price(soup, card_info))

            # Sale Type (House for Sale, New Construction, Foreclosure, etc.)
            new_obs.append(zl.get_sale_type(soup))

            # Sqft
            new_obs.append(zl.get_sqft(card_info))

            # State
            new_obs.append(zl.get_state(soup))

            # URL for each house listing
            new_obs.append(zl.get_url(soup))

            # Zipcode
            new_obs.append(zl.get_zipcode(soup))

            # Append new_obs to df as a new observation
            if len(new_obs) == len(df.columns):
                df.loc[len(df.index)] = new_obs

    # Close the webdriver connection.
    zl.close_connection(driver)

    # Write df to CSV.
    columns = [
        'address', 'city', 'state', 'zip', 'price', 'sqft', 'bedrooms',
        'bathrooms', 'days_on_zillow', 'sale_type', 'url'
    ]
    df = df[columns]
    dt = time.strftime("%Y-%m-%d") + "_" + time.strftime("%H%M%S")
    file_name = str(dt) + ".csv"
    df.to_csv(file_name, index=False)

    zipcodes_label['text'] = ("Scraping Complete. Review the following CSV file: \n"
                              + str(dt) + ".csv")
    return
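Example #19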
# will yield every US zip code that begins with "10", begins with "11", or
# begins with "606", as a list object.
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ["Chicago", "New Haven, CT", "77005", "Jacksonville, FL"]
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode
# string.
st = zl.zipcodes_list(st_items=["100", "770"])

# Initialize the webdriver.
driver = zl.init_driver(r"C:\Users\mhuem\chromedriver.exe")

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Get total number of search terms.
num_search_terms = len(st)

# Initialize list obj that will house all scraped data.
output_data = []

# Start the scraping.
for idx, term in enumerate(st):
Example #20
# will yield every US zip code that begins with "10", begins with "11", or
# begins with "606", as a list object.
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ["Chicago", "New Haven, CT", "77005", "Jacksonville, FL"]
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode
# string.
st = zl.zipcodes_list(st_items=["98107", "98117", "98115", "98125", "98122"])

# Initialize the webdriver.
driver = zl.init_driver(
    "C:/Programming/RealEstate/Zillow-master/chromedriver.exe")

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Get total number of search terms.
num_search_terms = len(st)

# Initialize list obj that will house all scraped data.
output_data = []

# Start the scraping.
for idx, term in enumerate(st):
Example #21
# will yield every US zip code that begins with '10', begins with "11", or
# begins with "606" as a single list.
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ['Chicago', 'New Haven, CT', '77005', 'Jacksonville, FL']
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
# Param st_items can be either a list of zipcode strings, or a single zipcode
# string.
st = zl.zipcodes_list(st_items=["111"])

# Initialize the webdriver.
driver = zl.init_driver("chromedriver.exe")

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Create 11 variables from the scraped HTML data.
# These variables will make up the final output dataframe.
df = pd.DataFrame({
    'address': [],
    'bathrooms': [],
    'bedrooms': [],
    'city': [],
    'days_on_zillow': [],
Example #22
# I recommend using zip codes, as they seem to be the best option for catching
# as many house listings as possible. If you want to use search terms other
# than zip codes, simply skip running the zipcodes_list() function below, and add
# a line of code to manually assign values to object st, for example:
# st = ['Chicago', 'New Haven, CT', '77005', 'Jacksonville, FL']
# Keep in mind that, for each search term, the number of listings scraped is
# capped at 520, so when using a search term like "Chicago" the scraper would
# end up missing most of the results.
st = zl.zipcodes_list(st_items = ['94102', '94103', '94104', '94105', '94107', '94108',
                                  '94109', '94110', '94111', '94112', '94114', '94115',
                                  '94116', '94117', '94118', '94121', '94122', '94123',
                                  '94124', '94127', '94129', '94130', '94131', '94132',
                                  '94133', '94134', '94158'])

# Initialize the webdriver.
driver = zl.init_driver('chromedriver')

# Go to www.zillow.com/homes
zl.navigate_to_website(driver, "http://www.zillow.com/homes")

# Click the "buy" button.
zl.click_buy_button(driver)

# Create 11 variables from the scraped HTML data.
# These variables will make up the final output dataframe.
df = pd.DataFrame({'address' : [],
                   'city' : [],
                   'state' : [],
                   'zip' : [],
                   'price' : [],
                   'sqft' : [],