-
Notifications
You must be signed in to change notification settings - Fork 0
/
rent_scraper.py
79 lines (61 loc) · 2.28 KB
/
rent_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 17 13:21:44 2015
@author: andyjones
"""
import json
import os
import cPickle
import time
import scipy as sp
import pandas as pd
import zoopla
# Zoopla API key, read once at import time from the 'zoopla' entry of
# keys.json (resolved against the current working directory). The file is
# opened in a `with` block so the handle is closed as soon as the key is read.
with open('keys.json', 'r') as key_file:
    API_KEY = json.load(key_file)['zoopla']
# Keyword arguments sent with every property-listings query, in addition to
# the latitude and longitude of the location being searched.
SEARCH_OPTIONS = {
    'listing_status': 'rent',
    'radius': 0.5,
    'minimum_beds': 2,
    'maximum_beds': 2,
    'order_by': 'age',
    'include_rented': 1,
    'summarised': 1,
}

# Request budget used to derive the pause between consecutive API calls
# (3600 / RATE_LIMIT seconds, plus one) -- presumably requests per hour;
# confirm against the API's terms.
RATE_LIMIT = 100
# Average number of weeks in a calendar month: (days per month) divided by
# (days per week), which works out to 52 / 12. Every division uses a float
# operand: under Python 2 a bare `365/12` is integer division (30), which
# would skew the factor to about 4.27 instead of about 4.33.
WEEKS_PER_MONTH = (365. / 12) / (365. / 52)
def get_coords():
    """Load the search locations from ``coords.json``.

    The file is read from the current working directory. Its contents are
    loaded into a DataFrame and transposed, so each top-level key of the JSON
    object becomes a row label; the transposed frame must have exactly two
    columns, which are labelled ``lat`` and ``lon`` (presumably the file maps
    a location name to a ``[lat, lon]`` pair -- confirm against the data).

    Returns:
        pandas.DataFrame: one row per top-level key, columns
        ``['lat', 'lon']``.
    """
    # Read inside a `with` block so the file handle is closed promptly
    # instead of being left for the garbage collector.
    with open('coords.json') as coords_file:
        raw_coords = json.load(coords_file)

    df = pd.DataFrame(raw_coords).T
    df.columns = ['lat', 'lon']
    return df
def append_rental_information(name, lat, lon, file_name):
    """Fetch the listings around one location and add them to the pickled store.

    Queries the Zoopla property-listings endpoint at (lat, lon) with
    SEARCH_OPTIONS, loads the dict pickled in ``file_name``, stores the
    listings under ``name`` and writes the dict back. After a successful
    update it sleeps for ``3600/RATE_LIMIT + 1`` seconds before returning.

    Any exception raised along the way (API call, unpickling, writing) is
    caught and reported on stdout; nothing is re-raised, and no sleep happens
    on that path.

    Args:
        name: key under which the listings are stored.
        lat: latitude passed to the API.
        lon: longitude passed to the API.
        file_name: path of an existing pickle file holding the store dict.
    """
    api = zoopla.api(version=1, api_key=API_KEY)
    request_interval = 3600/RATE_LIMIT + 1

    try:
        listings = list(api.property_listings(latitude=lat, longitude=lon, **SEARCH_OPTIONS))

        # File modes are kept as in the rest of the module ('r' / 'w+') so the
        # store stays readable by the other functions; the `with` blocks make
        # sure each handle is closed (and the write flushed) before moving on.
        with open(file_name, 'r') as store_file:
            current_store = cPickle.load(store_file)
        current_store[name] = listings
        with open(file_name, 'w+') as store_file:
            cPickle.dump(current_store, store_file)

        print('Fetched {}, found {} listings'.format(name, len(listings)))
        time.sleep(request_interval)
    except Exception as e:
        # Deliberately best-effort: one failed location must not abort the
        # whole run, so the error is reported and swallowed.
        print('Failed with error {} on name {} and coords {}'.format(e, name, (lat, lon)))
def accumulate_rental_information(file_name):
    """Fetch listings for every location not yet present in the store.

    If ``file_name`` does not exist it is created holding an empty pickled
    dict. Otherwise the existing store is loaded and every key whose value is
    not None counts as already processed. Each row of ``get_coords()`` is
    then either fetched via ``append_rental_information`` or skipped with a
    message on stdout.

    Args:
        file_name: path of the pickle file used as the store.
    """
    coords = get_coords()

    # `with` blocks ensure the store handle is closed before
    # append_rental_information reopens the same file.
    if not os.path.exists(file_name):
        with open(file_name, 'w+') as store_file:
            cPickle.dump({}, store_file)
        already_processed = set()
    else:
        with open(file_name, 'r') as store_file:
            current_store = cPickle.load(store_file)
        already_processed = {k for k, v in current_store.iteritems() if v is not None}

    for name, row in coords.iterrows():
        if name not in already_processed:
            append_rental_information(name, row['lat'], row['lon'], file_name)
        else:
            print('Skipping {}, since it\'s already in the file'.format(name))
def get_rent_statistic(listings):
    """Return the scaled price of every listing, in input order.

    Each listing's ``price`` attribute is converted with ``int`` and
    multiplied by WEEKS_PER_MONTH (presumably turning a weekly rent into a
    monthly one -- confirm the unit of ``price`` against the API).

    Args:
        listings: iterable of objects exposing a ``price`` attribute.

    Returns:
        list: one scaled price per listing.
    """
    scaled_prices = []
    for listing in listings:
        price = int(listing.price)
        scaled_prices.append(WEEKS_PER_MONTH*price)
    return scaled_prices
def get_rent_statistics(file_name):
    """Compute the rent statistic for every entry of the pickled store.

    Args:
        file_name: path of the pickle file holding a dict of
            name -> listings.

    Returns:
        dict: name -> result of ``get_rent_statistic`` for that name's
        listings.
    """
    # Load under `with` so the handle is closed as soon as unpickling is done.
    with open(file_name, 'r') as store_file:
        store = cPickle.load(store_file)
    return {name: get_rent_statistic(listings) for name, listings in store.iteritems()}
def save_rent_statistics(file_name):
    """Write the rent statistics of a pickled store to a JSON file.

    The output file is named after the base name of ``file_name`` with
    ``.json`` appended, and is therefore created in the current working
    directory regardless of where the store itself lives.

    Args:
        file_name: path of the pickle file holding the store.
    """
    stats = get_rent_statistics(file_name)
    out_name = os.path.basename(file_name) + '.json'
    # Closing the handle explicitly guarantees the JSON is flushed to disk
    # before this function returns.
    with open(out_name, 'w+') as out_file:
        json.dump(stats, out_file)