Exemple #1
0
# -*- coding: utf-8 -*-
"""
Classes to explore Zoopla data

@author: Adrian Carro
"""

from datetime import datetime

import numpy as np
import pandas as pd

import Datasets as ds


decision = ds.ZooplaMatchedAggregated().read(100)
pd.set_option('display.max_columns', None)

# data = ds.ZooplaMatchedDaily()
data = ds.ZooplaRawCollated()
chunk = data.read(500)
filteredChunk = chunk[(chunk["CREATED"] != chunk["LATEST SOLD"]) & (np.invert(pd.isnull(chunk["LATEST SOLD"])))]
# filteredChunk = chunk[np.invert(pd.isnull(chunk["LATEST SOLD"]))]


# filtered_chunk = chunk[chunk["MARKET"]=="SALE"][['LISTING ID','DAY','PRICE']][chunk['PRICE']>0]
pd.set_option('display.max_rows', None)
pd.set_option("display.max_columns", None)
# print decision[['LISTING ID', 'CREATED', 'DELETED']]
print len(filteredChunk)
print filteredChunk
def markup(row):
    """Return a listing's initial price divided by its back-projected price.

    ``row`` is indexed by column name and must provide 'INITIAL PRICE',
    'PRICE', and 'CREATED' / 'LATEST SOLD' as "%Y-%m-%d" date strings.
    The denominator is whatever ``backProjectedPrice(created, sold, price)``
    returns.
    NOTE(review): backProjectedPrice is defined elsewhere; presumably it
    projects the sold price back to the listing date - confirm.
    """
    dateFormat = "%Y-%m-%d"
    initialPrice = row['INITIAL PRICE']
    createdOn = datetime.strptime(row['CREATED'], dateFormat)
    soldOn = datetime.strptime(row['LATEST SOLD'], dateFormat)
    projectedPrice = backProjectedPrice(createdOn, soldOn, row['PRICE'])
    return initialPrice / projectedPrice


def averageDaysOnMarket(data, date):
    """Return the mean number of days between 'CREATED' and 'LATEST SOLD'.

    Only rows of ``data`` whose 'LATEST SOLD' value equals ``date`` are
    averaged.

    Parameters:
        data: DataFrame with 'CREATED' and 'LATEST SOLD' columns holding
            "%Y-%m-%d" date strings.
        date: value compared (with ``==``) against the 'LATEST SOLD' column,
            so it must be of a type that matches the column's entries.

    Returns:
        The mean as a float, or NaN when no row matches ``date`` (previously
        this case raised ZeroDivisionError).
    """
    dateFormat = "%Y-%m-%d"
    soldOnDate = data[data['LATEST SOLD'] == date]
    daysOnMarket = [
        (datetime.strptime(sold, dateFormat)
         - datetime.strptime(created, dateFormat)).days
        for created, sold in soldOnDate[['CREATED', 'LATEST SOLD']].values
    ]
    if not daysOnMarket:
        # Nothing sold on that date: there is no meaningful average.
        return float('nan')
    # float() forces true division on Python 2, where int / int truncates.
    return float(sum(daysOnMarket)) / len(daysOnMarket)


# Data source. The constructor argument used in the alternatives below is
# presumably a starting offset into the collated file, and the trailing date
# the date found at that offset - confirm against Datasets:
#   ds.ZooplaRawCollated(2000000)  # 2009-09-30
#   ds.ZooplaRawCollated(3900000)  # 2010-04-27
#   ds.ZooplaRawCollated(4000000)  # 2010-05-07?
data = ds.ZooplaRawCollated()  # 2008-11-06
chunk = data.read(200000)

# Sale listings with an initial price strictly between 0 and 10,000,000,
# reduced to the date and price columns. An earlier variant selected
# ['LAND REGISTRY UID', 'CREATED', 'INITIAL PRICE', 'LATEST SOLD'] instead.
filteredchunk = chunk.loc[
    (chunk["MARKET"] == "SALE")
    & (chunk['INITIAL PRICE'].values > 0)
    & (chunk['INITIAL PRICE'].values < 10000000),
    ['CREATED', 'INITIAL PRICE', 'LATEST SOLD', 'PRICE']
]

# Fixed dates used further down the script.
date = datetime.strptime("2008-10-11", "%Y-%m-%d")
refdate = datetime.strptime("1900-01-01", "%Y-%m-%d")

soldListings = chunk[(chunk["MARKET"] == "SALE")
                     & (chunk['INITIAL PRICE'].values > 0) &