def __init__(self):
    """Build a 2D histogram of log-rent against log-income for renters.

    Reads ``self.incomeField`` (the name of the income column), so that
    attribute must already be defined, e.g. as a class attribute.

    Attributes set:
        renterData: rows of the ``ds.EHSinterview()`` data with a strictly
            positive ``rentwkx``, restricted to the income field,
            ``rentwkx`` and ``bedrqx`` columns.
        population, xbins, ybins: counts and bin edges returned by
            ``np.histogram2d`` with 40 log-rent bins (x) and 30 log-income
            bins (y).
        xaxis, yaxis: centres of the x and y bins.
        popdf: DataFrame with one row per histogram cell and columns
            ``'rental price'`` (x bin centre), ``'income'`` (y bin centre)
            and ``'p'`` (cell count divided by the total count).  Rows are
            ordered with the x bin index varying fastest.
    """
    raw_data = ds.EHSinterview()
    # Fields of interest
    income_rent = raw_data.loc[:, [self.incomeField, "rentwkx", "bedrqx"]]
    # Filter out non-renters
    self.renterData = income_rent[income_rent["rentwkx"] > 0]
    # Disabled filter kept from the original ("only consider one-bed"):
    # self.renterData = self.renterData[self.renterData["bedrqx"] == 4]

    # Split the data into 2D histogram data (x: log rent, y: log income)
    self.population, self.xbins, self.ybins = np.histogram2d(
        np.log(self.renterData["rentwkx"].values),
        np.log(self.renterData[self.incomeField].values),
        bins=[40, 30])

    # Bin centres are the midpoints of consecutive bin edges
    self.xaxis = (self.xbins[1:] + self.xbins[:-1]) / 2.0
    self.yaxis = (self.ybins[1:] + self.ybins[:-1]) / 2.0

    # Flatten the histogram into one row per (x bin, y bin) cell.  The x index
    # varies fastest, so x centres are tiled, y centres are repeated, and the
    # counts are read from the transposed matrix in row-major order.
    n_x = len(self.xaxis)
    n_y = len(self.yaxis)
    total_pop = self.population.sum()
    cells = np.column_stack((
        np.tile(self.xaxis, n_y),
        np.repeat(self.yaxis, n_x),
        self.population.T.ravel() / total_pop,
    ))
    self.popdf = pd.DataFrame(data=cells,
                              columns=['rental price', 'income', 'p'])
def __init__(self):
    """Load the survey data and bin renters by log-rent and log-income.

    Reads ``self.incomeField`` (the name of the income column), so that
    attribute must already be defined, e.g. as a class attribute.

    Attributes set:
        renterData: rows of the ``ds.EHSinterview()`` data with a strictly
            positive ``rentwkx``, restricted to the income field,
            ``rentwkx`` and ``bedrqx`` columns.
        population, xbins, ybins: counts and bin edges returned by
            ``np.histogram2d`` with 40 log-rent bins (x) and 30 log-income
            bins (y).
    """
    survey = ds.EHSinterview()

    # Restrict the frame to the fields of interest
    wanted_columns = [self.incomeField, "rentwkx", "bedrqx"]
    subset = survey.loc[:, wanted_columns]

    # Non-renters are dropped by requiring a positive weekly rent
    pays_rent = subset["rentwkx"] > 0
    self.renterData = subset[pays_rent]
    # Disabled filter kept from the original ("only consider one-bed"):
    # self.renterData = self.renterData[self.renterData["bedrqx"]==4]

    # Two-dimensional histogram in log space: rent on x, income on y
    log_rent = np.log(self.renterData["rentwkx"].values)
    log_income = np.log(self.renterData[self.incomeField].values)
    histogram = np.histogram2d(log_rent, log_income, bins=[40, 30])
    self.population, self.xbins, self.ybins = histogram
def __init__(self):
    """Build a 2D histogram of log-income against log-rent for renters.

    Reads ``self.incomeField`` (the name of the income column), so that
    attribute must already be defined, e.g. as a class attribute.

    Attributes set:
        renterData: rows of the English Housing Survey interview data whose
            ``rentwkx`` is not a string and lies strictly between 0 and
            50000, with ``rentwkx`` cast to ``np.float64``.
        population: 30 x 30 matrix of counts, transposed so that rows
            correspond to log-rent bins and columns to log-income bins.
        xbins, ybins: bin edges for log-income (x) and log-rent (y).
    """
    # Bring raw data from Datasets class reader for the English Housing
    # Survey data
    raw_data = ds.EHSinterview()
    # Filter for field rentwkx, total weekly rent payable (rent plus housing
    # benefit)
    income_rent = raw_data.loc[:, [self.incomeField, "rentwkx"]]

    # Drop string entries in the rentwkx column BEFORE any numeric comparison:
    # comparing a str with a number raises TypeError on Python 3, so the
    # range filter below must only ever see numeric values.
    is_not_str = income_rent["rentwkx"].apply(
        lambda x: not isinstance(x, str)).astype(bool)
    # Cast rentwkx column values as numpy float64 type
    income_rent = income_rent[is_not_str].astype({"rentwkx": np.float64})

    # Filter out non renters and unreasonably large weekly rent values
    in_range = (income_rent["rentwkx"] > 0) & (income_rent["rentwkx"] < 50000)
    self.renterData = income_rent[in_range]

    # Split the data into a 2D histogram with logarithmic bins (no
    # normalisation here as we want column normalisation, to be introduced
    # when plotting)
    self.population, self.xbins, self.ybins = np.histogram2d(
        np.log(self.renterData[self.incomeField].values),
        np.log(self.renterData["rentwkx"].values),
        bins=[30, 30])
    # histogram2d puts x along the first axis; transpose so that each row of
    # the matrix corresponds to a y (log-rent) bin
    self.population = self.population.T
# -*- coding: utf-8 -*-
"""
Study of households' saving behaviour based on English Housing Survey data.

@author: daniel, Adrian Carro
"""

import Datasets as ds
import pandas as pd
import matplotlib.pyplot as plt

# Load the two English Housing Survey tables: interview data and income data
derived = ds.EHSinterview()
income = ds.EHSincome()

# Combine both DataFrames with an inner join on the "aacode" column, which
# must exist in both frames; only keys present in both are retained
joined = pd.merge(left=derived, right=income, how="inner", on="aacode")

# Keep only HYEARGRx and AmtSvng1b columns:
#   - HYEARGRx: household gross annual income (inc. income from all adult
#     household members). An extension of the gross income of the HRP and any
#     partner. This variable represents the household gross income of ALL
#     adults living within the household.
#   - AmtSvng1b: Amount of savings/money invested.
#       Value = 1.0  Label = under £1,000
#       Value = 2.0  Label = £1,000 to £2,999
#       Value = 3.0  Label = £3,000 to £4,999
#       Value = 4.0  Label = £5,000 to £5,999
#       Value = 5.0  Label = £6,000 to £6,999
#       Value = 6.0  Label = £7,000 to £7,999
#       Value = 7.0  Label = £8,000 to £11,999
#       Value = 8.0  Label = £12,000 to £15,999
#       Value = 9.0  Label = £16,000 to £19,999
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 29 13:29:06 2015

Derive a tenure-change code from the English Housing Survey interview data
and plot a stacked histogram of household income (HYEARGRx) for the records
whose tenure-change code is 2, 3 and 1.

@author: daniel
"""

import Datasets as ds
import numpy as np
import matplotlib.pyplot as plt

rawData = ds.EHSinterview()

# Keep records whose previous-tenure code lies strictly between 0 and 9.
# .copy() makes ``data`` an independent frame, so the columns added below do
# not trigger pandas' SettingWithCopyWarning.
data = rawData[['Prevten', 'agehrpx', 'lenresb', 'tenure4', 'HYEARGRx']][
    (rawData['Prevten'] > 0) & (rawData['Prevten'] < 9)].copy()

# agehrpx minus lenresb
# NOTE(review): presumably age of the household reference person minus length
# of residence, i.e. age at the time of the move -- confirm against the EHS
# codebook.
data["ageAtMove"] = data['agehrpx'] - data['lenresb']

# Encode the transition as <previous tenure group> * 10 + <current tenure>:
#   Prevten 2-4 -> 10, Prevten 5-6 -> 20, Prevten 7-8 -> 30, Prevten 1 -> 0,
# plus the current tenure4 code.
data['tenChange'] = (
    ((data['Prevten'] < 5) & (data['Prevten'] > 1)) * 10.0
    + ((data['Prevten'] > 4) & (data['Prevten'] < 7)) * 20.0
    + (data['Prevten'] > 6) * 30.0
    + data['tenure4']
)

# Age-at-move samples for selected transition codes
formationData = data[['ageAtMove']][data['tenChange'] < 10]
formationRentData = data[['ageAtMove']][data['tenChange'] == 3]
moverData = data[['ageAtMove']][(data['tenChange'] > 10) & (data['tenChange'] < 20)]
rentownData = data[['ageAtMove']][data['tenChange'] == 31]
fluxes = data[['tenChange']]

# Household income for tenChange codes 3, 2 and 1
# NOTE(review): the variable names suggest rent / social / owner-occupied
# formation -- confirm the tenure4 coding.
incomeFormRent = data[['HYEARGRx']][data['tenChange'] == 3]
incomeFormSoc = data[['HYEARGRx']][data['tenChange'] == 2]
incomeFormOwn = data[['HYEARGRx']][data['tenChange'] == 1]

# Alternative plots kept from the original (disabled):
#hist, bins = np.histogram(ftbData)
#plt.hist(fluxes.values, bins=33)
#plt.hist(incomeFormRent.values, bins=20)

# Stacked income histogram.  The three datasets must be passed as ONE list
# (the original call was missing the opening bracket, a syntax error), and
# each single-column frame's (n, 1) ``.values`` array is flattened to 1-D so
# that hist treats it as one dataset.
plt.hist([incomeFormSoc.values.ravel(),
          incomeFormRent.values.ravel(),
          incomeFormOwn.values.ravel()],
         bins=20, stacked=True)
#plt.hist(incomeFormOwn.values, bins=20)
#plt.hist(rentownData.values, bins=20)