# Summarise non-vacant housing parcels per district and per municipality.
#
# Steps: load the parcel table, derive a transit score and the unbuilt lot
# area for every parcel, keep the rows selected by
# analysis.filter_luc(..., 'housing_non_vacant'), then write one sum/mean
# pivot table per grouping key.
import analysis
import processData
import numpy as np

# Columns aggregated in both pivot tables.
SUMMARY_COLUMNS = ['available_land_sqft', 'TransitScore', 'median_hh_income']
# Aggregations are given by name rather than as np.sum / np.mean: pandas
# deprecated passing NumPy callables here (FutureWarning since 2.1) and the
# string names keep the same result and the same 'sum' / 'mean' column labels.
SUMMARY_AGGREGATIONS = ['sum', 'mean']

# Read the parcel-level data from the CSV file.
X_housing = processData.import_csv('data\\Housing_Final.csv')

# Transit score: half from the stop count scaled to 0-100 against the largest
# stop count in the table, half from the walk score.
numMax = np.max(X_housing['numTransitStops'].tolist())
X_housing['TransitScore'] = (
    0.5 * X_housing['numTransitStops'] / numMax * 100
    + 0.5 * X_housing['walkscore']
)

# Available land: lot area minus building area (an area, not a monetary value).
X_housing['available_land_sqft'] = (
    X_housing['lot_areaft'] - X_housing['bldg_area']
)

# Keep only the parcels selected by the 'housing_non_vacant' filter.
# NOTE(review): the exact selection rule lives in analysis.filter_luc.
X_housing_non_vacant = analysis.filter_luc(X_housing, 'housing_non_vacant')

# Sum and mean of the summary columns for every district.
X_housing_non_vacant_district = X_housing_non_vacant.pivot_table(
    index=['District'],
    values=SUMMARY_COLUMNS,
    aggfunc=SUMMARY_AGGREGATIONS,
)
X_housing_non_vacant_district.to_csv('data\\district_housing_non_vacant.csv')

# Same summary, grouped by municipality ('muni').
X_housing_non_vacant_ct = X_housing_non_vacant.pivot_table(
    index=['muni'],
    values=SUMMARY_COLUMNS,
    aggfunc=SUMMARY_AGGREGATIONS,
)
X_housing_non_vacant_ct.to_csv('data\\ct_housing_non_vacant.csv')
# Append the SHI-unit percentage to the district-level and town-level
# transportation summaries, then write the district table back to its file.
import processData
import numpy as np

# Load the two transportation summaries and the two SHI tables.
district_transportation = processData.import_csv(
    'data\\district_transportation.csv')
ct_transportation = processData.import_csv('data\\ct_transportation.csv')
district_shi = processData.import_csv('data\\district_shi.csv')
ct_shi = processData.import_csv('data\\ct_shi.csv')

# Raw SHI rows, plus their first column as plain lists used as lookup keys.
district_value = district_shi.values
ct_value = ct_shi.values
district_list = district_value[:, 0].tolist()
ct_list = ct_value[:, 0].tolist()

# Transportation rows as mutable Python lists so a value can be appended.
district_transportation_list = district_transportation.values.tolist()
ct_transportation_list = ct_transportation.values.tolist()

# Match each row to its SHI row through the first column and append the value
# found in SHI column 3 (districts) or SHI column 4 (towns). An unmatched key
# makes list.index raise ValueError.
for row in district_transportation_list:
    row.append(district_value[district_list.index(row[0])][3])
for row in ct_transportation_list:
    row.append(ct_value[ct_list.index(row[0])][4])

# Rebuild the arrays and headers with the extra column.
X_district = np.array(district_transportation_list)
header_district = (
    district_transportation.columns.values.tolist()
    + ['percentage_of_shi_units']
)
X_ct = np.array(ct_transportation_list)
header_ct = (
    ct_transportation.columns.values.tolist()
    + ['percentage_of_shi_units']
)

# Write the district table back over its source file.
# NOTE(review): X_ct / header_ct are prepared above but no write for them is
# visible in this section; confirm the town-level write exists.
processData.write_csv(
    'data\\district_transportation.csv', X_district, header_district)
# Build the de-duplicated district/community table and initialise the
# per-district SHI accumulator.
import processData
import numpy as np

# District/community table as a raw array.
dis_com_df = processData.import_csv('data\\dis_com.csv')
dis_com = dis_com_df.values

# Community-level SHI table as a raw array.
com_shi_df = processData.import_csv('SHI_new.csv')
com_shi = com_shi_df.values

# Collapse runs of consecutive rows that repeat the same first two columns:
# the first row is always kept, and every later row is kept only when it
# differs from the row directly before it in column 0 or column 1.
confined_dis_com = [dis_com[0]]
for previous_row, current_row in zip(dis_com[:-1], dis_com[1:]):
    if current_row[0] != previous_row[0] or current_row[1] != previous_row[1]:
        confined_dis_com.append(current_row)
confined_dis_com = np.array(confined_dis_com)

# First column of each table: districts and communities.
districts = confined_dis_com[:, 0]
communities = com_shi[:, 0]

# Sets drop the remaining duplicates; dis_new is the distinct districts as a
# list (in set iteration order).
dis_set = set(districts)
com_set = set(communities)
dis_new = list(dis_set)

# One accumulator list per distinct district, each starting with a single 0.
m = len(dis_set)
dis_shi = [[0] for _ in range(m)]
# Summarise transportation parcels per district and per municipality.
#
# Steps: load the parcel table, derive a transit score and the unbuilt lot
# area for every parcel, then write one sum/mean pivot table per grouping key.
import processData
import numpy as np

# Columns aggregated in both pivot tables.
SUMMARY_COLUMNS = ['available_land_sqft', 'TransitScore', 'median_hh_income']
# Aggregations are given by name rather than as np.sum / np.mean: pandas
# deprecated passing NumPy callables here (FutureWarning since 2.1) and the
# string names keep the same result and the same 'sum' / 'mean' column labels.
SUMMARY_AGGREGATIONS = ['sum', 'mean']

# Read the parcel-level data from the CSV file.
X_transportation = processData.import_csv('data\\Transportation_Final.csv')

# Transit score: half from the stop count scaled to 0-100 against the largest
# stop count in the table, half from the walk score.
numMax = np.max(X_transportation['numTransitStops'].tolist())
X_transportation['TransitScore'] = (
    0.5 * X_transportation['numTransitStops'] / numMax * 100
    + 0.5 * X_transportation['walkscore']
)

# Available land: lot area minus building area (an area, not a monetary value).
X_transportation['available_land_sqft'] = (
    X_transportation['lot_areaft'] - X_transportation['bldg_area']
)

# Sum and mean of the summary columns for every district.
X_transportation_district = X_transportation.pivot_table(
    index=['District'],
    values=SUMMARY_COLUMNS,
    aggfunc=SUMMARY_AGGREGATIONS,
)
X_transportation_district.to_csv('data\\district_transportation.csv')

# Same summary, grouped by municipality ('muni').
X_transportation_ct = X_transportation.pivot_table(
    index=['muni'],
    values=SUMMARY_COLUMNS,
    aggfunc=SUMMARY_AGGREGATIONS,
)
X_transportation_ct.to_csv('data\\ct_transportation.csv')
# Append the SHI-unit percentage to the district-level and town-level
# vacant-housing summaries, then write the district table back to its file.
import processData
import numpy as np

# Load the two vacant-housing summaries and the two SHI tables.
district_vacant_housing = processData.import_csv(
    'data\\district_housing_vacant.csv')
ct_vacant_housing = processData.import_csv('data\\ct_housing_vacant.csv')
district_shi = processData.import_csv('data\\district_shi.csv')
ct_shi = processData.import_csv('data\\ct_shi.csv')

# Raw SHI rows, plus their first column as plain lists used as lookup keys.
district_value = district_shi.values
ct_value = ct_shi.values
district_list = district_value[:, 0].tolist()
ct_list = ct_value[:, 0].tolist()

# Vacant-housing rows as mutable Python lists so a value can be appended.
district_vacant_housing_list = district_vacant_housing.values.tolist()
ct_vacant_housing_list = ct_vacant_housing.values.tolist()

# Match each row to its SHI row through the first column and append the value
# found in SHI column 3 (districts) or SHI column 4 (towns). An unmatched key
# makes list.index raise ValueError.
for row in district_vacant_housing_list:
    row.append(district_value[district_list.index(row[0])][3])
for row in ct_vacant_housing_list:
    row.append(ct_value[ct_list.index(row[0])][4])

# Rebuild the arrays and headers with the extra column.
X_district = np.array(district_vacant_housing_list)
header_district = (
    district_vacant_housing.columns.values.tolist()
    + ['percentage_of_shi_units']
)
X_ct = np.array(ct_vacant_housing_list)
header_ct = (
    ct_vacant_housing.columns.values.tolist()
    + ['percentage_of_shi_units']
)

# Write the district table back over its source file.
# NOTE(review): X_ct / header_ct are prepared above but no write for them is
# visible in this section; confirm the town-level write exists.
processData.write_csv(
    'data\\district_housing_vacant.csv', X_district, header_district)