def _calc_distances(df_prop, df_hosp): """ calculates the distance from each property to each hospital Parameters ---------- df_prop: pandas.DataFrame df_hosp: pandas.DataFrame Returns ------- pandas.DataFrame """ pos_prop = df_prop[['LATITUDE', 'LONGITUDE']].values pos_hosp = df_hosp[['LATITUDE', 'LONGITUDE']].values (prop_rows, prop_columns) = pos_prop.shape (hosp_rows, hosp_columns) = pos_hosp.shape distances = np.zeros((hosp_rows, prop_rows)) for k in range(hosp_rows): distances[k, :] = distance_estimators.spherical_distance(pos_prop.copy(), pos_hosp[k].copy()) min_indices = np.argmin(distances, 0) min_distances = np.amin(distances, 0) hosp_names = df_hosp['name'].values df_prop['NEAREST_HOSPITAL'] = hosp_names[min_indices] df_prop['MIN_DISTANCE'] = min_distances df_prop = df_prop[['LONGITUDE', 'LATITUDE', 'NEAREST_HOSPITAL', 'MIN_DISTANCE', 'AV_TOTAL', 'LIVING_AREA']].copy() return df_prop
def _calc_distances(df_prop, df_school): """ calculates the distance from each property to each school Parameters ---------- df_prop: pandas.DataFrame df_school: pandas.DataFrame Returns ------- pandas.DataFrame """ pos_prop = df_prop[['LATITUDE', 'LONGITUDE']].values pos_school = df_school[['LATITUDE', 'LONGITUDE']].values (prop_rows, prop_columns) = pos_prop.shape (school_rows, school_columns) = pos_school.shape distances = np.zeros((school_rows, prop_rows)) for k in range(school_rows): distances[k, :] = distance_estimators.spherical_distance(pos_prop.copy(), pos_school[k].copy()) min_indices = np.argmin(distances, 0) min_distances = np.amin(distances, 0) school_names = df_school['sch_name'].values df_prop['NEAREST_SCHOOL'] = school_names[min_indices] df_prop['MIN_DISTANCE'] = min_distances df_prop = df_prop[['LONGITUDE', 'LATITUDE', 'NEAREST_SCHOOL', 'MIN_DISTANCE', 'AV_TOTAL', 'LIVING_AREA']].copy() return df_prop
def sse_group(df, columns):
    """
    Sum of squared errors (SSE) of a cluster around its centroid.

    Every row of ``df[columns]`` is compared against the column-wise mean of
    those columns using distance_estimators.spherical_distance; the resulting
    distances are squared and added up.

    Parameters
    ----------
    df: pandas.DataFrame
        Members of the cluster.
    columns: list
        Names of the columns that hold the coordinates.

    Returns
    -------
    float
        Sum of the squared distances. The exact type follows from what
        spherical_distance returns (presumably a 1-D float array, giving a
        NumPy float scalar -- TODO confirm).
    """
    points = df[columns].values
    # Centroid as a single-row 2-D array: the mean of each selected column.
    centroid = np.array([df[columns].mean().values])

    squared = distance_estimators.spherical_distance(points, centroid)
    # Square in place; the array is not needed in its unsquared form.
    squared *= squared
    return sum(squared)
def sse_group(df, columns):
    """
    Compute the within-cluster sum of squared errors (SSE).

    The centroid is the column-wise mean of ``df[columns]``. The distance of
    each row to that centroid is obtained from
    distance_estimators.spherical_distance, squared, and summed.

    Parameters
    ----------
    df: pandas.DataFrame
        Rows belonging to one cluster.
    columns: list
        Coordinate column names to use.

    Returns
    -------
    float
        Total of the squared distances. The concrete type depends on the
        return value of spherical_distance (presumably a 1-D float array, so
        a NumPy float scalar -- TODO confirm).
    """
    coordinates = df[columns].values
    # Wrap the per-column means in a list so the centroid is a 1 x n array.
    center = np.array([df[columns].mean().values])

    errors = distance_estimators.spherical_distance(coordinates, center)
    # In-place element-wise square of the distances.
    errors *= errors
    total = sum(errors)
    return total
def _calc_distances(df_prop, df_school): """ calculates the distance from each property to each school Parameters ---------- df_prop: pandas.DataFrame df_school: pandas.DataFrame Returns ------- pandas.DataFrame """ pos_prop = df_prop[['LATITUDE', 'LONGITUDE']].values pos_school = df_school[['LATITUDE', 'LONGITUDE']].values (prop_rows, prop_columns) = pos_prop.shape (school_rows, school_columns) = pos_school.shape distances = np.zeros((school_rows, prop_rows)) for k in range(school_rows): distances[k, :] = distance_estimators.spherical_distance( pos_prop.copy(), pos_school[k].copy()) min_indices = np.argmin(distances, 0) min_distances = np.amin(distances, 0) school_names = df_school['sch_name'].values df_prop['NEAREST_SCHOOL'] = school_names[min_indices] df_prop['MIN_DISTANCE'] = min_distances df_prop = df_prop[[ 'LONGITUDE', 'LATITUDE', 'NEAREST_SCHOOL', 'MIN_DISTANCE', 'AV_TOTAL', 'LIVING_AREA' ]].copy() return df_prop