def _calc_distances(df_prop, df_hosp):
        """
        Find the nearest hospital to every property.

        The distance from every property to every hospital is computed with
        ``distance_estimators.spherical_distance``; for each property the
        hospital with the smallest distance is selected.  ``df_prop`` itself
        is not modified: the result is built on a fresh copy.

        Parameters
        ----------
        df_prop: pandas.DataFrame
            Properties.  Must contain the columns ``LATITUDE``, ``LONGITUDE``,
            ``AV_TOTAL`` and ``LIVING_AREA``.
        df_hosp: pandas.DataFrame
            Hospitals.  Must contain the columns ``LATITUDE``, ``LONGITUDE``
            and ``name``.

        Returns
        -------
        pandas.DataFrame
            One row per property with the columns ``LONGITUDE``, ``LATITUDE``,
            ``NEAREST_HOSPITAL``, ``MIN_DISTANCE``, ``AV_TOTAL`` and
            ``LIVING_AREA`` (in that order).

        Raises
        ------
        ValueError
            If ``df_hosp`` has no rows, so no nearest hospital exists.
        """
        pos_prop = df_prop[['LATITUDE', 'LONGITUDE']].values
        pos_hosp = df_hosp[['LATITUDE', 'LONGITUDE']].values

        hosp_rows = pos_hosp.shape[0]
        if hosp_rows == 0:
            # np.argmin would fail on an empty axis with an opaque message.
            raise ValueError('df_hosp must contain at least one hospital')

        # distances[k, i] is the distance from property i to hospital k.
        distances = np.zeros((hosp_rows, pos_prop.shape[0]))
        for k in range(hosp_rows):
            # Copies are handed over as in the original code, presumably
            # because spherical_distance may modify its arguments in place
            # (its implementation is not visible here -- TODO confirm).
            distances[k, :] = distance_estimators.spherical_distance(
                pos_prop.copy(), pos_hosp[k].copy())

        min_indices = np.argmin(distances, 0)
        min_distances = np.amin(distances, 0)

        hosp_names = df_hosp['name'].values

        # Work on a copy so that the caller's frame is left untouched.
        result = df_prop[['LONGITUDE', 'LATITUDE', 'AV_TOTAL',
                          'LIVING_AREA']].copy()
        result.insert(2, 'NEAREST_HOSPITAL', hosp_names[min_indices])
        result.insert(3, 'MIN_DISTANCE', min_distances)
        return result
    def _calc_distances(df_prop, df_school):
        """
        Find the nearest school to every property.

        The distance from every property to every school is computed with
        ``distance_estimators.spherical_distance``; for each property the
        school with the smallest distance is selected.  ``df_prop`` itself
        is not modified: the result is built on a fresh copy.

        Parameters
        ----------
        df_prop: pandas.DataFrame
            Properties.  Must contain the columns ``LATITUDE``, ``LONGITUDE``,
            ``AV_TOTAL`` and ``LIVING_AREA``.
        df_school: pandas.DataFrame
            Schools.  Must contain the columns ``LATITUDE``, ``LONGITUDE``
            and ``sch_name``.

        Returns
        -------
        pandas.DataFrame
            One row per property with the columns ``LONGITUDE``, ``LATITUDE``,
            ``NEAREST_SCHOOL``, ``MIN_DISTANCE``, ``AV_TOTAL`` and
            ``LIVING_AREA`` (in that order).

        Raises
        ------
        ValueError
            If ``df_school`` has no rows, so no nearest school exists.
        """
        pos_prop = df_prop[['LATITUDE', 'LONGITUDE']].values
        pos_school = df_school[['LATITUDE', 'LONGITUDE']].values

        school_rows = pos_school.shape[0]
        if school_rows == 0:
            # np.argmin would fail on an empty axis with an opaque message.
            raise ValueError('df_school must contain at least one school')

        # distances[k, i] is the distance from property i to school k.
        distances = np.zeros((school_rows, pos_prop.shape[0]))
        for k in range(school_rows):
            # Copies are handed over as in the original code, presumably
            # because spherical_distance may modify its arguments in place
            # (its implementation is not visible here -- TODO confirm).
            distances[k, :] = distance_estimators.spherical_distance(
                pos_prop.copy(), pos_school[k].copy())

        min_indices = np.argmin(distances, 0)
        min_distances = np.amin(distances, 0)

        school_names = df_school['sch_name'].values

        # Work on a copy so that the caller's frame is left untouched.
        result = df_prop[['LONGITUDE', 'LATITUDE', 'AV_TOTAL',
                          'LIVING_AREA']].copy()
        result.insert(2, 'NEAREST_SCHOOL', school_names[min_indices])
        result.insert(3, 'MIN_DISTANCE', min_distances)
        return result
def sse_group(df, columns):
    """
    Sum of squared distances between a cluster's rows and the cluster mean.

    The centre of the cluster is the column-wise mean of ``df[columns]``.
    The distance of every row to that centre is obtained from
    ``distance_estimators.spherical_distance``, squared, and added up.

    Parameters
    ----------
    df: pandas.DataFrame
        Rows belonging to one cluster.
    columns: list
        Names of the columns of ``df`` that hold the coordinates.

    Returns
    -------
    The result of the built-in ``sum`` over the squared distances
    (presumably a NumPy float scalar; depends on what
    ``spherical_distance`` returns).
    """
    members = df[columns]
    points = members.values
    # Wrapped in a list so the centre is passed as a single-row 2-D array.
    centre = np.array([members.mean().values])

    squared = distance_estimators.spherical_distance(points, centre)
    squared *= squared  # square in place
    return sum(squared)
def sse_group(df, columns):
    """
    Compute the sum of squared errors (SSE) of one cluster.

    Each row's error is its ``distance_estimators.spherical_distance`` to
    the column-wise mean of ``df[columns]``; the errors are squared and
    summed.

    Parameters
    ----------
    df: pandas.DataFrame
        Rows belonging to one cluster.
    columns: list
        Names of the columns of ``df`` that hold the coordinates.

    Returns
    -------
    The result of the built-in ``sum`` over the squared errors
    (presumably a NumPy float scalar; depends on what
    ``spherical_distance`` returns).
    """
    coords = df[columns]
    row_values = coords.values
    # The mean is wrapped in a list so it is passed as a one-row 2-D array.
    mean_point = np.array([coords.mean().values])

    errors = distance_estimators.spherical_distance(row_values, mean_point)
    errors *= errors  # square in place
    total = sum(errors)
    return total
예제 #5
0
    def _calc_distances(df_prop, df_school):
        """
        Find the nearest school to every property.

        The distance from every property to every school is computed with
        ``distance_estimators.spherical_distance``; for each property the
        school with the smallest distance is selected.  ``df_prop`` itself
        is not modified: the result is built on a fresh copy.

        Parameters
        ----------
        df_prop: pandas.DataFrame
            Properties.  Must contain the columns ``LATITUDE``, ``LONGITUDE``,
            ``AV_TOTAL`` and ``LIVING_AREA``.
        df_school: pandas.DataFrame
            Schools.  Must contain the columns ``LATITUDE``, ``LONGITUDE``
            and ``sch_name``.

        Returns
        -------
        pandas.DataFrame
            One row per property with the columns ``LONGITUDE``, ``LATITUDE``,
            ``NEAREST_SCHOOL``, ``MIN_DISTANCE``, ``AV_TOTAL`` and
            ``LIVING_AREA`` (in that order).

        Raises
        ------
        ValueError
            If ``df_school`` has no rows, so no nearest school exists.
        """
        pos_prop = df_prop[['LATITUDE', 'LONGITUDE']].values
        pos_school = df_school[['LATITUDE', 'LONGITUDE']].values

        school_rows = pos_school.shape[0]
        if school_rows == 0:
            # np.argmin would fail on an empty axis with an opaque message.
            raise ValueError('df_school must contain at least one school')

        # distances[k, i] is the distance from property i to school k.
        distances = np.zeros((school_rows, pos_prop.shape[0]))
        for k in range(school_rows):
            # Copies are handed over as in the original code, presumably
            # because spherical_distance may modify its arguments in place
            # (its implementation is not visible here -- TODO confirm).
            distances[k, :] = distance_estimators.spherical_distance(
                pos_prop.copy(), pos_school[k].copy())

        min_indices = np.argmin(distances, 0)
        min_distances = np.amin(distances, 0)

        school_names = df_school['sch_name'].values

        # Work on a copy so that the caller's frame is left untouched.
        result = df_prop[[
            'LONGITUDE', 'LATITUDE', 'AV_TOTAL', 'LIVING_AREA'
        ]].copy()
        result.insert(2, 'NEAREST_SCHOOL', school_names[min_indices])
        result.insert(3, 'MIN_DISTANCE', min_distances)
        return result