Exemple #1
0
    def _make_query(self):
        """
        Build the SQL for the calldays table.

        For every subscriber and location, counts the number of distinct
        dates on which the subscriber appears at that location in the
        underlying locations query (``self.ul``).

        Returns
        -------
        str
            SQL whose rows are ordered by subscriber (ascending) and then
            by calldays (descending).
        """
        location_columns = ", ".join(
            get_columns_for_level(self.level, self.column_name)
        )
        locations_query = self.ul.get_query()

        # The inner DISTINCT collapses multiple events on the same day so
        # that COUNT(*) in the outer query counts days rather than events.
        return f"""
        SELECT * FROM (
            SELECT
                connections.subscriber,
                {location_columns},
                COUNT(*) AS calldays
            FROM (
                SELECT DISTINCT locations.subscriber, {location_columns}, locations.time::date
                FROM ({locations_query}) AS locations
            ) AS connections
            GROUP BY connections.subscriber, {location_columns}
        ) calldays
        ORDER BY calldays.subscriber ASC, calldays.calldays DESC
        """
Exemple #2
0
    def __get_location_buffer(self):
        """
        Build the SQL for the buffer areas between a location (i.e. an
        origin) and all of its possible counterparts (i.e. destinations).

        Every row of the distance matrix query is returned together with a
        ``geom_buffer`` column: ``ST_Buffer`` applied to the destination
        geometry (cast to geography) with a radius of ``distance * 1000``.

        Returns
        -------
        str
        """
        level_columns = get_columns_for_level(self.level)
        froms = ", ".join(f"{c}_from" for c in level_columns)
        tos = ", ".join(f"{c}_to" for c in level_columns)
        distance_matrix_table = self.distance_matrix.get_query()

        return f"""

            SELECT
                {froms},
                {tos},
                A.distance AS distance,
                A.geom_origin AS geom_origin,
                A.geom_destination AS geom_destination,
                ST_Buffer(A.geom_destination::geography, A.distance * 1000) AS geom_buffer
            FROM ({distance_matrix_table}) AS A

        """
Exemple #3
0
    def _make_query(self):
        """
        Build the SQL that calculates the population covered by each buffer.

        The innermost query joins the population table to the location
        buffer table on the destination columns and keeps one row per
        destination (its geometry and total population). Those destinations
        are then joined to every buffer whose geometry they intersect, and
        the result is grouped per origin/destination/distance/buffer to give
        the summed ``buffer_population`` and the number of contributing
        rows, ``n_sites``.

        Returns
        -------
        str
        """
        level_columns = get_columns_for_level(self.level)

        def qualified(alias, suffix):
            # e.g. qualified("B", "to") -> "B.<col1>_to, B.<col2>_to"
            return ", ".join(f"{alias}.{c}_{suffix}" for c in level_columns)

        substitutions = {
            "froms": qualified("B", "from"),
            "froms_outer": qualified("C", "from"),
            "tos": qualified("B", "to"),
            "tos_outer": qualified("C", "to"),
            "pop_join": " AND ".join(
                f"A.{c} = B.{c}_to" for c in level_columns
            ),
            "population_table": self.population_object.get_query(),
            "location_buffer_table": self.__get_location_buffer(),
        }

        template = """
            SELECT row_number() OVER(ORDER BY C.geom_buffer) AS id,
                {froms_outer},
                {tos_outer},
                C.distance,
                C.geom_buffer,
                sum(C.destination_population) AS buffer_population,
                count(*) AS n_sites
                FROM
                (SELECT
                    {froms},
                    {tos},
                    B.distance,
                    B.geom_buffer,
                    A.destination_population
                FROM (
                    SELECT DISTINCT ON ({tos})
                        {tos},
                        B.geom_destination,
                        A.total AS destination_population
                    FROM ({population_table}) AS A
                    JOIN ({location_buffer_table}) AS B
                        ON {pop_join}
                ) AS A
                INNER JOIN ({location_buffer_table}) AS B
                    ON ST_Intersects(B.geom_buffer::geography,          
                                    A.geom_destination::geography)) AS C
            GROUP BY {froms_outer},
                    {tos_outer},
                    C.distance,
                    C.geom_buffer

        """

        return template.format(**substitutions)
Exemple #4
0
    def index_cols(self):
        """
        A list of columns to use as indexes when storing this query.

        Returns
        -------
        ixen : list
            The location columns (as one nested list) when ``self.level`` is
            defined and all of them are among ``self.column_names``,
            followed by the subscriber column: ``self.subscriber_identifier``
            if it is one of the columns, ``'"subscriber"'`` otherwise.
            Objects without a ``subscriber_identifier`` attribute get no
            subscriber entry.

        Examples
        --------
        >>> daily_location("2016-01-01").index_cols
        [['name'], '"subscriber"']
        """
        # Imported here rather than at module level to avoid a circular import.
        from flowmachine.utils.utils import get_columns_for_level

        available = self.column_names
        indexes = []

        try:
            try:
                # Prefer the object's own column_name when it defines one...
                location_columns = get_columns_for_level(
                    self.level, self.column_name)
            except AttributeError:
                # ...otherwise fall back to the default columns for the level.
                location_columns = get_columns_for_level(self.level)
            if set(location_columns).issubset(available):
                indexes.append(location_columns)
        except AttributeError:
            # No usable level on this object: no location index.
            pass

        try:
            indexes.append(
                self.subscriber_identifier
                if self.subscriber_identifier in available
                else '"subscriber"'
            )
        except AttributeError:
            # No subscriber_identifier on this object: no subscriber index.
            pass

        return indexes
Exemple #5
0
    def _make_query(self):
        """
        Build the SQL for the distance matrix between every pair of locations.

        The location table (``infrastructure.sites`` when the level is
        ``"versioned-site"``, ``infrastructure.cells`` otherwise) is cross
        joined with itself. Each row carries the level's columns suffixed
        with ``_from``/``_to``, the coordinates of both points and
        ``distance``, which is ``ST_Distance`` over the two geographies
        divided by 1000. The origin and destination geometries are appended
        when ``self.return_geometry`` is truthy. Rows are ordered by
        descending distance.

        Returns
        -------
        str
        """
        sql_location_table = "SELECT * FROM infrastructure." + (
            "sites" if self.level == "versioned-site" else "cells")

        # lat/lon are always emitted explicitly below (lon_from, lat_from,
        # lon_to, lat_to), so drop them from the level columns. Each one is
        # filtered independently: a missing "lat" must not prevent "lon"
        # from being dropped, and the helper's list is left unmodified.
        cols = [
            c for c in get_columns_for_level(self.level)
            if c not in ("lat", "lon")
        ]

        def aliased(table, suffix):
            # Columns whose name ends in "id" are read from the table's
            # "id" column but keep their own name in the output alias.
            return ", ".join(
                f"{table}.{'id' if c.endswith('id') else c} AS {c}_{suffix}"
                for c in cols
            )

        from_cols = aliased("A", "from")
        to_cols = aliased("B", "to")

        return_geometry_statement = ""
        if self.return_geometry:
            return_geometry_statement = """
                ,
                A.geom_point AS geom_origin,
                B.geom_point AS geom_destination
            """

        sql = """

            SELECT
                {froms},
                {tos},
                ST_X(A.geom_point::geometry) AS lon_from,
                ST_Y(A.geom_point::geometry) AS lat_from,
                ST_X(B.geom_point::geometry) AS lon_to,
                ST_Y(B.geom_point::geometry) AS lat_to,
                ST_Distance(
                    A.geom_point::geography, 
                    B.geom_point::geography
                ) / 1000 AS distance
                {return_geometry_statement}
            FROM ({location_table_statement}) AS A
            CROSS JOIN ({location_table_statement}) AS B
            ORDER BY distance DESC
            
        """.format(
            location_table_statement=sql_location_table,
            froms=from_cols,
            tos=to_cols,
            return_geometry_statement=return_geometry_statement,
        )

        return sql
Exemple #6
0
def test_column_list():
    """A list passed as the column name comes back as an equal but distinct list."""
    supplied = ["frogs", "dogs"]
    result = get_columns_for_level("admin0", supplied)
    # Same contents...
    assert supplied == result
    # ...but a copy, not the caller's own list object.
    assert supplied is not result
Exemple #7
0
def test_columns_for_level_errors(level, column_name, error):
    """Check that get_columns_for_level raises `error` for this level/column_name."""
    expected_failure = pytest.raises(error)
    with expected_failure:
        get_columns_for_level(level, column_name)
Exemple #8
0
 def column_names(self) -> List[str]:
     """Return the result columns: subscriber, the location columns, then calldays."""
     location_columns = get_columns_for_level(self.level, self.column_name)
     return ["subscriber"] + location_columns + ["calldays"]
Exemple #9
0
    def run(
        self,
        uniform_departure_rate=0.1,
        departure_rate_vector=None,
        ignore_missing=False,
    ):
        """
        Runs model.

        Parameters
        ----------
        uniform_departure_rate : float
            Proportion of population from location i
            that will be departing in observed time period.
            This proportion applies to all locations
            uniformly.

        departure_rate_vector : dict
            A dictionary that contains the proportion
            of the population from locations i that have
            departed those locations. The keys of the
            dictionaries must be the location identifier
            and the values the departure rate.
            If passed (and non-empty), this will be used over the
            `uniform_departure_rate` parameter.

        ignore_missing : bool
            If True, existing locations that are not
            found in the departure_rate_vector dictionary
            will be computed using zero departures.

        Returns
        -------
        pandas.DataFrame
            A mobility matrix with one row per ordered pair of distinct
            locations: the ``<column>_from`` and ``<column>_to`` location
            columns followed by ``prediction`` and ``probability``.

        Raises
        ------
        ValueError
            If `departure_rate_vector` is given, `ignore_missing` is False
            and its length differs from the number of locations.

        """
        # The inputs are always computed here. They only ever live in local
        # variables, so skipping this step (as a guard on
        # ``self.__dict__`` used to allow) would leave them undefined below.
        logger.warning(" Computing Population() and DistanceMatrix() "
                       "objects. This can take a few minutes.")

        level_columns = get_columns_for_level(self.level)
        pair_columns = [
            f"{c}_{d}" for d in ("from", "to") for c in level_columns
        ]

        population_df = self.population_object.get_dataframe()
        population_buffer = self.population_buffer_object.get_dataframe()
        population_buffer.set_index(pair_columns, inplace=True)

        beta = 1 / population_df["total"].sum()

        # Capture the location identifiers before they become the index.
        locations = population_df[level_columns].values.tolist()
        population_df.set_index(level_columns, inplace=True)

        if not departure_rate_vector:
            logger.warning(
                f" Using an uniform departure rate of {uniform_departure_rate}"
                " for all locations.")
        elif not ignore_missing and len(departure_rate_vector) != len(
                locations):
            raise ValueError("Locations missing from "
                             "`departure_rate_vector`. Use "
                             "ignore_missing=True if locations "
                             "without rates should be ignored.")

        results = []
        for origin in locations:
            m_i = self.__get_population(population_df, origin)

            if departure_rate_vector:
                # NOTE(review): both lookups use only the first location
                # column as the key (bare, then as a 1-tuple) - confirm this
                # is intended for multi-column levels.
                try:
                    T_i = m_i * departure_rate_vector[origin[0]]
                except KeyError:
                    try:
                        T_i = m_i * departure_rate_vector[tuple(origin[:1])]
                    except KeyError:
                        # Location without a rate: zero departures.
                        T_i = 0
            else:
                T_i = m_i * uniform_departure_rate

            # Population and buffer population of every other location,
            # looked up once and shared by the two passes below.
            destinations = [
                (
                    destination,
                    self.__get_population(population_df, destination),
                    self.__get_buffer_population(
                        population_buffer, origin, destination),
                )
                for destination in locations
                if destination != origin
            ]

            # Normalising constant. Accumulated with a plain loop so the
            # floating point summation order is exactly left-to-right.
            sigma = 0
            for _, m_k, S_ik in destinations:
                sigma += m_k * ((1 / S_ik) - beta)

            for destination, m_j, S_ij in destinations:
                T_ij = (T_i * m_j * ((1 / S_ij) - beta)) / sigma
                probability = T_ij / T_i if T_i != 0 else 0
                results.append(origin + destination + [T_ij, probability])

        columns = pair_columns + ["prediction", "probability"]
        return pd.DataFrame(results, columns=columns)