Beispiel #1
0
 def column_names(self) -> List[str]:
     return (
         ["subscriber", "msisdn_counterpart"] +
         get_columns_for_level(self.level, self.column_name) + [
             f"{x}_counterpart"
             for x in get_columns_for_level(self.level, self.column_name)
         ] + [f"duration_{self.statistic}"])
Beispiel #2
0
    def _make_query(self):
        loc_cols = get_columns_for_level(self.level, self.column_name)
        loc_cols += [
            "{}_counterpart".format(c)
            for c in get_columns_for_level(self.level, self.column_name)
        ]
        loc_cols = ", ".join(loc_cols)

        return f"""
Beispiel #3
0
    def _make_query(self):
        """
        Default query method implemented in the
        metaclass Query().
        """
        relevant_columns = ", ".join(
            get_columns_for_level(self.level, self.column_name))

        sql = """
        SELECT
            day_trajectories.subscriber,
            day_trajectories.{rc},
            COUNT(*) AS dl_count
        FROM
            ({day_trajectories}) AS day_trajectories
        GROUP BY 
            day_trajectories.subscriber,
            day_trajectories.{rc}
        ORDER BY
            day_trajectories.subscriber,
            COUNT(*) DESC
        """.format(rc=relevant_columns,
                   day_trajectories=self.day_trajectories.get_query())

        return sql
Beispiel #4
0
    def _make_query(self):

        right_columns = get_columns_for_level(self.level, self.column_name)
        left_columns = self.left.column_names
        if "location_id" in right_columns and "location_id" in left_columns:
            left_columns.remove("location_id")

        right_columns_str = ", ".join([f"sites.{c}" for c in right_columns])
        left_columns_str = ", ".join([f"l.{c}" for c in left_columns])

        sql = f"""
        SELECT
            {left_columns_str},
            {right_columns_str}
        FROM
            ({self.left.get_query()}) AS l
        INNER JOIN
            ({self.right_query.get_query()}) AS sites
        ON
            l.location_id = sites.location_id
          AND
            l.{self.time_col}::date BETWEEN coalesce(sites.date_of_first_service,
                                                '-infinity'::timestamptz) AND
                                       coalesce(sites.date_of_last_service,
                                                'infinity'::timestamptz)
        """

        return sql
    def _make_query(self):
        # Grouped now represents a query with activities on the level of the cell,
        # if that is what the user has asked for then we are done, otherwise
        # we need to do the appropriate join and do a further group by.
        if self.level == "cell":
            sql = self._obj.get_query()
            cols = self._obj.groups
        # Otherwise we're after lat-lon, or an admin region.
        # in either case we need to join with the cell data
        else:
            cols = ", ".join(
                get_columns_for_level(self.level, self.column_name))
            cols += ", " + self._obj.time_col
            if self.interval == "hour" or self.interval == "min":
                cols += ",hour"
            if self.interval == "min":
                cols += ",min"

            sql = """

                SELECT
                    {cols},
                    sum(total) AS total
                FROM
                    ({j}) AS j
                GROUP BY
                    {cols}
                ORDER BY {cols}
            """.format(cols=cols, j=self._obj.get_query())

        return sql
Beispiel #6
0
    def _make_query(self):
        """
        Default query method implemented in the
        metaclass Query().
        """
        subscriber_query = "{} ORDER BY time".format(
            self.subscriber_locs.get_query())

        relevant_columns = ", ".join(
            get_columns_for_level(self.level, self.column_name))

        # Create a table which has the total times each subscriber visited
        # each location
        times_visited = """
        SELECT 
            subscriber_locs.subscriber, 
            {rc}, 
            count(*) AS total
        FROM ({subscriber_locs}) AS subscriber_locs
        GROUP BY subscriber_locs.subscriber, {rc}
        """.format(subscriber_locs=subscriber_query, rc=relevant_columns)

        sql = """
        SELECT 
            ranked.subscriber, 
            {rc}
        FROM
             (SELECT times_visited.subscriber, {rc},
             row_number() OVER (PARTITION BY times_visited.subscriber
              ORDER BY total DESC) AS rank
             FROM ({times_visited}) AS times_visited) AS ranked
        WHERE rank = 1
        """.format(times_visited=times_visited, rc=relevant_columns)

        return sql
Beispiel #7
0
    def __get_location_buffer(self):
        """
        Protected method for generating SQL
        for the buffer areas between a location
        (i..e an origin) and all its possible
        counterparts (i.e. destinations).
        """
        cols = get_columns_for_level(self.level)

        from_cols = ", ".join("{c}_from".format(c=c) for c in cols)
        to_cols = ", ".join("{c}_to".format(c=c) for c in cols)
        sql = """

            SELECT
                {froms},
                {tos},
                A.distance AS distance,
                A.geom_origin AS geom_origin,
                A.geom_destination AS geom_destination,
                ST_Buffer(A.geom_destination::geography, A.distance * 1000) AS geom_buffer
            FROM ({distance_matrix_table}) AS A

        """.format(
            distance_matrix_table=self.distance_matrix.get_query(),
            froms=from_cols,
            tos=to_cols,
        )

        return sql
 def __init__(
     self,
     *,
     meaningful_locations: MeaningfulLocations,
     level: str = "admin3",
     column_name: Union[str, None, List[str]] = None,
     polygon_table: str = None,
     geom_column: str = "geom",
     size: int = None,
 ) -> None:
     self.meaningful_locations = meaningful_locations
     level_cols = get_columns_for_level(level, column_name)
     self.column_name = column_name
     self.level = level
     if level.startswith("admin"):
         if level_cols == ["pcod"]:
             level_cols = [f"{level}pcod"]
         self.aggregator = GeoTable(f"geography.{level}",
                                    geom_column="geom",
                                    columns=["geom"] + level_cols)
     elif level == "polygon":
         self.aggregator = GeoTable(
             polygon_table,
             geom_column=geom_column,
             columns=[geom_column] + level_cols,
         )
     elif level == "grid":
         self.aggregator = Grid(size=size)
     else:
         raise BadLevelError(
             f"'{level}' is not an allowed level for meaningful locations, must be one of {MeaningfulLocationsOD.allowed_levels}'"
         )
     super().__init__()
Beispiel #9
0
    def __init__(
        self,
        start,
        stop,
        *,
        level="cell",
        column_name=None,
        subscriber_identifier="msisdn",
        hours="all",
        subscriber_subset=None,
        tables="all",
        ignore_nulls=True,
    ):

        self.subscriber_locations = subscriber_locations(
            start=start,
            stop=stop,
            level=level,
            column_name=column_name,
            table=tables,
            hours=hours,
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
            ignore_nulls=ignore_nulls,
        )
        self.location_cols = ", ".join(
            get_columns_for_level(level=level, column_name=column_name)
        )
        super().__init__()
Beispiel #10
0
    def _make_query(self):
        """
        Default query method implemented in the
        metaclass Query().
        Returns a sorted calldays table.
        """
        relevant_columns = ", ".join(
            get_columns_for_level(self.ul.level, self.ul.column_name))

        sql = f"""
        SELECT * FROM (
            SELECT
                connections.subscriber,
                {relevant_columns},
                COUNT(*) AS calldays
            FROM (
                SELECT DISTINCT locations.subscriber, {relevant_columns}, locations.time::date
                FROM ({self.ul.get_query()}) AS locations
            ) AS connections
            GROUP BY connections.subscriber, {relevant_columns}
        ) calldays
        ORDER BY calldays.subscriber ASC, calldays.calldays DESC
        """

        return sql
Beispiel #11
0
    def _make_query(self):

        # to_char('2016-01-01'::date, 'day');
        # select extract(hour from timestamp '2001-02-16 20:38:40');

        day_of_week_and_hour_added = f"""SELECT *, 
        trim(to_char(datetime, 'day')) as dow, extract(hour from datetime) as hour 
        FROM ({self.sds.get_query()}) _"""

        hour_case = f"""(CASE 
        {" ".join(f"WHEN hour={hour} THEN {score}" for hour, score in enumerate(self.score_hour))}
        END)"""

        day_case = f"""(CASE 
                {" ".join(f"WHEN dow='{dow}' THEN {score}" for dow, score in self.score_dow.items())}
                END)"""

        location_cols = get_columns_for_level(self.level, self.column_name)

        query = f"""
        SELECT subscriber, {", ".join(location_cols)}, datetime,
               {hour_case} AS score_hour,
               {day_case} AS score_dow
        FROM ({day_of_week_and_hour_added}) AS subset_dates
        """

        location_cols = ", ".join(f"scores.{c}" for c in location_cols)

        return f"""
Beispiel #12
0
 def _make_query(self):
     loc_cols = ", ".join(
         get_columns_for_level(self.level, self.column_name))
     where_clause = ""
     if self.direction != "both":
         where_clause = "WHERE {}outgoing".format("" if self.direction ==
                                                  "out" else "NOT ")
     return f"""
Beispiel #13
0
def test_column_list():
    """
    Test that supplying the column name as a list returns it as a new list.
    """
    passed_cols = ["frogs", "dogs"]
    returned_cols = get_columns_for_level("admin0", passed_cols)
    assert passed_cols == returned_cols
    assert id(passed_cols) != id(returned_cols)
Beispiel #14
0
 def column_names(self) -> List[str]:
     cols = get_columns_for_level(self.level, self.column_name) + ["date"]
     if self.interval in ["hour", "min"]:
         cols += ["hour"]
     if self.interval == "min":
         cols += ["min"]
     cols += ["total"]
     return cols
Beispiel #15
0
    def _make_query(self):
        """
        Protected method that generates SQL
        that calculates the population that is
        covered by a buffer.
        """
        cols = get_columns_for_level(self.level)

        from_cols = ", ".join("B.{c}_from".format(c=c) for c in cols)
        outer_from_cols = ", ".join("C.{c}_from".format(c=c) for c in cols)
        to_cols = ", ".join("B.{c}_to".format(c=c) for c in cols)
        outer_to_cols = ", ".join("C.{c}_to".format(c=c) for c in cols)
        pop_join = " AND ".join("A.{c} = B.{c}_to".format(c=c) for c in cols)
        sql = """
            SELECT row_number() OVER(ORDER BY C.geom_buffer) AS id,
                {froms_outer},
                {tos_outer},
                C.distance,
                C.geom_buffer,
                sum(C.destination_population) AS buffer_population,
                count(*) AS n_sites
                FROM
                (SELECT
                    {froms},
                    {tos},
                    B.distance,
                    B.geom_buffer,
                    A.destination_population
                FROM (
                    SELECT DISTINCT ON ({tos})
                        {tos},
                        B.geom_destination,
                        A.total AS destination_population
                    FROM ({population_table}) AS A
                    JOIN ({location_buffer_table}) AS B
                        ON {pop_join}
                ) AS A
                INNER JOIN ({location_buffer_table}) AS B
                    ON ST_Intersects(B.geom_buffer::geography,          
                                    A.geom_destination::geography)) AS C
            GROUP BY {froms_outer},
                    {tos_outer},
                    C.distance,
                    C.geom_buffer

        """.format(
            population_table=self.population_object.get_query(),
            location_buffer_table=self.__get_location_buffer(),
            pop_join=pop_join,
            froms=from_cols,
            tos=to_cols,
            froms_outer=outer_from_cols,
            tos_outer=outer_to_cols,
        )

        return sql
    def _make_query(self):
        agg_query, agg_cols = self.aggregator._geo_augmented_query()
        level_cols = get_columns_for_level(self.level, self.column_name)
        level_cols_aliased = level_cols
        if level_cols == ["pcod"]:
            level_cols_aliased = [f"{self.level}pcod as pcod"]

        level_cols = ", ".join(level_cols)
        level_cols_aliased = ", ".join(level_cols_aliased)
        return f"""
    def _make_query(self):
        loc_cols = ", ".join(get_columns_for_level(self.level, self.column_name))

        where_clause = ""
        if self.direction != "both":
            where_clause = (
                f"WHERE outgoing IS {'TRUE' if self.direction == 'out' else 'FALSE'}"
            )

        return f"""
Beispiel #18
0
    def _make_query(self):
        """
        Default query method implemented in the
        metaclass Query().
        Returns a sorted calldays table.
        """
        relevant_columns = ", ".join(
            get_columns_for_level(self.ul.level, self.ul.column_name))

        return f"""
Beispiel #19
0
 def _make_query(self):
     cols = ",".join(get_columns_for_level(self.network_object))
     group_cols = ",".join(
         get_columns_for_level(self.level, self.joined.column_name)
     )
     return """
     SELECT {group_cols}, COUNT(*) as total,
          datetime FROM
           (SELECT DISTINCT {group_cols}, {cols}, datetime FROM           
             (SELECT {group_cols}, {cols}, date_trunc('{period}', x.datetime) AS datetime
             FROM ({etu}) x) y) _
         GROUP BY {group_cols}, datetime
         ORDER BY {group_cols}, datetime
     """.format(
         period=self.period,
         etu=self.joined.get_query(),
         cols=cols,
         group_cols=group_cols,
     )
    def _make_query(self):

        right_columns = get_columns_for_level(self.level, self.column_name)
        left_columns = self.left.column_names
        if "location_id" in right_columns and "location_id" in left_columns:
            left_columns.remove("location_id")

        right_columns_str = ", ".join(f"sites.{c}" for c in right_columns)
        left_columns_str = ", ".join(f"l.{c}" for c in left_columns)

        return f"""
Beispiel #21
0
    def index_cols(self):
        """
        A list of columns to use as indexes when storing this query.


        Returns
        -------
        ixen : list
            By default, returns the location columns if they are present
            and self.level is defined, and the subscriber column.

        Examples
        --------
        >>> daily_location("2016-01-01").index_cols
        [['name'], '"subscriber"']
        """
        from flowmachine.utils import (
            get_columns_for_level,
        )  # Local import to avoid circular import

        cols = self.column_names
        ixen = []
        try:
            # Not all objects define the attribute column_name so we'll fall
            # back to the default if it is not defined
            try:
                loc_cols = get_columns_for_level(self.level, self.column_name)
            except AttributeError:
                loc_cols = get_columns_for_level(self.level)
            if set(loc_cols).issubset(cols):
                ixen.append(loc_cols)
        except AttributeError:
            pass
        try:
            if self.subscriber_identifier in cols:
                ixen.append(self.subscriber_identifier)
            elif "subscriber" in cols:
                ixen.append('"subscriber"')
        except AttributeError:
            pass
        return ixen
    def _make_query(self):
        agg_query, agg_cols = self.aggregator._geo_augmented_query()
        level_cols = [
            f"{col}_{direction}"
            for col in get_columns_for_level(self.level, self.column_name)
            for direction in ("from", "to")
        ]
        level_cols_aliased = [
            f"{direction}_q.{col} as {col}_{direction}"
            for col in get_columns_for_level(self.level, self.column_name)
            for direction in ("from", "to")
        ]
        if level_cols == ["pcod_from", "pcod_to"]:
            level_cols_aliased = [
                f"from_q.{self.level}pcod as pcod_from",
                f"to_q.{self.level}pcod as pcod_to",
            ]

        level_cols = ", ".join(level_cols)
        level_cols_aliased = ", ".join(level_cols_aliased)
        return f"""
Beispiel #23
0
    def column_names(self) -> List[str]:
        cols = get_columns_for_level(self.level)

        cols.remove("lat")
        cols.remove("lon")

        col_names = [f"{c}_from" for c in cols]
        col_names += [f"{c}_to" for c in cols]
        col_names += [f"{c}_from" for c in ("lon", "lat")]
        col_names += [f"{c}_to" for c in ("lon", "lat")]
        col_names += ["distance"]
        if self.return_geometry:
            col_names += ["geom_origin", "geom_destination"]
        return col_names
Beispiel #24
0
    def _make_query(self):

        loc_cols = ", ".join(
            get_columns_for_level(self.level, self.column_name))

        sql = f"""
        SELECT
            subscriber,
            {loc_cols}
        FROM
            ({self.pslds_subset.get_query()}) _
        """

        return sql
Beispiel #25
0
    def _append_date(self, dl):
        """
        Takes a daily location object and returns a query representing that
        daily location, but with an additional (constant) column with the
        date to which that daily-loc applies.

        Returns
        -------
        CustomQuery
        """

        date_string = f"to_date('{dl.start}','YYYY-MM-DD') AS date"
        sql = f"SELECT *, {date_string} FROM ({dl.get_query()}) AS dl"
        return CustomQuery(
            sql, get_columns_for_level(self.level, self.column_name) + ["date"]
        )
 def _make_query(self):
     group_cols = ",".join(
         get_columns_for_level(self.total_objs.level,
                               self.total_objs.joined.column_name))
     sql = """
     SELECT {group_cols}, {stat}(z.total) as {stat},
     date_trunc('{by}', z.datetime) as datetime FROM 
         ({totals}) z
     GROUP BY {group_cols}, date_trunc('{by}', z.datetime)
     ORDER BY {group_cols}, date_trunc('{by}', z.datetime)
     """.format(
         by=self.by,
         stat=self.statistic,
         totals=self.total_objs.get_query(),
         group_cols=group_cols,
     )
     return sql
    def _make_query(self):
        """
        Default query method implemented in the
        metaclass Query().
        """

        relevant_columns = ",".join(
            get_columns_for_level(self.level, self.column_name))
        return """
        SELECT {rc}, COUNT(unique_subscribers) AS unique_subscriber_counts FROM 
        (SELECT 
            DISTINCT
            {rc},
            all_locs.subscriber as unique_subscribers
        FROM ({all_locs}) AS all_locs) AS _
        GROUP BY {rc} 
        """.format(all_locs=self.ul.get_query(), rc=relevant_columns)
    def _make_query(self):
        """
        Default query method implemented in the
        metaclass Query().
        """
        relevant_columns = ",".join(
            get_columns_for_level(self.level, self.column_name))

        return """
        SELECT final_time.subscriber, {rc}
        FROM
             (SELECT subscriber_locs.subscriber, time, {rc},
             row_number() OVER (PARTITION BY subscriber_locs.subscriber ORDER BY time DESC)
                 AS rank
             FROM ({subscriber_locs}) AS subscriber_locs) AS final_time
        WHERE rank = 1
        """.format(subscriber_locs=self.subscriber_locs.get_query(),
                   rc=relevant_columns)
Beispiel #29
0
    def _make_query(self):
        cols = get_columns_for_level(self.level)
        sql_location_table = "SELECT * FROM infrastructure." + (
            "sites" if self.level == "versioned-site" else "cells")
        cols.remove("lat")
        cols.remove("lon")

        from_cols = ", ".join("A.{c_id_safe} AS {c}_from".format(
            c_id_safe="id" if c.endswith("id") else c, c=c) for c in cols)
        to_cols = ", ".join("B.{c_id_safe} AS {c}_to".format(
            c_id_safe="id" if c.endswith("id") else c, c=c) for c in cols)

        return_geometry_statement = ""
        if self.return_geometry:
            return_geometry_statement = """
                ,
                A.geom_point AS geom_origin,
                B.geom_point AS geom_destination
            """

        return """

            SELECT
                {froms},
                {tos},
                ST_X(A.geom_point::geometry) AS lon_from,
                ST_Y(A.geom_point::geometry) AS lat_from,
                ST_X(B.geom_point::geometry) AS lon_to,
                ST_Y(B.geom_point::geometry) AS lat_to,
                ST_Distance(
                    A.geom_point::geography, 
                    B.geom_point::geography
                ) / 1000 AS distance
                {return_geometry_statement}
            FROM ({location_table_statement}) AS A
            CROSS JOIN ({location_table_statement}) AS B
            ORDER BY distance DESC
            
        """.format(
            location_table_statement=sql_location_table,
            froms=from_cols,
            tos=to_cols,
            return_geometry_statement=return_geometry_statement,
        )
    def _make_query(self):
        """
        Default query method implemented in the
        metaclass Query().
        """

        column_name = get_columns_for_level(self.ul.level)

        clause = self._get_locations_clause(self.location, column_name)

        return """
        SELECT 
            relevant_locs.subscriber,
            min(time) AS time
        FROM
            (SELECT * FROM ({subscriber_locs}) AS subscriber_locs
            {clause}) AS relevant_locs
        GROUP BY relevant_locs.subscriber
        """.format(subscriber_locs=self.ul.get_query(), clause=clause)