def column_names(self) -> List[str]: return ( ["subscriber", "msisdn_counterpart"] + get_columns_for_level(self.level, self.column_name) + [ f"{x}_counterpart" for x in get_columns_for_level(self.level, self.column_name) ] + [f"duration_{self.statistic}"])
def _make_query(self): loc_cols = get_columns_for_level(self.level, self.column_name) loc_cols += [ "{}_counterpart".format(c) for c in get_columns_for_level(self.level, self.column_name) ] loc_cols = ", ".join(loc_cols) return f"""
def _make_query(self): """ Default query method implemented in the metaclass Query(). """ relevant_columns = ", ".join( get_columns_for_level(self.level, self.column_name)) sql = """ SELECT day_trajectories.subscriber, day_trajectories.{rc}, COUNT(*) AS dl_count FROM ({day_trajectories}) AS day_trajectories GROUP BY day_trajectories.subscriber, day_trajectories.{rc} ORDER BY day_trajectories.subscriber, COUNT(*) DESC """.format(rc=relevant_columns, day_trajectories=self.day_trajectories.get_query()) return sql
def _make_query(self): right_columns = get_columns_for_level(self.level, self.column_name) left_columns = self.left.column_names if "location_id" in right_columns and "location_id" in left_columns: left_columns.remove("location_id") right_columns_str = ", ".join([f"sites.{c}" for c in right_columns]) left_columns_str = ", ".join([f"l.{c}" for c in left_columns]) sql = f""" SELECT {left_columns_str}, {right_columns_str} FROM ({self.left.get_query()}) AS l INNER JOIN ({self.right_query.get_query()}) AS sites ON l.location_id = sites.location_id AND l.{self.time_col}::date BETWEEN coalesce(sites.date_of_first_service, '-infinity'::timestamptz) AND coalesce(sites.date_of_last_service, 'infinity'::timestamptz) """ return sql
def _make_query(self): # Grouped now represents a query with activities on the level of the cell, # if that is what the user has asked for then we are done, otherwise # we need to do the appropriate join and do a further group by. if self.level == "cell": sql = self._obj.get_query() cols = self._obj.groups # Otherwise we're after lat-lon, or an admin region. # in either case we need to join with the cell data else: cols = ", ".join( get_columns_for_level(self.level, self.column_name)) cols += ", " + self._obj.time_col if self.interval == "hour" or self.interval == "min": cols += ",hour" if self.interval == "min": cols += ",min" sql = """ SELECT {cols}, sum(total) AS total FROM ({j}) AS j GROUP BY {cols} ORDER BY {cols} """.format(cols=cols, j=self._obj.get_query()) return sql
def _make_query(self): """ Default query method implemented in the metaclass Query(). """ subscriber_query = "{} ORDER BY time".format( self.subscriber_locs.get_query()) relevant_columns = ", ".join( get_columns_for_level(self.level, self.column_name)) # Create a table which has the total times each subscriber visited # each location times_visited = """ SELECT subscriber_locs.subscriber, {rc}, count(*) AS total FROM ({subscriber_locs}) AS subscriber_locs GROUP BY subscriber_locs.subscriber, {rc} """.format(subscriber_locs=subscriber_query, rc=relevant_columns) sql = """ SELECT ranked.subscriber, {rc} FROM (SELECT times_visited.subscriber, {rc}, row_number() OVER (PARTITION BY times_visited.subscriber ORDER BY total DESC) AS rank FROM ({times_visited}) AS times_visited) AS ranked WHERE rank = 1 """.format(times_visited=times_visited, rc=relevant_columns) return sql
def __get_location_buffer(self): """ Protected method for generating SQL for the buffer areas between a location (i..e an origin) and all its possible counterparts (i.e. destinations). """ cols = get_columns_for_level(self.level) from_cols = ", ".join("{c}_from".format(c=c) for c in cols) to_cols = ", ".join("{c}_to".format(c=c) for c in cols) sql = """ SELECT {froms}, {tos}, A.distance AS distance, A.geom_origin AS geom_origin, A.geom_destination AS geom_destination, ST_Buffer(A.geom_destination::geography, A.distance * 1000) AS geom_buffer FROM ({distance_matrix_table}) AS A """.format( distance_matrix_table=self.distance_matrix.get_query(), froms=from_cols, tos=to_cols, ) return sql
def __init__( self, *, meaningful_locations: MeaningfulLocations, level: str = "admin3", column_name: Union[str, None, List[str]] = None, polygon_table: str = None, geom_column: str = "geom", size: int = None, ) -> None: self.meaningful_locations = meaningful_locations level_cols = get_columns_for_level(level, column_name) self.column_name = column_name self.level = level if level.startswith("admin"): if level_cols == ["pcod"]: level_cols = [f"{level}pcod"] self.aggregator = GeoTable(f"geography.{level}", geom_column="geom", columns=["geom"] + level_cols) elif level == "polygon": self.aggregator = GeoTable( polygon_table, geom_column=geom_column, columns=[geom_column] + level_cols, ) elif level == "grid": self.aggregator = Grid(size=size) else: raise BadLevelError( f"'{level}' is not an allowed level for meaningful locations, must be one of {MeaningfulLocationsOD.allowed_levels}'" ) super().__init__()
def __init__( self, start, stop, *, level="cell", column_name=None, subscriber_identifier="msisdn", hours="all", subscriber_subset=None, tables="all", ignore_nulls=True, ): self.subscriber_locations = subscriber_locations( start=start, stop=stop, level=level, column_name=column_name, table=tables, hours=hours, subscriber_identifier=subscriber_identifier, subscriber_subset=subscriber_subset, ignore_nulls=ignore_nulls, ) self.location_cols = ", ".join( get_columns_for_level(level=level, column_name=column_name) ) super().__init__()
def _make_query(self): """ Default query method implemented in the metaclass Query(). Returns a sorted calldays table. """ relevant_columns = ", ".join( get_columns_for_level(self.ul.level, self.ul.column_name)) sql = f""" SELECT * FROM ( SELECT connections.subscriber, {relevant_columns}, COUNT(*) AS calldays FROM ( SELECT DISTINCT locations.subscriber, {relevant_columns}, locations.time::date FROM ({self.ul.get_query()}) AS locations ) AS connections GROUP BY connections.subscriber, {relevant_columns} ) calldays ORDER BY calldays.subscriber ASC, calldays.calldays DESC """ return sql
def _make_query(self): # to_char('2016-01-01'::date, 'day'); # select extract(hour from timestamp '2001-02-16 20:38:40'); day_of_week_and_hour_added = f"""SELECT *, trim(to_char(datetime, 'day')) as dow, extract(hour from datetime) as hour FROM ({self.sds.get_query()}) _""" hour_case = f"""(CASE {" ".join(f"WHEN hour={hour} THEN {score}" for hour, score in enumerate(self.score_hour))} END)""" day_case = f"""(CASE {" ".join(f"WHEN dow='{dow}' THEN {score}" for dow, score in self.score_dow.items())} END)""" location_cols = get_columns_for_level(self.level, self.column_name) query = f""" SELECT subscriber, {", ".join(location_cols)}, datetime, {hour_case} AS score_hour, {day_case} AS score_dow FROM ({day_of_week_and_hour_added}) AS subset_dates """ location_cols = ", ".join(f"scores.{c}" for c in location_cols) return f"""
def _make_query(self): loc_cols = ", ".join( get_columns_for_level(self.level, self.column_name)) where_clause = "" if self.direction != "both": where_clause = "WHERE {}outgoing".format("" if self.direction == "out" else "NOT ") return f"""
def test_column_list(): """ Test that supplying the column name as a list returns it as a new list. """ passed_cols = ["frogs", "dogs"] returned_cols = get_columns_for_level("admin0", passed_cols) assert passed_cols == returned_cols assert id(passed_cols) != id(returned_cols)
def column_names(self) -> List[str]: cols = get_columns_for_level(self.level, self.column_name) + ["date"] if self.interval in ["hour", "min"]: cols += ["hour"] if self.interval == "min": cols += ["min"] cols += ["total"] return cols
def _make_query(self): """ Protected method that generates SQL that calculates the population that is covered by a buffer. """ cols = get_columns_for_level(self.level) from_cols = ", ".join("B.{c}_from".format(c=c) for c in cols) outer_from_cols = ", ".join("C.{c}_from".format(c=c) for c in cols) to_cols = ", ".join("B.{c}_to".format(c=c) for c in cols) outer_to_cols = ", ".join("C.{c}_to".format(c=c) for c in cols) pop_join = " AND ".join("A.{c} = B.{c}_to".format(c=c) for c in cols) sql = """ SELECT row_number() OVER(ORDER BY C.geom_buffer) AS id, {froms_outer}, {tos_outer}, C.distance, C.geom_buffer, sum(C.destination_population) AS buffer_population, count(*) AS n_sites FROM (SELECT {froms}, {tos}, B.distance, B.geom_buffer, A.destination_population FROM ( SELECT DISTINCT ON ({tos}) {tos}, B.geom_destination, A.total AS destination_population FROM ({population_table}) AS A JOIN ({location_buffer_table}) AS B ON {pop_join} ) AS A INNER JOIN ({location_buffer_table}) AS B ON ST_Intersects(B.geom_buffer::geography, A.geom_destination::geography)) AS C GROUP BY {froms_outer}, {tos_outer}, C.distance, C.geom_buffer """.format( population_table=self.population_object.get_query(), location_buffer_table=self.__get_location_buffer(), pop_join=pop_join, froms=from_cols, tos=to_cols, froms_outer=outer_from_cols, tos_outer=outer_to_cols, ) return sql
def _make_query(self): agg_query, agg_cols = self.aggregator._geo_augmented_query() level_cols = get_columns_for_level(self.level, self.column_name) level_cols_aliased = level_cols if level_cols == ["pcod"]: level_cols_aliased = [f"{self.level}pcod as pcod"] level_cols = ", ".join(level_cols) level_cols_aliased = ", ".join(level_cols_aliased) return f"""
def _make_query(self): loc_cols = ", ".join(get_columns_for_level(self.level, self.column_name)) where_clause = "" if self.direction != "both": where_clause = ( f"WHERE outgoing IS {'TRUE' if self.direction == 'out' else 'FALSE'}" ) return f"""
def _make_query(self): """ Default query method implemented in the metaclass Query(). Returns a sorted calldays table. """ relevant_columns = ", ".join( get_columns_for_level(self.ul.level, self.ul.column_name)) return f"""
def _make_query(self): cols = ",".join(get_columns_for_level(self.network_object)) group_cols = ",".join( get_columns_for_level(self.level, self.joined.column_name) ) return """ SELECT {group_cols}, COUNT(*) as total, datetime FROM (SELECT DISTINCT {group_cols}, {cols}, datetime FROM (SELECT {group_cols}, {cols}, date_trunc('{period}', x.datetime) AS datetime FROM ({etu}) x) y) _ GROUP BY {group_cols}, datetime ORDER BY {group_cols}, datetime """.format( period=self.period, etu=self.joined.get_query(), cols=cols, group_cols=group_cols, )
def _make_query(self): right_columns = get_columns_for_level(self.level, self.column_name) left_columns = self.left.column_names if "location_id" in right_columns and "location_id" in left_columns: left_columns.remove("location_id") right_columns_str = ", ".join(f"sites.{c}" for c in right_columns) left_columns_str = ", ".join(f"l.{c}" for c in left_columns) return f"""
def index_cols(self): """ A list of columns to use as indexes when storing this query. Returns ------- ixen : list By default, returns the location columns if they are present and self.level is defined, and the subscriber column. Examples -------- >>> daily_location("2016-01-01").index_cols [['name'], '"subscriber"'] """ from flowmachine.utils import ( get_columns_for_level, ) # Local import to avoid circular import cols = self.column_names ixen = [] try: # Not all objects define the attribute column_name so we'll fall # back to the default if it is not defined try: loc_cols = get_columns_for_level(self.level, self.column_name) except AttributeError: loc_cols = get_columns_for_level(self.level) if set(loc_cols).issubset(cols): ixen.append(loc_cols) except AttributeError: pass try: if self.subscriber_identifier in cols: ixen.append(self.subscriber_identifier) elif "subscriber" in cols: ixen.append('"subscriber"') except AttributeError: pass return ixen
def _make_query(self): agg_query, agg_cols = self.aggregator._geo_augmented_query() level_cols = [ f"{col}_{direction}" for col in get_columns_for_level(self.level, self.column_name) for direction in ("from", "to") ] level_cols_aliased = [ f"{direction}_q.{col} as {col}_{direction}" for col in get_columns_for_level(self.level, self.column_name) for direction in ("from", "to") ] if level_cols == ["pcod_from", "pcod_to"]: level_cols_aliased = [ f"from_q.{self.level}pcod as pcod_from", f"to_q.{self.level}pcod as pcod_to", ] level_cols = ", ".join(level_cols) level_cols_aliased = ", ".join(level_cols_aliased) return f"""
def column_names(self) -> List[str]: cols = get_columns_for_level(self.level) cols.remove("lat") cols.remove("lon") col_names = [f"{c}_from" for c in cols] col_names += [f"{c}_to" for c in cols] col_names += [f"{c}_from" for c in ("lon", "lat")] col_names += [f"{c}_to" for c in ("lon", "lat")] col_names += ["distance"] if self.return_geometry: col_names += ["geom_origin", "geom_destination"] return col_names
def _make_query(self): loc_cols = ", ".join( get_columns_for_level(self.level, self.column_name)) sql = f""" SELECT subscriber, {loc_cols} FROM ({self.pslds_subset.get_query()}) _ """ return sql
def _append_date(self, dl): """ Takes a daily location object and returns a query representing that daily location, but with an additional (constant) column with the date to which that daily-loc applies. Returns ------- CustomQuery """ date_string = f"to_date('{dl.start}','YYYY-MM-DD') AS date" sql = f"SELECT *, {date_string} FROM ({dl.get_query()}) AS dl" return CustomQuery( sql, get_columns_for_level(self.level, self.column_name) + ["date"] )
def _make_query(self): group_cols = ",".join( get_columns_for_level(self.total_objs.level, self.total_objs.joined.column_name)) sql = """ SELECT {group_cols}, {stat}(z.total) as {stat}, date_trunc('{by}', z.datetime) as datetime FROM ({totals}) z GROUP BY {group_cols}, date_trunc('{by}', z.datetime) ORDER BY {group_cols}, date_trunc('{by}', z.datetime) """.format( by=self.by, stat=self.statistic, totals=self.total_objs.get_query(), group_cols=group_cols, ) return sql
def _make_query(self): """ Default query method implemented in the metaclass Query(). """ relevant_columns = ",".join( get_columns_for_level(self.level, self.column_name)) return """ SELECT {rc}, COUNT(unique_subscribers) AS unique_subscriber_counts FROM (SELECT DISTINCT {rc}, all_locs.subscriber as unique_subscribers FROM ({all_locs}) AS all_locs) AS _ GROUP BY {rc} """.format(all_locs=self.ul.get_query(), rc=relevant_columns)
def _make_query(self): """ Default query method implemented in the metaclass Query(). """ relevant_columns = ",".join( get_columns_for_level(self.level, self.column_name)) return """ SELECT final_time.subscriber, {rc} FROM (SELECT subscriber_locs.subscriber, time, {rc}, row_number() OVER (PARTITION BY subscriber_locs.subscriber ORDER BY time DESC) AS rank FROM ({subscriber_locs}) AS subscriber_locs) AS final_time WHERE rank = 1 """.format(subscriber_locs=self.subscriber_locs.get_query(), rc=relevant_columns)
def _make_query(self): cols = get_columns_for_level(self.level) sql_location_table = "SELECT * FROM infrastructure." + ( "sites" if self.level == "versioned-site" else "cells") cols.remove("lat") cols.remove("lon") from_cols = ", ".join("A.{c_id_safe} AS {c}_from".format( c_id_safe="id" if c.endswith("id") else c, c=c) for c in cols) to_cols = ", ".join("B.{c_id_safe} AS {c}_to".format( c_id_safe="id" if c.endswith("id") else c, c=c) for c in cols) return_geometry_statement = "" if self.return_geometry: return_geometry_statement = """ , A.geom_point AS geom_origin, B.geom_point AS geom_destination """ return """ SELECT {froms}, {tos}, ST_X(A.geom_point::geometry) AS lon_from, ST_Y(A.geom_point::geometry) AS lat_from, ST_X(B.geom_point::geometry) AS lon_to, ST_Y(B.geom_point::geometry) AS lat_to, ST_Distance( A.geom_point::geography, B.geom_point::geography ) / 1000 AS distance {return_geometry_statement} FROM ({location_table_statement}) AS A CROSS JOIN ({location_table_statement}) AS B ORDER BY distance DESC """.format( location_table_statement=sql_location_table, froms=from_cols, tos=to_cols, return_geometry_statement=return_geometry_statement, )
def _make_query(self): """ Default query method implemented in the metaclass Query(). """ column_name = get_columns_for_level(self.ul.level) clause = self._get_locations_clause(self.location, column_name) return """ SELECT relevant_locs.subscriber, min(time) AS time FROM (SELECT * FROM ({subscriber_locs}) AS subscriber_locs {clause}) AS relevant_locs GROUP BY relevant_locs.subscriber """.format(subscriber_locs=self.ul.get_query(), clause=clause)