def _make_query(self):
    """
    Default query method implemented in the metaclass Query().
    Returns a sorted calldays table.
    """
    level_columns = ", ".join(
        get_columns_for_level(self.level, self.column_name)
    )

    return f"""
        SELECT * FROM (
            SELECT
                connections.subscriber,
                {level_columns},
                COUNT(*) AS calldays
            FROM (
                SELECT DISTINCT
                    locations.subscriber,
                    {level_columns},
                    locations.time::date
                FROM ({self.ul.get_query()}) AS locations
            ) AS connections
            GROUP BY connections.subscriber, {level_columns}
        ) calldays
        ORDER BY calldays.subscriber ASC, calldays.calldays DESC
    """
def __get_location_buffer(self):
    """
    Protected method for generating SQL for the buffer areas between
    a location (i.e. an origin) and all its possible counterparts
    (i.e. destinations).
    """
    location_columns = get_columns_for_level(self.level)
    origin_columns = ", ".join(f"{c}_from" for c in location_columns)
    destination_columns = ", ".join(f"{c}_to" for c in location_columns)

    return f"""
        SELECT
            {origin_columns},
            {destination_columns},
            A.distance AS distance,
            A.geom_origin AS geom_origin,
            A.geom_destination AS geom_destination,
            ST_Buffer(A.geom_destination::geography, A.distance * 1000) AS geom_buffer
        FROM ({self.distance_matrix.get_query()}) AS A
    """
def _make_query(self):
    """
    Protected method that generates SQL that calculates the
    population that is covered by a buffer.

    Returns
    -------
    str
        SQL that yields, for each origin/destination pair, the buffer
        geometry, its distance, the total population inside the buffer
        (``buffer_population``) and the number of destination sites the
        buffer intersects (``n_sites``).
    """
    # Location-identifier columns for the configured aggregation level.
    cols = get_columns_for_level(self.level)
    # Column lists qualified for the inner (B) and outer (C) queries.
    from_cols = ", ".join("B.{c}_from".format(c=c) for c in cols)
    outer_from_cols = ", ".join("C.{c}_from".format(c=c) for c in cols)
    to_cols = ", ".join("B.{c}_to".format(c=c) for c in cols)
    outer_to_cols = ", ".join("C.{c}_to".format(c=c) for c in cols)
    # Join predicate matching a population row (A) against the
    # destination columns of a buffer row (B).
    pop_join = " AND ".join("A.{c} = B.{c}_to".format(c=c) for c in cols)
    # Note: the location-buffer SQL is interpolated TWICE below — once to
    # attach a population to each distinct destination (innermost A), and
    # once more (B) to find every buffer that spatially intersects that
    # destination point. The outer query then sums population per buffer.
    sql = """
        SELECT
            row_number() OVER(ORDER BY C.geom_buffer) AS id,
            {froms_outer},
            {tos_outer},
            C.distance,
            C.geom_buffer,
            sum(C.destination_population) AS buffer_population,
            count(*) AS n_sites
        FROM (
            SELECT
                {froms},
                {tos},
                B.distance,
                B.geom_buffer,
                A.destination_population
            FROM (
                -- DISTINCT ON keeps one population row per destination,
                -- avoiding double-counting when a destination appears in
                -- several origin/destination pairs.
                SELECT DISTINCT ON ({tos})
                    {tos},
                    B.geom_destination,
                    A.total AS destination_population
                FROM ({population_table}) AS A
                JOIN ({location_buffer_table}) AS B
                    ON {pop_join}
            ) AS A
            INNER JOIN ({location_buffer_table}) AS B
                ON ST_Intersects(B.geom_buffer::geography,
                                 A.geom_destination::geography)
        ) AS C
        GROUP BY
            {froms_outer},
            {tos_outer},
            C.distance,
            C.geom_buffer
    """.format(
        population_table=self.population_object.get_query(),
        location_buffer_table=self.__get_location_buffer(),
        pop_join=pop_join,
        froms=from_cols,
        tos=to_cols,
        froms_outer=outer_from_cols,
        tos_outer=outer_to_cols,
    )
    return sql
def index_cols(self):
    """
    A list of columns to use as indexes when storing this query.

    Returns
    -------
    ixen : list
        By default, returns the location columns if they are present
        and self.level is defined, and the subscriber column.

    Examples
    --------
    >>> daily_location("2016-01-01").index_cols
    [['name'], '"subscriber"']
    """
    from flowmachine.utils.utils import (
        get_columns_for_level,
    )  # Local import to avoid circular import

    cols = self.column_names
    indexes = []

    # Location columns: only indexable if self.level exists; fall back
    # to the default column name when column_name is not defined.
    try:
        try:
            loc_cols = get_columns_for_level(self.level, self.column_name)
        except AttributeError:
            loc_cols = get_columns_for_level(self.level)
    except AttributeError:
        pass
    else:
        if set(loc_cols).issubset(cols):
            indexes.append(loc_cols)

    # Subscriber column: index the declared identifier when it is one
    # of our columns, otherwise the default subscriber column.
    missing = object()
    sub_id = getattr(self, "subscriber_identifier", missing)
    if sub_id is not missing:
        indexes.append(sub_id if sub_id in cols else '"subscriber"')

    return indexes
def _make_query(self):
    # Sites for versioned-site level, cells otherwise.
    infrastructure_table = (
        "sites" if self.level == "versioned-site" else "cells"
    )
    location_table_sql = (
        "SELECT * FROM infrastructure." + infrastructure_table
    )

    # lat/lon are emitted explicitly below, so drop them from the
    # generic column list.
    cols = [
        c
        for c in get_columns_for_level(self.level)
        if c not in ("lat", "lon")
    ]

    # Identifier columns live under "id" in the infrastructure tables.
    origin_cols = ", ".join(
        f"A.{'id' if c.endswith('id') else c} AS {c}_from" for c in cols
    )
    destination_cols = ", ".join(
        f"B.{'id' if c.endswith('id') else c} AS {c}_to" for c in cols
    )

    geometry_sql = ""
    if self.return_geometry:
        geometry_sql = """
            ,
            A.geom_point AS geom_origin,
            B.geom_point AS geom_destination
        """

    return f"""
        SELECT
            {origin_cols},
            {destination_cols},
            ST_X(A.geom_point::geometry) AS lon_from,
            ST_Y(A.geom_point::geometry) AS lat_from,
            ST_X(B.geom_point::geometry) AS lon_to,
            ST_Y(B.geom_point::geometry) AS lat_to,
            ST_Distance(
                A.geom_point::geography,
                B.geom_point::geography
            ) / 1000 AS distance
            {geometry_sql}
        FROM ({location_table_sql}) AS A
        CROSS JOIN ({location_table_sql}) AS B
        ORDER BY distance DESC
    """
def test_column_list():
    """Test that supplying the column name as a list returns it as a new list."""
    cols_in = ["frogs", "dogs"]
    cols_out = get_columns_for_level("admin0", cols_in)
    assert cols_out == cols_in
    assert cols_out is not cols_in
def test_columns_for_level_errors(level, column_name, error):
    """Test that get_columns_for_level raises correct errors"""
    # Callable form of pytest.raises: invokes the function with the
    # given args and asserts the expected exception is raised.
    pytest.raises(error, get_columns_for_level, level, column_name)
def column_names(self) -> List[str]:
    # Subscriber first, then the level columns, then the count column.
    names = ["subscriber"]
    names.extend(get_columns_for_level(self.level, self.column_name))
    names.append("calldays")
    return names
def run(
    self,
    uniform_departure_rate=0.1,
    departure_rate_vector=None,
    ignore_missing=False,
):
    """
    Runs model.

    Parameters
    ----------
    uniform_departure_rate : float, default 0.1
        Proportion of population from location i that will be departing
        in observed time period. This proportion applies to all
        locations uniformly.
    departure_rate_vector : dict, optional
        A dictionary that contains the proportion of the population
        from locations i that have departed those locations. The keys
        of the dictionaries must be the location identifier and the
        values the departure rate. If passed, this will be used over
        the `uniform_departure_rate` parameter.
    ignore_missing : bool, default False
        If True, existing locations that are not found in the
        departure_rate_vector dictionary will be computed using zero
        departures.

    Returns
    -------
    pandas.DataFrame
        A mobility matrix with one row per ordered (from, to) pair,
        carrying the location columns suffixed ``_from``/``_to`` plus
        ``prediction`` and ``probability``.

    Raises
    ------
    ValueError
        If `departure_rate_vector` does not cover every location and
        `ignore_missing` is False.
    """
    # NOTE(review): the key checked here is "population_buffer" but the
    # attribute read below is "population_buffer_object" — confirm this
    # cache check targets the right name.
    if "population_buffer" not in self.__dict__:
        # logger.warn is a deprecated alias — use warning().
        logger.warning(
            " Computing Population() and DistanceMatrix() "
            "objects. This can take a few minutes."
        )
    population_df = self.population_object.get_dataframe()
    population_buffer = self.population_buffer_object.get_dataframe()

    # Hoist the repeated level-column lookup; it is invariant here.
    level_cols = get_columns_for_level(self.level)
    ix = ["{}_{}".format(c, d) for d in ("from", "to") for c in level_cols]
    population_buffer.set_index(ix, inplace=True)

    # M is the total population; beta its reciprocal, used in the
    # radiation-style weighting below.
    M = population_df["total"].sum()
    beta = 1 / M

    locations = population_df[level_cols].values.tolist()
    population_df.set_index(level_cols, inplace=True)

    if not departure_rate_vector:
        # Lazy %-style args: the message is only formatted if emitted.
        logger.warning(
            " Using an uniform departure rate of %s for all locations.",
            uniform_departure_rate,
        )
    elif not ignore_missing and len(departure_rate_vector) != len(locations):
        raise ValueError(
            "Locations missing from `departure_rate_vector`. "
            "Use ignore_missing=True if locations "
            "without rates should be ignored."
        )

    results = []
    for i in locations:
        # All candidate destinations for origin i (computed once per i
        # instead of once per inner loop).
        others = [l for l in locations if l != i]
        m_i = self.__get_population(population_df, i)

        # T_i: total departures from origin i.
        if departure_rate_vector:
            try:
                T_i = m_i * departure_rate_vector[i[0]]
            except KeyError:
                # Fallback keys the dict by a 1-tuple of the first
                # column only (tuple(i[:1]) == (i[0],)).
                # NOTE(review): multi-column locations never use their
                # remaining columns here — confirm that is intended.
                try:
                    T_i = m_i * departure_rate_vector[tuple(i[:1])]
                except KeyError:
                    T_i = 0
        else:
            T_i = m_i * uniform_departure_rate

        # Normalisation term over all other locations.
        sigma = 0
        for k in others:
            m_k = self.__get_population(population_df, k)
            S_ik = self.__get_buffer_population(population_buffer, i, k)
            sigma += m_k * ((1 / S_ik) - beta)

        for j in others:
            m_j = self.__get_population(population_df, j)
            S_ij = self.__get_buffer_population(population_buffer, i, j)
            T_ij = (T_i * m_j * ((1 / S_ij) - beta)) / sigma
            # Guard against dividing by zero departures.
            probability = T_ij / T_i if T_i != 0 else 0
            results.append(i + j + [T_ij, probability])

    out_cols = ix + ["prediction", "probability"]
    return pd.DataFrame(results, columns=out_cols)