Exemple #1
0
    def __init__(
        self,
        start: str,
        stop: str,
        *,
        spatial_unit: Optional[LonLatSpatialUnit] = None,
        departure_rate: Union[pd.DataFrame, float] = 0.1,
        hours: Union[str, Tuple[int, int]] = "all",
        method: str = "last",
        table: Union[str, List[str]] = "all",
        subscriber_identifier: str = "msisdn",
        subscriber_subset: Optional[Query] = None,
    ):
        """
        Validate the departure rate and build the sub-queries the model uses.

        Emits a warning that the model is experimental, normalises
        ``departure_rate``, and then constructs a distance matrix, an
        aggregated modal location over the requested days, and a
        ``_PopulationBuffer`` combining the two.

        Parameters
        ----------
        start, stop : str
            Bounds of the period of interest; passed to ``list_of_dates`` to
            enumerate the days for which a daily location is computed.
        spatial_unit : LonLatSpatialUnit, optional
            Spatial unit used for the distance matrix and the daily
            locations. Defaults to ``make_spatial_unit("versioned-site")``.
        departure_rate : pandas.DataFrame or float, default 0.1
            Either a single rate, stored as given, or a dataframe. For a
            dataframe, every column other than ``"rate"`` gets a ``"_from"``
            suffix, columns are put in alphabetical order, rows are sorted by
            all columns, and the index is reset.
        hours, method, table, subscriber_identifier, subscriber_subset
            Forwarded unchanged to every ``daily_location`` call.

        Raises
        ------
        TypeError
            If ``departure_rate`` is neither a dataframe nor a float.
        """
        warnings.warn(
            "The PopulationWeightedOpportunities model is currently **experimental**. "
            "Please review Yan X-Y et al. "
            "(http://dx.doi.org/10.1098/rsif.2014.0834) "
            "before using this model in production.")

        if isinstance(departure_rate, pd.DataFrame):
            # Rename the columns to match what we'll join to.
            renamed = departure_rate.rename(
                columns=lambda x: x if x == "rate" else f"{x}_from")
            ordered_columns = sorted(renamed.columns)
            canonical = renamed.reindex(columns=ordered_columns)
            # Put the frame in a canonical order so we'll have a consistent
            # md5. Whole rows are sorted (rather than each column on its own)
            # so that every rate stays attached to the location it was
            # supplied for.
            if ordered_columns:
                canonical = canonical.sort_values(by=ordered_columns)
            self.departure_rate = canonical.reset_index(drop=True)
        elif isinstance(departure_rate, float):
            self.departure_rate = departure_rate
        else:
            raise TypeError(f"{departure_rate} must be a float or dataframe")
        self.start = start
        self.stop = stop
        if spatial_unit is None:
            self.spatial_unit = make_spatial_unit("versioned-site")
        else:
            self.spatial_unit = spatial_unit
        self.distance_matrix = DistanceMatrix(spatial_unit=self.spatial_unit,
                                              return_geometry=True)

        # One daily location per day in the period, reduced to a modal
        # location and then aggregated.
        self.population_object = ModalLocation(*[
            daily_location(
                d,
                spatial_unit=self.spatial_unit,
                hours=hours,
                method=method,
                table=table,
                subscriber_identifier=subscriber_identifier,
                ignore_nulls=True,
                subscriber_subset=subscriber_subset,
            ) for d in list_of_dates(self.start, self.stop)
        ]).aggregate()

        self.population_buffer_object = _PopulationBuffer(
            population_object=self.population_object,
            distance_matrix=self.distance_matrix,
        )
    def __init__(
        self,
        start,
        stop,
        statistic="avg",
        *,
        hours="all",
        tables="all",
        direction: Union[str, Direction] = Direction.BOTH,
        subscriber_subset=None,
        exclude_self_calls=True,
    ):
        """
        Store the query parameters and build the underlying sub-queries.

        Parameters
        ----------
        start, stop
            Period bounds; each is normalised with ``standardise_date``.
        statistic : str, default "avg"
            Name of the statistic; lower-cased and checked against
            ``valid_stats``.
        hours, tables, subscriber_subset
            Forwarded to both ``EventsTablesUnion`` sub-queries.
        direction : str or Direction, default Direction.BOTH
            Coerced with ``Direction(direction)`` and stored.
        exclude_self_calls : bool, default True
            Stored on the instance as given.

        Raises
        ------
        ValueError
            If the lower-cased ``statistic`` is not in ``valid_stats``.
        """
        self.tables = tables
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.direction = Direction(direction)
        self.exclude_self_calls = exclude_self_calls
        self.statistic = statistic.lower()

        if self.statistic not in valid_stats:
            message = "{} is not a valid statistic. Use one of {}".format(
                self.statistic, valid_stats)
            raise ValueError(message)

        # Arguments common to both event unions below.
        shared_union_args = {
            "columns": [
                "msisdn", "msisdn_counterpart", "id", "location_id", "outgoing"
            ],
            "tables": self.tables,
            "hours": hours,
            "subscriber_subset": subscriber_subset,
        }

        # EventsTablesUnion only applies the subscriber subset to a single
        # identifier column, so the unioned table has to be queried once per
        # side of the event. This has a considerable negative impact on
        # execution time.
        self.unioned_from_query = EventsTablesUnion(
            self.start,
            self.stop,
            subscriber_identifier="msisdn",
            **shared_union_args,
        )
        self.unioned_to_query = EventsTablesUnion(
            self.start,
            self.stop,
            subscriber_identifier="msisdn_counterpart",
            **shared_union_args,
        )

        self.distance_matrix = DistanceMatrix()

        super().__init__()
def distance_matrix(get_dataframe):
    """
    Return the distance matrix as produced by ``get_dataframe``.

    A ``DistanceMatrix`` is created with its default arguments and handed to
    the supplied ``get_dataframe`` callable; whatever that returns is passed
    straight back to the caller.
    """
    matrix_query = DistanceMatrix()
    return get_dataframe(matrix_query)