def __init__(self, *, metric, locations, method="avg"):
     """
     Store the metric, the locations and the aggregation method.

     Parameters
     ----------
     metric : object
         Kept as ``self.metric``. When it exposes ``start``/``stop`` they are
         compared, by calendar date, with those of ``locations``.
     locations : object
         Kept as ``self.locations``; its ``spatial_unit`` is copied onto
         this instance.
     method : str, default "avg"
         Lower-cased and checked against ``self.allowed_methods``.

     Raises
     ------
     ValueError
         If the lower-cased ``method`` is not in ``self.allowed_methods``.

     Warns
     -----
     UserWarning
         If the start dates, or the stop dates, of ``metric`` and
         ``locations`` fall on different days.
     """
     self.metric = metric
     self.locations = locations
     # self._geo_augmented_query reads self.spatial_unit, so mirror it here.
     self.spatial_unit = locations.spatial_unit
     self.method = method.lower()
     method_is_known = self.method in self.allowed_methods
     if not method_is_known:
         raise ValueError(
             f"{method} is not recognised method, must be one of {self.allowed_methods}"
         )
     try:
         # Only the calendar day is compared; any time-of-day part is dropped.
         metric_start_day = parse_datestring(self.metric.start).date()
         locations_start_day = parse_datestring(self.locations.start).date()
         if metric_start_day != locations_start_day:
             warnings.warn(
                 f"{self.metric} and {self.locations} have different start dates: {self.metric.start}, and {self.locations.start}"
             )
         metric_stop_day = parse_datestring(self.metric.stop).date()
         locations_stop_day = parse_datestring(self.locations.stop).date()
         if metric_stop_day != locations_stop_day:
             warnings.warn(
                 f"{self.metric} and {self.locations} have different stop dates: {self.metric.stop}, and {self.locations.stop}"
             )
     except AttributeError:
         # Inputs without start/stop attributes simply skip the date check.
         pass
     super().__init__()
Esempio n. 2
0
 def __init__(self, metric, locations, method="mean"):
     """
     Store the metric, the locations and the aggregation method.

     Parameters
     ----------
     metric : object
         Kept as ``self.metric``. When it exposes ``start``/``stop`` they are
         compared, by calendar date, with those of ``locations``.
     locations : object
         Kept as ``self.locations``; its ``level`` and ``column_name`` are
         copied onto this instance.
     method : str, default "mean"
         Lower-cased; must be one of "mean", "median" or "mode".

     Raises
     ------
     ValueError
         If the lower-cased ``method`` is not one of the accepted values.

     Warns
     -----
     UserWarning
         If the start dates, or the stop dates, of ``metric`` and
         ``locations`` fall on different days.
     """
     self.metric = metric
     self.locations = locations
     self.level = locations.level
     self.column_name = locations.column_name
     self.method = method.lower()
     accepted_methods = ("mean", "median", "mode")
     if self.method not in accepted_methods:
         raise ValueError("{} is not recognised method".format(method))
     try:
         # Only the calendar day is compared; any time-of-day part is dropped.
         metric_start_day = parse_datestring(self.metric.start).date()
         locations_start_day = parse_datestring(self.locations.start).date()
         if metric_start_day != locations_start_day:
             warnings.warn(
                 "{} and {} have different start dates: {}, and {}".format(
                     self.metric,
                     self.locations,
                     self.metric.start,
                     self.locations.start,
                 ))
         metric_stop_day = parse_datestring(self.metric.stop).date()
         locations_stop_day = parse_datestring(self.locations.stop).date()
         if metric_stop_day != locations_stop_day:
             warnings.warn(
                 "{} and {} have different stop dates: {}, and {}".format(
                     self.metric,
                     self.locations,
                     self.metric.stop,
                     self.locations.stop,
                 ))
     except AttributeError:
         # Inputs without start/stop attributes simply skip the date check.
         pass
     super().__init__()
Esempio n. 3
0
    def __init__(self, *daily_locations):
        """
        Build the instance from one or more daily location objects.

        Records the earliest and latest parsed ``start`` of the inputs as
        ``self.start`` / ``self.stop`` (as strings), logs how many inputs
        were given and how many report ``is_stored``, and copies ``level``,
        ``subscriber_identifier`` and ``column_name`` from the first input.

        Parameters
        ----------
        *daily_locations
            Objects exposing ``start``, ``is_stored``, ``level``,
            ``subscriber_identifier`` and ``column_name``. At least one is
            required.
        """
        # TODO: check that all the inputs are actually location objects (of an appropriate kind)

        start_times = [parse_datestring(dl.start) for dl in daily_locations]
        # NOTE(review): ``stop`` is the latest *start* among the inputs, not
        # the latest stop -- confirm this is the intended convention.
        self.start = str(min(start_times))
        self.stop = str(max(start_times))
        self._all_dls = daily_locations

        n_inputs = len(self._all_dls)
        logger.info("ModalLocation using {} DailyLocations".format(n_inputs))
        n_stored = len([dl for dl in self._all_dls if dl.is_stored])
        logger.info(
            "{}/{} DailyLocations are pre-calculated.".format(
                n_stored, len(self._all_dls)
            )
        )

        # The first input acts as the template for the shared settings.
        template = self._all_dls[0]
        self.level = template.level
        self.subscriber_identifier = template.subscriber_identifier
        self.column_name = template.column_name
        super().__init__()
Esempio n. 4
0
def test_parse():
    """
    Datestrings that differ only by a time-of-day suffix parse to the same date.
    """
    reference_day = parse_datestring("2016-01-01").date()
    for with_time in ("2016-01-01 10:00", "2016-01-01 10:00:00"):
        assert reference_day == parse_datestring(with_time).date()
Esempio n. 5
0
    def __init__(self, start_date, end_date):
        """
        Record a start/end pair in parsed and standardised-string form.

        Also stores the day after ``end_date``, again in both forms.

        Parameters
        ----------
        start_date
            Value accepted by both ``parse_datestring`` and
            ``standardise_date``.
        end_date
            Value accepted by both ``parse_datestring`` and
            ``standardise_date``.
        """
        self.start_date = parse_datestring(start_date)
        self.end_date = parse_datestring(end_date)
        self.start_date_as_str = standardise_date(start_date)
        self.end_date_as_str = standardise_date(end_date)

        one_day = dt.timedelta(days=1)
        day_after_end = self.end_date + one_day
        self.one_day_past_end_date = day_after_end
        self.one_day_past_end_date_as_str = standardise_date(day_after_end)
Esempio n. 6
0
    def __init__(self,
                 start,
                 stop,
                 modal_locations=None,
                 statistic="avg",
                 unit="km",
                 **kwargs):
        """
        Join each subscriber's reference (modal) location to their locations.

        Parameters
        ----------
        start : str
            Start of the period; stored as ``self.start``.
        stop : str
            End of the period; stored as ``self.stop_sl``. One day is
            subtracted from it to obtain ``self.stop_hl``, the end used when
            building the default modal location.
        modal_locations : ModalLocation, optional
            Pre-built modal location whose ``level`` must be one of
            "lat-lon", "versioned-cell" or "versioned-site". When omitted, a
            ModalLocation is built from one lat-lon ``daily_location`` per
            date returned by ``list_of_dates(start, stop_hl)``.
        statistic : str, default "avg"
            Lower-cased and checked against ``valid_stats``.
        unit : str, default "km"
            Stored as ``self.unit``.
        **kwargs
            Forwarded to ``daily_location`` and ``subscriber_locations``.

        Raises
        ------
        ValueError
            If ``statistic`` is not in ``valid_stats``, or if
            ``modal_locations`` is given but is not a ModalLocation with an
            allowed level.
        """
        # Reject a bad statistic up front, before any of the (potentially
        # numerous) sub-queries below are constructed.
        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats))

        # need to subtract one day from hl end in order to be
        # comparing over same period...
        self.stop_sl = stop
        self.stop_hl = str(parse_datestring(stop) - relativedelta(days=1))

        self.start = start

        allowed_levels = ["lat-lon", "versioned-cell", "versioned-site"]
        # NOTE(review): this is a truthiness test, so any falsy value (not
        # just None) selects the default branch -- confirm that is intended.
        if modal_locations:
            if not (isinstance(modal_locations, ModalLocation)
                    and modal_locations.level in allowed_levels):
                raise ValueError(
                    f"Argument 'modal_locations' should be an instance of ModalLocation class with level in {allowed_levels}"
                )
            hl = modal_locations
        else:
            hl = ModalLocation(*[
                daily_location(date, level="lat-lon", **kwargs)
                for date in list_of_dates(self.start, self.stop_hl)
            ])

        sl = subscriber_locations(self.start,
                                  self.stop_sl,
                                  level="lat-lon",
                                  **kwargs)

        # Left join keeps every subscriber present in the modal location;
        # its columns get the "_home_loc" suffix.
        self.joined = hl.join(
            sl,
            on_left="subscriber",
            on_right="subscriber",
            how="left",
            left_append="_home_loc",
            right_append="",
        )

        self.unit = unit

        super().__init__()
 def __init__(self, *, metric, locations, method="mean"):
     """
     Store the metric, the locations and the aggregation method.

     Parameters
     ----------
     metric : object
         Kept as ``self.metric``. When it exposes ``start``/``stop`` they are
         compared, by calendar date, with those of ``locations``.
     locations : object
         Kept as ``self.locations``; its ``level`` and ``column_name`` are
         copied onto this instance.
     method : str, default "mean"
         Lower-cased and checked against ``self.allowed_methods``.

     Raises
     ------
     ValueError
         If the lower-cased ``method`` is not in ``self.allowed_methods``.

     Warns
     -----
     UserWarning
         If the start dates, or the stop dates, of ``metric`` and
         ``locations`` fall on different days.
     """
     self.metric = metric
     self.locations = locations
     self.level = locations.level
     self.column_name = locations.column_name
     self.method = method.lower()
     method_is_known = self.method in self.allowed_methods
     if not method_is_known:
         raise ValueError(
             f"{method} is not recognised method, must be one of {self.allowed_methods}"
         )
     try:
         # Only the calendar day is compared; any time-of-day part is dropped.
         metric_start_day = parse_datestring(self.metric.start).date()
         locations_start_day = parse_datestring(self.locations.start).date()
         if metric_start_day != locations_start_day:
             warnings.warn(
                 f"{self.metric} and {self.locations} have different start dates: {self.metric.start}, and {self.locations.start}"
             )
         metric_stop_day = parse_datestring(self.metric.stop).date()
         locations_stop_day = parse_datestring(self.locations.stop).date()
         if metric_stop_day != locations_stop_day:
             warnings.warn(
                 f"{self.metric} and {self.locations} have different stop dates: {self.metric.stop}, and {self.locations.stop}"
             )
     except AttributeError:
         # Inputs without start/stop attributes simply skip the date check.
         pass
     super().__init__()
Esempio n. 8
0
def daily_location(
    date,
    stop=None,
    *,
    spatial_unit: Optional[AnySpatialUnit] = None,
    hours: Optional[Tuple[int, int]] = None,
    method="last",
    table="all",
    subscriber_identifier="msisdn",
    ignore_nulls=True,
    subscriber_subset=None,
):
    """
    Return a query for locating all subscribers on a single day of data.

    Parameters
    ----------
    date : str
        ISO format date for the day in question, e.g. 2016-01-01.
    stop : str, optional
        Stop datetime in ISO format, e.g. 2016-01-02 06:00:00. When omitted,
        one day after `date` is used.
    spatial_unit : flowmachine.core.spatial_unit.*SpatialUnit, default admin3
        Spatial unit to which subscriber locations will be mapped. See the
        docstring of make_spatial_unit for more information.
    hours : tuple of ints, default None
        Subset the result within certain hours, e.g. (4,17).
        This will subset the query only with these hours, but
        across all specified days. Or set to 'all' to include
        all hours.
    method : str, default 'last'
        The method by which to calculate the location of the subscriber.
        This can be either 'most-common' or 'last'. 'most-common' is
        simply the modal location of the subscribers, whereas 'last' is
        the location of the subscriber at the time of the final call in
        the data.
    table : str, default 'all'
        Schema qualified name of the table which the analysis is
        based upon. If 'ALL' it will use all tables that contain
        location data, specified in flowmachine.yml.
    subscriber_identifier : {'msisdn', 'imei'}, default 'msisdn'
        Either msisdn, or imei, the column that identifies the subscriber.
    ignore_nulls : bool, default True
        Passed through unchanged to locate_subscribers.
    subscriber_subset : str, list, flowmachine.core.Query, flowmachine.core.Table, default None
        If provided, string or list of strings which are msisdns or imeis to
        limit results to; or, a query or table which has a column with a name
        matching subscriber_identifier (typically, msisdn), to limit results to.

    Returns
    -------
    The result of calling locate_subscribers with the resolved arguments.

    Notes
    -----
    * A date without hours and minutes will be interpreted as
      midnight of that day, so to get data within a single day
      pass (e.g.) '2016-01-01', '2016-01-02'.

    * Use 24 hr format!

    """
    if spatial_unit is None:
        spatial_unit = make_spatial_unit("admin", level=3)

    # Temporary band-aid: marshmallow deserialises date strings to date
    # objects, while the lower-level classes still expect date strings,
    # so normalise back to a string here.
    date = standardise_date(date)

    if stop is None:
        # Default window: from `date` up to one day later.
        following_day = parse_datestring(date) + datetime.timedelta(days=1)
        stop = standardise_date(following_day)

    return locate_subscribers(
        start=date,
        stop=stop,
        spatial_unit=spatial_unit,
        hours=hours,
        method=method,
        table=table,
        subscriber_identifier=subscriber_identifier,
        ignore_nulls=ignore_nulls,
        subscriber_subset=subscriber_subset,
    )
Esempio n. 9
0
def test_datestring_parse_error():
    """
    An unparseable datestring must make parse_datestring raise ValueError.
    """
    not_a_date = "DEFINITELY NOT A DATE"
    with pytest.raises(ValueError):
        parse_datestring(not_a_date)
Esempio n. 10
0
    def __init__(
        self,
        start,
        stop,
        modal_locations=None,
        statistic="avg",
        unit="km",
        hours="all",
        method="last",
        table="all",
        subscriber_identifier="msisdn",
        ignore_nulls=True,
        subscriber_subset=None,
    ):
        """
        Join each subscriber's reference (modal) location to their locations.

        Parameters
        ----------
        start : str
            Start of the period; stored as ``self.start``.
        stop : str
            End of the period; stored as ``self.stop_sl``. One day is
            subtracted from it to obtain ``self.stop_hl``, the end used when
            building the default modal location.
        modal_locations : ModalLocation, optional
            Pre-built modal location; its spatial unit must satisfy the
            "has_lon_lat_columns" criterion. When omitted, a ModalLocation is
            built from one lon-lat ``daily_location`` per date returned by
            ``list_of_dates(start, stop_hl)``.
        statistic : str, default "avg"
            Lower-cased and checked against ``valid_stats``.
        unit : str, default "km"
            Stored as ``self.unit``.
        hours, table, subscriber_identifier, ignore_nulls, subscriber_subset
            Forwarded to both ``daily_location`` and ``SubscriberLocations``.
        method : str, default "last"
            Forwarded to ``daily_location`` only.

        Raises
        ------
        ValueError
            If ``statistic`` is not in ``valid_stats``, or if
            ``modal_locations`` is given but is not a ModalLocation.
        """
        # Reject a bad statistic up front, before any of the (potentially
        # numerous) sub-queries below are constructed.
        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats))

        # need to subtract one day from hl end in order to be
        # comparing over same period...
        self.stop_sl = stop
        self.stop_hl = str(parse_datestring(stop) - relativedelta(days=1))

        self.start = start

        # NOTE(review): this is a truthiness test, so any falsy value (not
        # just None) selects the default branch -- confirm that is intended.
        if modal_locations:
            if not isinstance(modal_locations, ModalLocation):
                raise ValueError(
                    "Argument 'modal_locations' should be an instance of ModalLocation class"
                )
            hl = modal_locations
            hl.spatial_unit.verify_criterion("has_lon_lat_columns")
        else:
            hl = ModalLocation(*[
                daily_location(
                    date,
                    spatial_unit=make_spatial_unit("lon-lat"),
                    hours=hours,
                    method=method,
                    table=table,
                    subscriber_identifier=subscriber_identifier,
                    ignore_nulls=ignore_nulls,
                    subscriber_subset=subscriber_subset,
                ) for date in list_of_dates(self.start, self.stop_hl)
            ])

        sl = SubscriberLocations(
            self.start,
            self.stop_sl,
            spatial_unit=make_spatial_unit("lon-lat"),
            hours=hours,
            table=table,
            subscriber_identifier=subscriber_identifier,
            ignore_nulls=ignore_nulls,
            subscriber_subset=subscriber_subset,
        )

        # Left join keeps every subscriber present in the modal location;
        # its columns get the "_home_loc" suffix.
        self.joined = hl.join(
            sl,
            on_left="subscriber",
            on_right="subscriber",
            how="left",
            left_append="_home_loc",
            right_append="",
        )

        self.unit = unit

        super().__init__()