def __init__(self, sensor: Sensor, source: BeliefSource, value: float, **kwargs):
    """Create a belief held by a source about the value of a sensor event.

    Recognised keyword arguments (within each group, the first one present wins):

    - probability: ``cumulative_probability``, ``cp``, or ``sigma`` (a number of
      standard deviations, mapped through the standard normal CDF);
      defaults to 0.5 when none is given
    - event timing: ``event_start``, or ``event_time`` (only accepted for sensors
      whose event resolution is zero)
    - belief timing: ``belief_horizon``, or ``belief_time`` (stored as the
      sensor's knowledge time of the event minus the belief time)

    :param sensor: sensor to which the belief pertains
    :param source: source that formed the belief
    :param value: the believed event value
    :raises KeyError: if ``event_time`` is passed while the sensor has a non-zero event resolution
    """
    self.sensor = sensor
    self.source = source
    self.event_value = value

    # Probability: an explicit value takes precedence over sigma, which takes precedence over the default
    for probability_key in ("cumulative_probability", "cp"):
        if probability_key in kwargs:
            self.cumulative_probability = kwargs[probability_key]
            break
    else:
        if "sigma" in kwargs:
            sigma = kwargs["sigma"]
            self.cumulative_probability = 1 / 2 + (math.erf(sigma / 2**0.5)) / 2
        else:
            self.cumulative_probability = 0.5

    # Event timing: event_time is an alias of event_start for instantaneous events only
    event_key = next(
        (key for key in ("event_start", "event_time") if key in kwargs), None
    )
    if event_key == "event_time" and self.sensor.event_resolution != timedelta():
        raise KeyError(
            "Sensor has a non-zero resolution, so it doesn't measure instantaneous events. "
            "Use event_start instead of event_time."
        )
    if event_key is not None:
        self.event_start = tb_utils.enforce_utc(kwargs[event_key])

    # Belief timing: a belief time is converted into a horizon before knowledge time
    if "belief_horizon" in kwargs:
        self.belief_horizon = kwargs["belief_horizon"]
    elif "belief_time" in kwargs:
        formed_at = tb_utils.enforce_utc(kwargs["belief_time"])
        known_at = self.sensor.knowledge_time(self.event_start)
        self.belief_horizon = known_at - formed_at
def fixed_viewpoint(
    self,
    belief_time: datetime = None,
    belief_time_window: Tuple[Optional[datetime], Optional[datetime]] = (None, None),
    update_belief_times: bool = False,
) -> "BeliefsDataFrame":
    """Select the most recent belief about each event at a given belief time.

    NB: with a fixed viewpoint the horizon increases as you look further ahead.
    Alternatively, select the most recent belief formed within a certain time window.
    This allows setting a maximum freshness of the data.

    :Example:

    >>> # Select the latest beliefs formed before June 6th 2018 about each event
    >>> df.fixed_viewpoint(belief_time=datetime(2018, 6, 6, tzinfo=utc))
    >>> # Or equivalently:
    >>> df.fixed_viewpoint(belief_time_window=(None, datetime(2018, 6, 6, tzinfo=utc)))
    >>> # Select the latest beliefs formed from June 1st to June 6th (up to June 6th 0:00 AM)
    >>> df.fixed_viewpoint(belief_time_window=(datetime(2018, 6, 1, tzinfo=utc), datetime(2018, 6, 6, tzinfo=utc)))

    :param belief_time: datetime indicating the belief should be formed at least before this time
    :param belief_time_window: optional tuple specifying a time window within which beliefs should have been formed
    :param update_belief_times: if True, update the belief time of each belief with the given fixed viewpoint
    :raises ValueError: if both a belief time and a belief time window are passed
    """
    # A single belief time is shorthand for a window without a lower bound
    if belief_time is not None:
        if belief_time_window != (None, None):
            raise ValueError("Cannot pass both a belief time and belief time window.")
        belief_time_window = (None, belief_time)

    # Filtering happens on belief times, so switch away from a horizon-based index if needed
    if "belief_time" in self.index.names:
        df = self
    else:
        df = self.convert_index_from_belief_horizon_to_time()

    # Both window bounds are inclusive
    lower_bound = belief_time_window[0]
    if lower_bound is not None:
        formed_at = df.index.get_level_values("belief_time")
        df = df[formed_at >= tb_utils.enforce_utc(lower_bound)]
    upper_bound = belief_time_window[1]
    if upper_bound is not None:
        formed_at = df.index.get_level_values("belief_time")
        df = df[formed_at <= tb_utils.enforce_utc(upper_bound)]

    df = belief_utils.select_most_recent_belief(df)

    if update_belief_times is not True:
        return df

    # Stamp every remaining belief with the upper bound of the window.
    # NOTE(review): without an upper bound this index is built from None values
    # (presumably yielding NaT belief times) - confirm whether that is intended.
    viewpoint_index = pd.DatetimeIndex(data=[belief_time_window[1]] * len(df.index))
    return tb_utils.replace_multi_index_level(df, "belief_time", viewpoint_index)
def belief_history(
    self,
    event_start: datetime,
    belief_time_window: Tuple[Optional[datetime], Optional[datetime]] = (None, None),
    belief_horizon_window: Tuple[Optional[timedelta], Optional[timedelta]] = (
        None,
        None,
    ),
    keep_event_start: bool = False,
) -> "BeliefsDataFrame":
    """Select all beliefs about a single event, identified by the event's start time.

    Optionally select a history of beliefs formed within a certain time window.
    Alternatively, select a history of beliefs formed a certain horizon window before knowledge time
    (with negative horizons indicating post knowledge time).
    All window bounds are inclusive.

    :Example:

    >>> # Select beliefs formed before June 20th 2018
    >>> df.belief_history(event_start, belief_time_window=(None, datetime(2018, 6, 20, tzinfo=utc)))
    >>> # Select beliefs formed from 5 to 10 hours before knowledge time
    >>> df.belief_history(event_start, belief_horizon_window=(timedelta(hours=5), timedelta(hours=10)))
    >>> # Select beliefs formed from 2 hours after to 10 hours before knowledge time
    >>> df.belief_history(event_start, belief_horizon_window=(timedelta(hours=-2), timedelta(hours=10)))

    :param event_start: start time of the event
    :param belief_time_window: optional tuple specifying a time window within which beliefs should have been formed
    :param belief_horizon_window: optional tuple specifying a horizon window
        (e.g. between 1 and 2 hours before the event value could have been known)
    :param keep_event_start: if True, keep the event_start index level in the result
        (by default it is dropped)
    :raises ValueError: if both a belief time window and a belief horizon window are passed
    """
    # Validate the arguments before touching any data. Checking the individual bounds
    # (rather than comparing against the tuple (None, None)) also accepts lists.
    has_time_window = any(bound is not None for bound in belief_time_window)
    has_horizon_window = any(bound is not None for bound in belief_horizon_window)
    if has_time_window and has_horizon_window:
        raise ValueError(
            "Cannot pass both a belief time window and belief horizon window."
        )

    df = self.xs(
        tb_utils.enforce_utc(event_start), level="event_start", drop_level=False
    ).sort_index()

    if has_time_window:
        # Pass the bounds through enforce_utc, as is done for event_start above
        earliest, latest = belief_time_window[0], belief_time_window[1]
        if earliest is not None:
            df = df[
                df.index.get_level_values("belief_time")
                >= tb_utils.enforce_utc(earliest)
            ]
        if latest is not None:
            df = df[
                df.index.get_level_values("belief_time")
                <= tb_utils.enforce_utc(latest)
            ]

    if has_horizon_window:
        # Temporarily index by horizon to filter, then restore the belief time index
        shortest, longest = belief_horizon_window[0], belief_horizon_window[1]
        df = df.convert_index_from_belief_time_to_horizon()
        if shortest is not None:
            df = df[df.index.get_level_values("belief_horizon") >= shortest]
        if longest is not None:
            df = df[df.index.get_level_values("belief_horizon") <= longest]
        df = df.convert_index_from_belief_horizon_to_time()

    if not keep_event_start:
        df = df.droplevel("event_start")
    return df
def query(
    cls,
    session: Session,
    sensor: DBSensor,
    event_before: datetime = None,
    event_not_before: datetime = None,
    belief_before: datetime = None,
    belief_not_before: datetime = None,
    source: Union[int, List[int], str, List[str]] = None,
) -> "BeliefsDataFrame":
    """Query beliefs about sensor events.

    :param session: the database session to use
    :param sensor: sensor to which the beliefs pertain
    :param event_before: only return beliefs about events that end before this datetime (inclusive)
    :param event_not_before: only return beliefs about events that start after this datetime (inclusive)
    :param belief_before: only return beliefs formed before this datetime (inclusive)
    :param belief_not_before: only return beliefs formed after this datetime (inclusive)
    :param source: only return beliefs formed by the given source or list of sources (pass their id or name)
    :returns: a multi-index DataFrame with all relevant beliefs
    :raises ValueError: if the sensor cannot be found through the given session,
        or if a source is neither an integer id nor a string name

    TODO: rename params for clarity: event_finished_before, event_starts_not_before (or similar), same for beliefs
    """

    # Check for timezone-aware datetime input
    if event_before is not None:
        event_before = tb_utils.enforce_utc(event_before)
    if event_not_before is not None:
        event_not_before = tb_utils.enforce_utc(event_not_before)
    if belief_before is not None:
        belief_before = tb_utils.enforce_utc(belief_before)
    if belief_not_before is not None:
        belief_not_before = tb_utils.enforce_utc(belief_not_before)

    # Query sensor for relevant timing properties
    timing_properties = (
        session.query(
            DBSensor.event_resolution,
            DBSensor.knowledge_horizon_fnc,
            DBSensor.knowledge_horizon_par,
        )
        .filter(DBSensor.id == sensor.id)
        .one_or_none()
    )
    if timing_properties is None:
        # Fail with a clear message instead of a TypeError from unpacking None
        raise ValueError(
            "Query by sensor failed: no sensor with id %s found." % sensor.id
        )
    event_resolution, knowledge_horizon_fnc, knowledge_horizon_par = timing_properties

    # Get bounds on the knowledge horizon (so we can already roughly filter by belief time)
    (
        knowledge_horizon_min,
        knowledge_horizon_max,
    ) = sensor_utils.eval_verified_knowledge_horizon_fnc(
        knowledge_horizon_fnc, knowledge_horizon_par, None
    )

    # Query based on start_time_window
    q = session.query(cls).filter(cls.sensor_id == sensor.id)

    # Apply event time filter
    if event_before is not None:
        q = q.filter(cls.event_start + event_resolution <= event_before)
    if event_not_before is not None:
        q = q.filter(cls.event_start >= event_not_before)

    # Apply rough belief time filter.
    # Since belief_time = event_start - knowledge_horizon - belief_horizon, the bounds on the
    # knowledge horizon give a superset of the wanted beliefs; the exact filter follows below.
    if belief_before is not None:
        q = q.filter(
            cls.event_start
            <= belief_before + cls.belief_horizon + knowledge_horizon_max
        )
    if belief_not_before is not None:
        q = q.filter(
            cls.event_start
            >= belief_not_before + cls.belief_horizon + knowledge_horizon_min
        )

    # Apply source filter
    if source is not None:
        source_list = source if isinstance(source, list) else [source]
        unidentifiable_list = [
            s for s in source_list if not isinstance(s, (int, str))
        ]
        if unidentifiable_list:
            raise ValueError(
                "Query by source failed: query only possible by integer id or string name. Failed sources: %s"
                % unidentifiable_list
            )
        id_list = [s for s in source_list if isinstance(s, int)]
        name_list = [s for s in source_list if isinstance(s, str)]
        q = q.join(DBBeliefSource).filter(
            (cls.source_id.in_(id_list)) | (DBBeliefSource.name.in_(name_list))
        )

    # Build our DataFrame of beliefs
    df = BeliefsDataFrame(sensor=sensor, beliefs=q.all())

    # Actually filter by belief time (both bounds inclusive, as documented)
    if belief_before is not None:
        df = df[df.index.get_level_values("belief_time") <= belief_before]
    if belief_not_before is not None:
        df = df[df.index.get_level_values("belief_time") >= belief_not_before]

    return df
def knowledge_time(self, event_start: datetime) -> datetime:
    """Return the time at which the value of the event starting at event_start could be known.

    Computed as the event start (passed through enforce_utc) minus the
    knowledge horizon that applies to that event start.

    :param event_start: start time of the event
    """
    utc_event_start = enforce_utc(event_start)
    horizon = self.knowledge_horizon(utc_event_start)
    return utc_event_start - horizon
def knowledge_horizon(self, event_start: datetime = None) -> timedelta:
    """Evaluate this sensor's knowledge horizon function for the given event start.

    The stored knowledge_horizon_fnc is evaluated with the stored
    knowledge_horizon_par through eval_verified_knowledge_horizon_fnc.

    :param event_start: start time of the event. It is passed through enforce_utc
        when given. When left as None it is handed to the evaluation function
        unchanged (presumably yielding bounds on the horizon rather than a single
        value - confirm against eval_verified_knowledge_horizon_fnc).
    """
    # Only normalise an actual datetime; None is a valid default and must not reach enforce_utc
    if event_start is not None:
        event_start = enforce_utc(event_start)
    return eval_verified_knowledge_horizon_fnc(
        self.knowledge_horizon_fnc, self.knowledge_horizon_par, event_start
    )
def __init__(self, *args, **kwargs):
    """Initialise a multi-index DataFrame with beliefs about a unique sensor.

    Besides the regular pandas DataFrame arguments, the following keyword
    arguments are consumed here:

    - ``sensor``: the sensor to which all beliefs pertain
    - ``beliefs``: a list of TimedBelief objects to build the frame from
    - ``source``, ``event_start``, ``belief_time``, ``cumulative_probability``:
      when initialising from a plain DataFrame, set (or overwrite) the
      respective column with this single value before it becomes an index level

    Three situations are handled:

    1. ``beliefs`` is None and the first positional argument is a DataFrame:
       the frame is expected to hold the index levels as columns; they are
       type-converted, set as the index, and the sensor metadata is attached.
    2. ``beliefs`` is None otherwise (e.g. pandas re-constructing the frame
       when slicing): the arguments are passed to pandas as they are.
    3. ``beliefs`` is a (possibly empty) list: the frame is built from the
       beliefs, sorted by index, with duplicate rows dropped.

    :raises ValueError: if distinct sources among the beliefs share a name
    """

    # Obtain parameters that are specific to our DataFrame subclass
    sensor: Sensor = kwargs.pop("sensor", None)
    source: BeliefSource = kwargs.pop("source", None)
    event_start: datetime = kwargs.pop("event_start", None)
    belief_time: datetime = kwargs.pop("belief_time", None)
    cumulative_probability: float = kwargs.pop("cumulative_probability", None)
    beliefs: List[TimedBelief] = kwargs.pop("beliefs", None)

    # Define our columns and indices
    columns = ["event_value"]
    indices = ["event_start", "belief_time", "source", "cumulative_probability"]

    # Use our constructor if initialising from a previous (Beliefs)DataFrame (e.g. when slicing), copying the Sensor metadata
    # TODO: how is the metadata copied here?
    if beliefs is None:
        super().__init__(*args, **kwargs)
        # Guard against construction without positional arguments (data passed by keyword, or no data at all)
        if args and isinstance(args[0], pd.DataFrame):

            # Set (possibly overwrite) each index level to a unique value if set explicitly
            if source is not None:
                self["source"] = source
            if event_start is not None:
                self["event_start"] = tb_utils.enforce_utc(event_start)
            if belief_time is not None:
                self["belief_time"] = tb_utils.enforce_utc(belief_time)
            if cumulative_probability is not None:
                self["cumulative_probability"] = cumulative_probability

            # Check for correct types and convert if possible
            self["event_start"] = pd.to_datetime(self["event_start"], utc=True)
            self["belief_time"] = pd.to_datetime(self["belief_time"], utc=True)
            if any(c != BeliefSource for c in self["source"].map(type)):
                warnings.warn(
                    "DataFrame contains sources of type other than BeliefSource."
                )

            # Set index levels and metadata
            self.set_index(indices, inplace=True)
            self.sensor = sensor
            self.event_resolution = sensor.event_resolution
        return

    # Call the pandas DataFrame constructor with the right input
    kwargs["columns"] = columns
    if beliefs:
        # Sources are identified by name in the index, so names must not collide
        sources = {belief.source for belief in beliefs}
        source_names = {belief_source.name for belief_source in sources}
        if len(source_names) != len(sources):
            raise ValueError(
                "Source names must be unique. Cannot initialise BeliefsDataFrame given the following unique sources:\n%s"
                % sources
            )
        beliefs = sorted(
            beliefs,
            key=lambda b: (
                b.event_start,
                b.belief_time,
                b.source,
                b.cumulative_probability,
            ),
        )
        kwargs["data"] = [
            [getattr(belief, column) for column in columns] for belief in beliefs
        ]
        kwargs["index"] = pd.MultiIndex.from_tuples(
            [[getattr(belief, level) for level in indices] for belief in beliefs],
            names=indices,
        )
    else:
        # No beliefs given: create an empty frame that still has the right index levels
        kwargs["index"] = pd.MultiIndex(
            levels=[[] for _ in indices], codes=[[] for _ in indices], names=indices
        )  # Todo support pandas 0.23
    super().__init__(*args, **kwargs)

    # Clean up duplicate beliefs
    self.reset_index(inplace=True)
    self.drop_duplicates(inplace=True)
    self.set_index(indices, inplace=True)

    # Set the Sensor metadata (including timing properties of the sensor)
    self.sensor = sensor
    self.event_resolution = self.sensor.event_resolution