Example #1
0
def xtimes(start, stop=None, step=None):
    checks.check_not_none(start)
    
    if step is None:
        if isinstance(start, (dt.date, dt.time, dt.datetime)) or isinstance(stop, (dt.date, dt.time, dt.datetime)):
            step = dt.timedelta(days=1)
        elif isinstance(start, float) or isinstance(stop, float):
            step = 1.
        else:
            step = 1

    resultwrap = lambda x: x

    if isinstance(start, dt.time):
        start = dt.datetime.combine(dt.datetime(1,1,1,0,0,0), start)
        resultwrap = lambda x: x.time()
    if isinstance(stop, dt.time):
        stop = dt.datetime.combine(dt.datetime(1,1,1,0,0,0), stop) if stop is not None else None
        resultwrap = lambda x: x.time()

    stepfunc = step if checks.is_callable(step) else lambda x: step
    s = stepfunc(start)
    checks.check(npu.sign(s) != 0, 'Step must be positive or negative, not zero')
    
    if stop is None:
        while True:
            yield resultwrap(start)
            start += s
            s = stepfunc(start)
    else:
        while npu.sign(start - stop) == -npu.sign(s):
            yield resultwrap(start)
            start += s
            s = stepfunc(start)
Example #2
0
 def add_ma(self, window, column=None, prefix='ma(${WINDOW},', suffix=')', exclude_column_re=None, include_column_re=None):
     logger = logging.getLogger()
     checks.check_not_none(window)
     if not checks.is_iterable(window): window = [window]
     if column is None: column = self.__input_df.columns
     if not checks.is_iterable_not_string(column): column = [column]
     if exclude_column_re is not None: exclude_column_re = re.compile(exclude_column_re)
     if include_column_re is not None: include_column_re = re.compile(include_column_re)
     for c in column:
         if include_column_re is not None and not include_column_re.match(c):
             logger.info('- Excluding column due to include_column_re: %s' % c)
             continue
         if exclude_column_re is not None and exclude_column_re.match(c):
             logger.info('- Excluding column due to exclude_column_re: %s' % c)
             continue
         for w in window:
             c_prefix = prefix.replace('${WINDOW}', str(w))
             c_suffix = suffix.replace('${WINDOW}', str(w))
             new_column_name = c_prefix + c + c_suffix
             logger.info('- Adding new MA column: %s' % new_column_name)
             self.__input_df[new_column_name] = self.__input_df[c].rolling(window=w, center=False).mean()
             try:
                 self.__truncate_from_above = max(self.__truncate_from_above, list(self.__input_df[new_column_name].isnull().values).index(False))
             except ValueError:
                 self.__truncate_from_above = max(self.__truncate_from_above, len(self.__input_df))
Example #3
0
 def add_lag(self, lag, column=None, prefix='lag(${LAG},', suffix=')', exclude_column_re=None, include_column_re=None):
     logger = logging.getLogger()
     checks.check_not_none(lag)
     if not checks.is_iterable(lag): lag = [lag]
     if column is None: column = self.__input_df.columns
     if not checks.is_iterable_not_string(column): column = [column]
     if exclude_column_re is not None: exclude_column_re = re.compile(exclude_column_re)
     if include_column_re is not None: include_column_re = re.compile(include_column_re)
     for c in column:
         if include_column_re is not None and not include_column_re.match(c):
             logger.info('- Excluding column due to include_column_re: %s' % c)
             continue
         if exclude_column_re is not None and exclude_column_re.match(c):
             logger.info('- Excluding column due to exclude_column_re: %s' % c)
             continue
         for l in lag:
             c_prefix = prefix.replace('${LAG}', str(l))
             c_suffix = suffix.replace('${LAG}', str(l))
             new_column_name = c_prefix + c + c_suffix
             logger.info('- Adding new lag column: %s' % new_column_name)
             self.__input_df[new_column_name] = self.__input_df[c].shift(l)
             try:
                 self.__truncate_from_above = max(self.__truncate_from_above, list(self.__input_df[new_column_name].isnull().values).index(False))
             except ValueError:
                 self.__truncate_from_above = max(self.__truncate_from_above, len(self.__input_df))
Example #4
0
    def __init__(self,
                 particles=None,
                 weights=None,
                 dim=None,
                 use_n_minus_1_stats=False,
                 sampler=None,
                 copy=True):
        self._particles, self._weights, self._dim = None, None, None

        if particles is not None:
            self._particles = npu.to_ndim_2(particles,
                                            ndim_1_to_col=True,
                                            copy=copy)
            self._dim = npu.ncol(self._particles)
            if weights is None:
                weights = np.ones((npu.nrow(self._particles), 1))
                weights /= float(npu.nrow(self._particles))

        if weights is not None:
            checks.check_not_none(particles)
            self._weights = npu.to_ndim_2(weights,
                                          ndim_1_to_col=True,
                                          copy=copy)
            self._dim = npu.ncol(self._particles)

        if dim is not None:
            self._dim = dim

        if self._particles is not None:
            npc.check_ncol(self._particles, self._dim)
        if self._weights is not None:
            npc.check_nrow(self._weights, npu.nrow(self._particles))

        npu.make_immutable(self._particles, allow_none=True)
        npu.make_immutable(self._weights, allow_none=True)

        self._use_n_minus_1_stats = use_n_minus_1_stats

        # "n minus 1" (unbiased) stats only make sense when using "repeat"-type weights, meaning that each weight
        # represents the number of occurrences of one observation.
        #
        # See https://stats.stackexchange.com/questions/61225/correct-equation-for-weighted-unbiased-sample-covariance

        self._effective_particle_count = None
        self._weight_sum = None
        self._mean = None
        self._var_n = None
        self._var_n_minus_1 = None
        self._cov_n = None
        self._cov_n_minus_1 = None
        self._vol_n = None
        self._vol_n_minus_1 = None

        self._to_string_helper_EmpiricalDistr = None
        self._str_EmpiricalDistr = None

        super().__init__(do_not_init=True)
Example #5
0
def run(observable,
        obss=None,
        times=None,
        obs_covs=None,
        true_values=None,
        df=None,
        fun=None,
        return_df=False):
    if df is not None:
        if obss is not None and (checks.is_string(obss)
                                 or checks.is_int(obss)):
            obss = df[obss]

        if times is None:
            if isinstance(obss, pd.Series): times = obss.index.values
        elif (checks.is_string(times) or checks.is_int(times)):
            times = df[times].values

        if isinstance(obss, pd.Series): obss = obss.values

        if obs_covs is not None and (checks.is_string(obs_covs)
                                     or checks.is_int(obs_covs)):
            obs_covs = df[obs_covs].values

        if true_values is not None and (checks.is_string(true_values)
                                        or checks.is_int(true_values)):
            true_values = df[true_values].values

    checks.check_not_none(obss)

    if not checks.is_iterable_not_string(observable):
        observable = utils.xconst(observable)
    if not checks.is_iterable_not_string(obss): obss = [obss]
    if not checks.is_iterable_not_string(times): times = utils.xconst(times)
    if not checks.is_iterable_not_string(obs_covs):
        obs_covs = utils.xconst(obs_covs)
    if not checks.is_iterable_not_string(true_values):
        true_values = utils.xconst(true_values)

    obs_result = None
    cumulative_log_likelihood = 0.

    if return_df:
        time = []
        filter_name = []
        filter_type = []
        observable_name = []
        accepted = []
        obs_mean = []
        obs_cov = []
        predicted_obs_mean = []
        predicted_obs_cov = []
        cross_cov = []
        innov_mean = []
        innov_cov = []
        prior_state_mean = []
        prior_state_cov = []
        posterior_state_mean = []
        posterior_state_cov = []
        true_value = []
        log_likelihood = []
        gain = []

    last_time = None

    for an_observable, an_obs, a_time, an_obs_cov, a_true_value in zip(
            observable, obss, times, obs_covs, true_values):
        if a_time is None:
            if last_time is None: a_time = 0
            else: a_time = last_time + 1
        last_time = a_time

        if checks.is_callable(an_observable):
            an_observable = an_observable(an_obs)
        if fun is not None: an_obs = fun(an_obs)
        if an_obs_cov is not None:
            if isinstance(an_obs, (Obs, distrs.Distr)):
                raise ValueError(
                    'An observation covariance is provided while the observation is given by a distribution --- conflicting arguments'
                )
            an_obs = distrs.NormalDistr(an_obs, an_obs_cov)

        if return_df and len(time) == 0:
            an_initial_state_mean = an_observable.filter.state.state_distr.mean
            an_initial_state_cov = an_observable.filter.state.state_distr.cov
            time.append(an_observable.filter.time)
            filter_name.append(an_observable.filter.name)
            filter_type.append(type(an_observable.filter))
            observable_name.append(None)
            accepted.append(None)
            obs_mean.append(None)
            obs_cov.append(None)
            predicted_obs_mean.append(None)
            predicted_obs_cov.append(None)
            cross_cov.append(None)
            innov_mean.append(None)
            innov_cov.append(None)
            prior_state_mean.append(
                npu.to_scalar(an_initial_state_mean, raise_value_error=False))
            prior_state_cov.append(
                npu.to_scalar(an_initial_state_cov, raise_value_error=False))
            posterior_state_mean.append(
                npu.to_scalar(an_initial_state_mean, raise_value_error=False))
            posterior_state_cov.append(
                npu.to_scalar(an_initial_state_cov, raise_value_error=False))
            true_value.append(None)
            log_likelihood.append(None)
            gain.append(None)

        if isinstance(an_obs, Obs):
            a_time, _ = _time_and_obs_distr(an_obs, a_time,
                                            an_observable.filter.time)

        predicted_obs = an_observable.predict(time=a_time,
                                              true_value=a_true_value)
        a_prior_state_mean = an_observable.filter.state.state_distr.mean
        a_prior_state_cov = an_observable.filter.state.state_distr.cov
        obs_result = an_observable.observe(obs=an_obs,
                                           time=a_time,
                                           true_value=a_true_value,
                                           predicted_obs=predicted_obs)
        if obs_result.accepted:
            cumulative_log_likelihood += obs_result.log_likelihood
        a_posterior_state_mean = an_observable.filter.state.state_distr.mean
        a_posterior_state_cov = an_observable.filter.state.state_distr.cov

        if return_df:
            time.append(obs_result.obs.time)
            filter_name.append(an_observable.filter.name)
            filter_type.append(type(an_observable.filter))
            observable_name.append(an_observable.name)
            accepted.append(obs_result.accepted)
            obs_mean.append(
                npu.to_scalar(obs_result.obs.distr.mean,
                              raise_value_error=False))
            obs_cov.append(
                npu.to_scalar(obs_result.obs.distr.cov,
                              raise_value_error=False))
            predicted_obs_mean.append(
                npu.to_scalar(obs_result.predicted_obs.distr.mean,
                              raise_value_error=False))
            predicted_obs_cov.append(
                npu.to_scalar(obs_result.predicted_obs.distr.cov,
                              raise_value_error=False))
            cross_cov.append(
                npu.to_scalar(obs_result.predicted_obs.cross_cov,
                              raise_value_error=False))
            innov_mean.append(
                npu.to_scalar(obs_result.innov_distr.mean,
                              raise_value_error=False))
            innov_cov.append(
                npu.to_scalar(obs_result.innov_distr.cov,
                              raise_value_error=False))
            prior_state_mean.append(
                npu.to_scalar(a_prior_state_mean, raise_value_error=False))
            prior_state_cov.append(
                npu.to_scalar(a_prior_state_cov, raise_value_error=False))
            posterior_state_mean.append(
                npu.to_scalar(a_posterior_state_mean, raise_value_error=False))
            posterior_state_cov.append(
                npu.to_scalar(a_posterior_state_cov, raise_value_error=False))
            true_value.append(
                npu.to_scalar(a_true_value, raise_value_error=False))
            log_likelihood.append(
                npu.to_scalar(obs_result.log_likelihood,
                              raise_value_error=False))
            gain.append(
                obs_result.gain if hasattr(obs_result, 'gain') else None)

    df = None
    if return_df:
        df = pd.DataFrame(
            {
                'time': time,
                'filter_name': filter_name,
                'filter_type': filter_type,
                'observable_name': observable_name,
                'accepted': accepted,
                'obs_mean': obs_mean,
                'obs_cov': obs_cov,
                'predicted_obs_mean': predicted_obs_mean,
                'predicted_obs_cov': predicted_obs_cov,
                'cross_cov': cross_cov,
                'innov_mean': innov_mean,
                'innov_cov': innov_cov,
                'prior_state_mean': prior_state_mean,
                'prior_state_cov': prior_state_cov,
                'posterior_state_mean': prior_state_mean,
                'posterior_state_cov': prior_state_cov,
                'true_value': true_value,
                'log_likelihood': log_likelihood,
                'gain': gain
            },
            columns=('time', 'filter_name', 'filter_type', 'observable_name',
                     'accepted', 'obs_mean', 'obs_cov', 'predicted_obs_mean',
                     'predicted_obs_cov', 'cross_cov', 'innov_mean',
                     'innov_cov', 'prior_state_mean', 'prior_state_cov',
                     'posterior_state_mean', 'posterior_state_cov',
                     'true_value', 'log_likelihood', 'gain'))

    return FilterRunResult(obs_result, cumulative_log_likelihood, df)
Example #6
0
File: visual.py Project: valoox/tsa
    def __init__(self,
                 fig,
                 ax,
                 auto_refresh,
                 title,
                 filter_name,
                 process_prior_filter_states,
                 process_posterior_filter_states,
                 process_true_values,
                 process_obs_results,
                 state_indices=None,
                 state_labels=None,
                 observable_names=None,
                 obs_indices=None,
                 obs_labels=None,
                 state_colours=_default_state_colours,
                 true_value_colours=_default_true_value_colours,
                 obs_colours=_default_obs_colours,
                 *args,
                 **kwargs):
        super().__init__(fig, ax, *args, **kwargs)

        self._process_prior_filter_states = process_prior_filter_states
        self._process_posterior_filter_states = process_posterior_filter_states
        self._process_true_values = process_true_values
        self._process_obs_results = process_obs_results

        if state_indices is not None:
            if not checks.is_iterable(state_indices):
                state_indices = (state_indices, )
            else:
                state_indices = tuple(state_indices)
        if state_labels is not None:
            if not checks.is_iterable(state_labels):
                state_labels = (state_labels, )
            else:
                state_labels = tuple(state_labels)
        checks.is_same_len_or_none(state_indices, state_labels)

        if observable_names is not None:
            checks.check_not_none(obs_indices)
            if not checks.is_iterable(observable_names):
                observable_names = (observable_names, )
            else:
                observable_names = tuple(observable_names)
        if obs_indices is not None:
            checks.check_not_none(observable_names)
            if not checks.is_iterable(obs_indices):
                obs_indices = (obs_indices, )
            else:
                obs_indices = tuple(obs_indices)
        if obs_labels is not None:
            if not checks.is_iterable(obs_labels): obs_labels = (obs_labels, )
            else: obs_labels = tuple(obs_labels)
        checks.is_same_len_or_none(observable_names, obs_indices, obs_labels)

        self._auto_refresh = auto_refresh

        self._title = title

        self._filter_name = filter_name

        self._state_indices = state_indices
        self._state_labels = state_labels
        self._observable_names = observable_names
        self._obs_indices = obs_indices
        self._obs_labels = obs_labels

        self._state_colours = state_colours
        self._true_value_colours = true_value_colours
        self._obs_colours = obs_colours

        self._state_and_true_value_plots_inited = False
        self._obs_plots_inited = False

        if self._state_indices is not None:
            self._init_state_and_true_value_plots()

        self._inited_obs_index_count = 0

        if self._observable_names is not None:
            if self._obs_labels is None:
                self._obs_labels = []
                for observable_name, obs_index in zip(self._observable_names,
                                                      self._obs_indices):
                    if self._observable_names.count(observable_name) == 1:
                        self._obs_labels.append(observable_name)
                    else:
                        self._obs_labels.append('%s %d' %
                                                (observable_name, obs_index))
                self._obs_labels = tuple(self._obs_labels)
            self._actual_observable_names = self._observable_names
            self._actual_obs_indices = self._obs_indices
            self._actual_obs_labels = self._obs_labels
            self._init_obs_plots()
            self._obs_plots_inited = True
        else:
            self._actual_observable_names = []
            self._actual_obs_indices = []
            self._actual_obs_labels = []
Example #7
0
def sparsen(df,
            aggregator=mean_or_last,
            date=None,
            time=None,
            datetime=None,
            bucket='date',
            new_bucket_column=None,
            fix_kind='last',
            fix_time=None,
            fix_points=10,
            min_fix_point_count=None,
            max_fix_point_count=None,
            min_min_fix_point_time=None,
            max_min_fix_point_time=None,
            min_max_fix_point_time=None,
            max_max_fix_point_time=None,
            already_sorted=False,
            aggregators_apply_to_df=False,
            exclude_original_temporal_columns=True,
            columns_to_exclude=None,
            return_extra_info=False):
    checks.is_at_least_one_not_none(datetime, date, time)

    if bucket == 'date':
        bucket = lambda x: conv.to_python_date(x, allow_datetimes=True)
    elif bucket == 'week':
        bucket = lambda x: tsatimes.first_day_of_week(x)

    columns_to_exclude = set() if columns_to_exclude is None else set(
        columns_to_exclude)

    if datetime is not None:
        checks.check_all_none(date, time)
        if isinstance(datetime, str):
            if exclude_original_temporal_columns:
                columns_to_exclude.add(datetime)
            if new_bucket_column is None and exclude_original_temporal_columns:
                new_bucket_column = datetime
            datetime = df[datetime].values
        temporals = datetime
    else:
        if isinstance(date, str):
            if exclude_original_temporal_columns: columns_to_exclude.add(date)
            if new_bucket_column is None and exclude_original_temporal_columns:
                new_bucket_column = date
            date = df[date].values
        if isinstance(time, str):
            if exclude_original_temporal_columns: columns_to_exclude.add(time)
            if new_bucket_column is None and exclude_original_temporal_columns:
                new_bucket_column = time
            time = df[time].values

        if date is not None and time is not None:
            temporals = [dt.datetime.combine(d, t) for d, t in zip(date, time)]
        elif date is not None:
            temporals = date
        else:  # time is not None
            temporals = time

    if new_bucket_column is None: new_bucket_column = 'bucket'

    if fix_kind in ('first', 'after'): comparison = 'ge'
    elif fix_kind == 'after_exclusive': comparison = 'gt'
    elif fix_kind in ('last', 'before'): comparison = 'le'
    elif fix_kind == 'before_exclusive': comparison = 'lt'
    else: raise ValueError('Unfamiliar fix_kind: "%s"' % str(fix_kind))

    if fix_kind in ('first', 'last'): checks.check_none(fix_time)
    else: checks.check_not_none(fix_time)

    numeric_fix_points = checks.is_some_number(fix_points)
    if not numeric_fix_points:
        fix_points = conv.to_python_timedelta(fix_points)

    grouping_df = pd.DataFrame({'temporals': temporals})

    grouped_df = grouping_df.groupby(bucket(temporals))

    columns = [new_bucket_column]
    data = {new_bucket_column: []}
    aggs = {}

    if checks.is_some_dict(aggregator): column_agg_pairs = aggregator.items()
    elif checks.is_iterable(aggregator): column_agg_pairs = aggregator
    else: column_agg_pairs = zip(df.columns, utils.xconst(aggregator))
    for column, agg in column_agg_pairs:
        if column not in columns_to_exclude:
            columns.append(column)
            data[column] = []
            aggs[column] = agg

    dates_with_no_points = []
    dates_with_fix_point_limits_breached = col.OrderedDict()
    fix_point_counts = col.OrderedDict()

    for bucket, group_df in grouped_df:
        if len(group_df) == 0: dates_with_no_points.append(bucket)
        if not already_sorted:
            group_df = group_df.copy()
            group_df.sort_values('temporals', inplace=True)
        if fix_kind == 'first': fix_time = group_df['temporals'].values[0]
        elif fix_kind == 'last': fix_time = group_df['temporals'].values[-1]

        if numeric_fix_points:
            if comparison == 'ge':
                fix_point_indices = group_df.index[tsatimes.temporal_ge(
                    group_df['temporals'], fix_time)][0:fix_points]
            elif comparison == 'gt':
                fix_point_indices = group_df.index[tsatimes.temporal_gt(
                    group_df['temporals'], fix_time)][0:fix_points]
            elif comparison == 'le':
                fix_point_indices = group_df.index[tsatimes.temporal_le(
                    group_df['temporals'], fix_time)][-fix_points:]
            else:  # comparison == 'lt'
                fix_point_indices = group_df.index[tsatimes.temporal_lt(
                    group_df['temporals'], fix_time)][-fix_points:]
        else:
            if comparison == 'ge':
                fix_point_indices = group_df.index[(tsatimes.temporal_ge(group_df['temporals'], fix_time)) & \
                                                   (tsatimes.temporal_le(group_df['temporals'], tsatimes.plus_timedelta(fix_time, fix_points)))]
            elif comparison == 'gt':
                fix_point_indices = group_df.index[(tsatimes.temporal_gt(group_df['temporals'], fix_time)) & \
                                                   (tsatimes.temporal_le(group_df['temporals'], tsatimes.plus_timedelta(fix_time, fix_points)))]
            elif comparison == 'le':
                fix_point_indices = group_df.index[(tsatimes.temporal_le(group_df['temporals'], fix_time)) & \
                                                   (tsatimes.temporal_ge(group_df['temporals'], tsatimes.plus_timedelta(fix_time, -fix_points)))]
            else:  # comparison == 'lt':
                fix_point_indices = group_df.index[(tsatimes.temporal_lt(group_df['temporals'], fix_time)) & \
                                                   (tsatimes.temporal_ge(group_df['temporals'], tsatimes.plus_timedelta(fix_time, -fix_points)))]

        fix_point_limits_breached = set()

        if min_fix_point_count is not None and len(
                fix_point_indices) < min_fix_point_count:
            fix_point_limits_breached.add('min_fix_point_count')
        if max_fix_point_count is not None and len(
                fix_point_indices) > max_fix_point_count:
            fix_point_limits_breached.add('max_fix_point_count')
        if min_min_fix_point_time is not None:
            if checks.is_some_timedelta(min_min_fix_point_time):
                the_min_min_fix_point_time = fix_time + min_min_fix_point_time if comparison in (
                    'ge', 'gt') else fix_time - min_min_fix_point_time
            else:
                the_min_min_fix_point_time = min_min_fix_point_time
            if tsatimes.temporal_lt(
                    min(grouping_df['temporals'].values[fix_point_indices]),
                    the_min_min_fix_point_time):
                fix_point_limits_breached.add('min_min_fix_point_time')
        if max_min_fix_point_time is not None:
            if checks.is_some_timedelta(max_min_fix_point_time):
                the_max_min_fix_point_time = fix_time + max_min_fix_point_time if comparison in (
                    'ge', 'gt') else fix_time - max_min_fix_point_time
            else:
                the_max_min_fix_point_time = max_min_fix_point_time
            if tsatimes.temporal_gt(
                    min(grouping_df['temporals'].values[fix_point_indices]),
                    the_max_min_fix_point_time):
                fix_point_limits_breached.add('max_min_fix_point_time')
        if min_max_fix_point_time is not None:
            if checks.is_some_timedelta(min_max_fix_point_time):
                the_min_max_fix_point_time = fix_time + min_max_fix_point_time if comparison in (
                    'ge', 'gt') else fix_time - min_max_fix_point_time
            else:
                the_min_max_fix_point_time = min_max_fix_point_time
            if tsatimes.temporal_lt(
                    max(grouping_df['temporals'].values[fix_point_indices]),
                    the_min_max_fix_point_time):
                fix_point_limits_breached.add('min_max_fix_point_time')
        if max_max_fix_point_time is not None:
            if checks.is_some_timedelta(max_max_fix_point_time):
                the_max_max_fix_point_time = fix_time + max_max_fix_point_time if comparison in (
                    'ge', 'gt') else fix_time - max_max_fix_point_time
            else:
                the_max_max_fix_point_time = max_max_fix_point_time
            if tsatimes.temporal_gt(
                    max(grouping_df['temporals'].values[fix_point_indices]),
                    the_max_max_fix_point_time):
                fix_point_limits_breached.add('max_max_fix_point_time')

        if len(fix_point_limits_breached) > 0:
            dates_with_fix_point_limits_breached[
                bucket] = fix_point_limits_breached
        else:
            data[new_bucket_column].append(bucket)
            for column in columns[1:]:
                if column not in columns_to_exclude:
                    arg = df.iloc[
                        fix_point_indices] if aggregators_apply_to_df else df.iloc[
                            fix_point_indices][column].values
                    data[column].append(aggs[column](arg))
            fix_point_counts[bucket] = len(fix_point_indices)

    df = pd.DataFrame(data, columns=columns)

    if return_extra_info:
        return {
            'df': df,
            'dates_with_no_points': dates_with_no_points,
            'dates_with_fix_point_limits_breached':
            dates_with_fix_point_limits_breached,
            'fix_point_counts': fix_point_counts
        }
    else:
        return df
Example #8
0
def run(observable,
        obss=None,
        times=None,
        obs_covs=None,
        true_values=None,
        df=None,
        fun=None,
        return_df=False):
    if df is not None:
        if obss is not None and checks.is_string(obss):
            obss = df[obss].values
        if times is not None and checks.is_string(times):
            times = df[times].values
        if obs_covs is not None and checks.is_string(obs_covs):
            obs_covs = df[obs_covs].values
        if true_values is not None and checks.is_string(true_values):
            true_values = df[true_values].values

    checks.check_not_none(obss)

    if not checks.is_iterable_not_string(observable):
        observable = utils.xconst(observable)
    if not checks.is_iterable_not_string(obss): obss = [obss]
    if not checks.is_iterable_not_string(times): times = utils.xconst(times)
    if not checks.is_iterable_not_string(obs_covs):
        obs_covs = utils.xconst(obs_covs)
    if not checks.is_iterable_not_string(true_values):
        true_values = utils.xconst(true_values)

    obs_result = None

    if return_df:
        time = []
        accepted = []
        obs_mean = []
        obs_cov = []
        predicted_obs_mean = []
        predicted_obs_cov = []
        innov_mean = []
        innov_cov = []
        prior_state_mean = []
        prior_state_cov = []
        posterior_state_mean = []
        posterior_state_cov = []
        log_likelihood = []

    for an_observable, an_obs, a_time, an_obs_cov, a_true_value in zip(
            observable, obss, times, obs_covs, true_values):
        if checks.is_callable(an_observable):
            an_observable = an_observable(an_obs)
        if fun is not None: an_obs = fun(an_obs)
        if an_obs_cov is not None:
            if isinstance(an_obs, (Obs, distrs.Distr)):
                raise ValueError(
                    'An observation covariance is provided while the observation is given by a distribution --- conflicting arguments'
                )
            an_obs = distrs.NormalDistr(an_obs, an_obs_cov)

        if return_df and len(time) == 0:
            an_initial_state_mean = an_observable.filter.state.state_distr.mean
            an_initial_state_cov = an_observable.filter.state.state_distr.cov
            time.append(an_observable.filter.time)
            accepted.append(None)
            obs_mean.append(None)
            obs_cov.append(None)
            predicted_obs_mean.append(None)
            predicted_obs_cov.append(None)
            innov_mean.append(None)
            innov_cov.append(None)
            prior_state_mean.append(
                npu.to_scalar(an_initial_state_mean, raise_value_error=False))
            prior_state_cov.append(
                npu.to_scalar(an_initial_state_cov, raise_value_error=False))
            posterior_state_mean.append(
                npu.to_scalar(an_initial_state_mean, raise_value_error=False))
            posterior_state_cov.append(
                npu.to_scalar(an_initial_state_cov, raise_value_error=False))
            log_likelihood.append(None)

        if isinstance(an_obs, Obs):
            a_time, _ = _time_and_obs_distr(an_obs, a_time,
                                            an_observable.filter.time)

        predicted_obs = an_observable.predict(time=a_time,
                                              true_value=a_true_value)
        a_prior_state_mean = an_observable.filter.state.state_distr.mean
        a_prior_state_cov = an_observable.filter.state.state_distr.cov
        obs_result = an_observable.observe(obs=an_obs,
                                           time=a_time,
                                           true_value=a_true_value,
                                           predicted_obs=predicted_obs)
        a_posterior_state_mean = an_observable.filter.state.state_distr.mean
        a_posterior_state_cov = an_observable.filter.state.state_distr.cov

        if return_df:
            time.append(obs_result.obs.time)
            accepted.append(obs_result.accepted)
            obs_mean.append(
                npu.to_scalar(obs_result.obs.distr.mean,
                              raise_value_error=False))
            obs_cov.append(
                npu.to_scalar(obs_result.obs.distr.cov,
                              raise_value_error=False))
            predicted_obs_mean.append(
                npu.to_scalar(obs_result.predicted_obs.distr.mean,
                              raise_value_error=False))
            predicted_obs_cov.append(
                npu.to_scalar(obs_result.predicted_obs.distr.cov,
                              raise_value_error=False))
            innov_mean.append(
                npu.to_scalar(obs_result.innov_distr.mean,
                              raise_value_error=False))
            innov_cov.append(
                npu.to_scalar(obs_result.innov_distr.cov,
                              raise_value_error=False))
            prior_state_mean.append(
                npu.to_scalar(a_prior_state_mean, raise_value_error=False))
            prior_state_cov.append(
                npu.to_scalar(a_prior_state_cov, raise_value_error=False))
            posterior_state_mean.append(
                npu.to_scalar(a_posterior_state_mean, raise_value_error=False))
            posterior_state_cov.append(
                npu.to_scalar(a_posterior_state_cov, raise_value_error=False))
            log_likelihood.append(
                npu.to_scalar(obs_result.log_likelihood,
                              raise_value_error=False))

    if return_df:
        return pd.DataFrame(
            {
                'time': time,
                'accepted': accepted,
                'obs_mean': obs_mean,
                'obs_cov': obs_cov,
                'predicted_obs_mean': predicted_obs_mean,
                'predicted_obs_cov': predicted_obs_cov,
                'innov_mean': innov_mean,
                'innov_cov': innov_cov,
                'prior_state_mean': prior_state_mean,
                'prior_state_cov': prior_state_cov,
                'posterior_state_mean': prior_state_mean,
                'posterior_state_cov': prior_state_cov,
                'log_likelihood': log_likelihood
            },
            columns=('time', 'accepted', 'obs_mean', 'obs_cov',
                     'predicted_obs_mean', 'predicted_obs_cov', 'innov_mean',
                     'innov_cov', 'prior_state_mean', 'prior_state_cov',
                     'posterior_state_mean', 'posterior_state_cov',
                     'log_likelihood'))

    return obs_result