Code example #1
0
    def initial_state(self,
                      name,
                      lagged_values,
                      lagged_times,
                      machine_params=None,
                      machine_state=None,
                      **ignore):
        """ Build the initial tracking state by replaying history into a fresh machine.

            :param name:            stream name, echoed back in the returned state
            :param lagged_values:   most-recent-first stream values
            :param lagged_times:    most-recent-first epoch times
            :param machine_params:  optional params copied into the new machine
            :param machine_state:   optional state copied into the new machine
            :returns dict with keys 't', 'machine', 'as_process', 'dt', 'name'
        """
        # Deep-copy so the caller's params/state objects are never mutated by the machine
        machine = self.machine_type(params=deepcopy(machine_params),
                                    state=deepcopy(machine_state),
                                    hyper_params=deepcopy(
                                        self.machine_hyper_params))

        chronological_values = list(reversed(lagged_values))
        chronological_times = list(reversed(lagged_times))
        as_process = is_process(chronological_values)
        # For a process, feed the machine first differences; prepending 0. makes
        # the first value its own change so lengths line up with dts below.
        values = list(np.diff(
            [0.] +
            chronological_values)) if as_process else chronological_values
        dts = list(
            np.diff([chronological_times[0] - 1.0] + chronological_times))

        for value, dt in zip(values, dts):
            machine.update(value=value, dt=dt)
        return {
            't': lagged_times[0],
            'machine': machine,
            # BUG FIX: return the computed classification rather than a
            # hard-coded True (the digest-based sibling returns the flag too).
            'as_process': as_process,
            'dt': approx_dt(lagged_times),
            'name': name
        }
Code example #2
0
def fox_sample(lagged_values, lagged_times, delay, num, name, as_process=None):
    """ Elementary but not completely woeful sampler, used by Malaxable Fox

        :param lagged_values:  most-recent-first stream values
        :param lagged_times:   most-recent-first epoch times
        :param delay:          prediction horizon in the same units as the times
        :param num:            number of sample points to return
        :param name:           stream name ('~' in the name marks a derived stream)
        :param as_process:     force treating the stream as a cumulative process
        :returns list of num values, nudged and projected onto the lagged lattice
    """
    dt = approx_dt(lagged_times)
    # Steps ahead implied by the delay, floored at 10
    lag = max(10, math.ceil(delay / dt))
    is_proc = as_process or ('~' not in name and StatsConventions.is_process(lagged_values))
    if len(lagged_values) < 250 + lag or not is_proc:
        # Too little history, or not a process: fall back to a decayed bootstrap
        values = exponential_bootstrap(lagged=lagged_values, decay=0.1, num=num, as_process=as_process)
        ret_values = StatsConventions.nudged(project_on_lagged_lattice(values=values, lagged_values=lagged_values))
    else:
        # Empirical distribution of lag-step changes, scaled to roughly 175 draws ...
        changes = np.diff(list(reversed(lagged_values)), n=lag)
        change_counts = Counter(changes)
        num_total = len(changes)
        rounded_counts = {change: round(175 * change_count / num_total)
                          for change, change_count in change_counts.items()}
        values = list()
        for change, rounded_count in rounded_counts.items():
            values.extend([change] * rounded_count)
        # ... plus a uniform integer "spray" to fatten the tails
        change_spray = list(range(-50, 50))
        values.extend(change_spray)
        change_values = values[:num]
        abs_values = [lagged_values[0] + chg for chg in change_values]
        if not len(abs_values) == num:
            # Too many rounded down ... may not be discrete
            abs_values = exponential_bootstrap(lagged=lagged_values, decay=0.1, num=num, as_process=True)
        ret_values = StatsConventions.nudged(project_on_lagged_lattice(values=abs_values, lagged_values=lagged_values))

    return ret_values
Code example #3
0
 def update_state(self,
                  state,
                  lagged_values=None,
                  lagged_times=None,
                  **ignore):
     """ Fold any newly arrived observations into the digest """
     stream_name = state['name']
     # Fetch lags from the stream if the caller did not supply them
     lag_times = lagged_times or self.get_lagged_times(name=stream_name)
     lag_values = lagged_values or self.get_lagged_values(name=stream_name)
     state['dt'] = approx_dt(lag_times)
     # Keep one observation at/before state['t'] so differencing has an anchor
     fresh_values = [
         v for t, v in zip(lag_times, lag_values) if t > state['t'] - 0.0001
     ]
     if state['as_process']:
         increments = list(np.diff(fresh_values))
     else:
         increments = fresh_values[1:]
     for observation in increments:
         state['digest'].update(observation)
     return state
Code example #4
0
 def initial_state(self, name, **ignore):
     """ Decide if it is a process or not, and create initial sketch of CDF of values or changes in values """
     # This is one off. Restarting may change the classification !
     values = self.get_lagged_values(name=name)
     times = self.get_lagged_times(name=name)
     digest = TDigest()
     as_process = is_process(values)
     # Reuse the classification computed above instead of calling is_process twice.
     # NOTE(review): values are lagged (newest first) yet are differenced without
     # reversing, and two trailing zeros are appended before the diff — confirm
     # this is intended (the machine-based sibling reverses first).
     data = np.diff(list(values) +
                    [0., 0.]) if as_process else values
     for value in data:
         digest.update(value)
     return {
         't': times[0],
         'digest': digest,
         'as_process': as_process,
         'dt': approx_dt(times),
         'name': name
     }
Code example #5
0
 def update_state(self,
                  state,
                  lagged_values=None,
                  lagged_times=None,
                  **ignore):
     """ Use recently added values to update the machine

         :param state:          dict holding 't', 'machine', 'as_process', ...
         :param lagged_values:  most-recent-first stream values
         :param lagged_times:   most-recent-first epoch times
         :returns the state dict with the machine advanced through the new data
     """
     machine = state['machine']
     chronological_values = list(reversed(lagged_values))
     chronological_times = list(reversed(lagged_times))
     state['dt'] = approx_dt(chronological_times)
     new_data = [
         (t, v) for t, v in zip(chronological_times, chronological_values)
         if t > state['t'] - 0.0001
     ]  # Include one previous value in new_values, so we can difference
     # BUG FIX: each entry of new_data is (t, v); the values are d[1].
     # The original diffed/sliced d[0], feeding times to the machine as values.
     new_chronological_values = list(np.diff([
         d[1] for d in new_data
     ])) if state['as_process'] else [d[1] for d in new_data[1:]]
     new_chronological_dt = list(np.diff([d[0] for d in new_data]))
     for value, dt in zip(new_chronological_values, new_chronological_dt):
         machine.update(value=value, dt=dt)
     state['machine'] = machine
     # NOTE(review): state['t'] is not advanced here, so the overlap filter
     # above may reprocess observations on the next call — confirm intended.
     return state
Code example #6
0
    def sample(self,
               lagged_values,
               lagged_times=None,
               name=None,
               delay=None,
               **ignored):
        """ Use skater to move and scale

            Runs the skater self.f over observations not yet processed for this
            stream, interpolates a point estimate and std error at the requested
            delay, then delegates to sample_using_point_estimate.

            :param lagged_values:  most-recent-first stream values
            :param lagged_times:   most-recent-first epoch times
            :param name:           stream name keying self.stream_state
            :param delay:          horizon; must be a key of the delay lookup
                                   (built from self.DELAYS)
        """

        # Lazily create per-stream state the first time this stream is seen
        if name not in self.stream_state:
            self.stream_state[name] = {
                'skater_state': {},
                'x': None,
                'x_std': None,
                'dt': None,
                't': None,
                'lookup': None
            }  # Map from delay

        state = self.stream_state[name]

        if state['dt'] is None:
            # Initialize lookups from delay to steps ahead
            state['dt'] = approx_dt(lagged_times)
            # NOTE(review): split_k apparently maps a fractional step count to
            # ((low_k, low_weight), (high_k, high_weight)) interpolation pairs,
            # given how the result is unpacked below — confirm. The 0.1 / 0.01
            # offsets look like guards against rounding and a tiny dt.
            state['lookup'] = dict([
                (dly, split_k(max(1, 0.1 + dly / (0.01 + state['dt'])) - 1))
                for dly in self.DELAYS
            ])
            state['k'] = int(math.ceil(
                (self.DELAYS[-1] + 1.0) / state['dt']))  # max k

        # Determine which observations are yet to be processed by the skater
        if state['t'] is None:
            # First run: warm up on the most recent n_warm observations,
            # replayed in chronological order
            ys = reversed(lagged_values[:self.n_warm])
            ts = reversed(lagged_times[:self.n_warm])
        else:
            # Subsequent runs: only observations strictly newer than state['t']
            all_t = reversed(lagged_times)
            all_y = reversed(lagged_values)
            yt = [(y_, t_) for y_, t_ in zip(all_y, all_t)
                  if t_ > state['t'] + 1e-6]
            ys = [yt_[0] for yt_ in yt]
            ts = [yt_[1] for yt_ in yt]

        # Run the skater
        for y_, t_ in zip(ys, ts):
            state['x'], state['x_std'], state['skater_state'] = self.f(
                y=y_,
                s=state['skater_state'],
                k=state['k'],
                a=None,
                t=t_,
                e=None)
            state['t'] = t_

        # Interpolate point estimate and std errors
        (low_k, low_k_weight), (high_k, high_k_weight) = state['lookup'][delay]
        x_interp = low_k_weight * state['x'][low_k] + high_k_weight * state[
            'x'][high_k]
        x_std_interp = low_k_weight * state['x_std'][
            low_k] + high_k_weight * state['x_std'][high_k]

        # Save stream state for next invocation
        self.stream_state[name] = state

        # Create a hacky estimate of standard error, if necessary
        if not self.use_std:
            x_std_interp = k_std(lagged_values, k=high_k)

        return self.sample_using_point_estimate(x=x_interp,
                                                x_std=x_std_interp,
                                                k=high_k,
                                                name=name,
                                                delay=delay,
                                                lagged_values=lagged_values,
                                                lagged_times=lagged_times)