def empirical_ema_r1(y: Y_TYPE, s, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                     r: R_TYPE = None):
    """ Exponential moving average, with empirical std

          y      scalar or vector observation; only y[0] is used
          s      state dict (pass {} on the first call)
          k      number of steps ahead; the same EMA level is returned for every horizon
          r      weight to place on existing anchor point, in [0, 1]; immutable after the first call

        :returns: x [float], x_std [float], s
                  (None, None, s) when there is no observation
    """
    assert r is not None
    # Tolerate y=None rather than failing on wrap(y)[0]
    y0 = None if y is None else wrap(y)[0]

    if not s.get('p'):
        # First call (or the parade is still empty): anchor the average at the first value
        s = {'p': {}, 'x': y0, 'rho': r}
        assert 0 <= s['rho'] <= 1, 'Expecting rho=r to be between 0 and 1'
    else:
        assert abs(r - s['rho']) < 1e-6, 'rho=r is immutable'

    if y0 is None:
        # Keep the (x, x_std, s) ordering so callers unpacking three values
        # still receive the state in the last position.
        return None, None, s

    s['x'] = s['rho'] * s['x'] + (1 - s['rho']) * y0  # Make me better !
    x = [s['x']] * k
    # The parade tracks out-of-sample errors of earlier predictions; the bias is not used here
    _we_ignore_bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
    x_std_fallback = nonecast(x_std, fill_value=1.0)
    # Return a fresh list so the caller never shares the list handed to the parade
    return list(x), x_std_fallback, s
def nprophet_fit_and_predict_simple(
        y: [float],
        k: int,
        freq: str = None,
        model_params: dict = None) -> Tuple[List, List, Any, Any]:
    """ Simpler wrapper for testing - univariate only

          y             list of float observations
          k             number of steps ahead to forecast
          freq          pandas frequency string; defaults to NPROPHET_META['freq']
          model_params  optional overrides for the NeuralProphet constructor arguments

        :returns: x, x_std, forecast, model
                  forecast and model are None when there are fewer than n_lags observations
    """
    assert isinstance(y[0], float)
    freq = freq or NPROPHET_META['freq']

    # Work on a copy: updating NPROPHET_MODEL in place would leak n_forecasts
    # and model_params into every later call.
    used_params = dict(NPROPHET_MODEL)
    used_params.update({'n_forecasts': k})
    if model_params:
        used_params.update(model_params)

    if len(y) < used_params['n_lags']:
        # Too little history for the autoregressive lags.
        # NOTE(review): wrap(y)[0] looks like the *first* element of y rather than the
        # most recent one - confirm that is the intended fallback.
        x = [wrap(y)[0]] * k
        x_std = [1.0] * k
        return x, x_std, None, None

    model = NeuralProphet(**used_params)
    model.set_log_level(log_level='CRITICAL')
    df = pd.DataFrame(columns=['y'], data=y)
    # Synthetic timestamps, starting from an arbitrary date
    df['ds'] = pd.date_range(start='2021-01-01', periods=len(y), freq=freq)
    model.fit(df, freq=freq, epochs=40, use_tqdm=False)
    future = model.make_future_dataframe(df)
    forecast = model.predict(future)
    # Column yhat<j+1> holds the (j+1)-step-ahead forecast; read it j rows after row -k
    x = [forecast['yhat' + str(j + 1)].values[-k + j] for j in range(k)]
    x_std = [1.0] * k  # Placeholder scale
    return x, x_std, forecast, model
def fbprophet_cautious(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Univariate prophet prediction, clipped to a band around the last five observations

        The band for horizon j+1 is [min - sqrt(j+1)*std, max + sqrt(j+1)*std] of the buffered values.
    """
    import math

    if not s.get('s'):
        s['s'] = {}  # prophet's state
        s['y'] = list()  # maintain last five values

    # Rolling buffer of the most recent first coordinates
    s['y'].append(wrap(y)[0])
    if len(s['y']) > 5:
        s['y'].pop(0)

    window = s['y']
    x_upper = list()
    for j in range(k):
        x_upper.append(np.max(window) + math.sqrt(j + 1) * np.std(window))
    x_lower = list()
    for j in range(k):
        x_lower.append(np.min(window) - math.sqrt(j + 1) * np.std(window))

    x, x_std, s['s'] = fbprophet_univariate(y=y, s=s['s'], k=k, a=a, t=t, e=e)

    # Clip from above, then from below
    capped = np.minimum(np.array(x), np.array(x_upper))
    x_careful = np.maximum(capped, np.array(x_lower))
    return list(x_careful), x_std, s
def nproph_univariate(y:Y_TYPE, s:dict, k:int=1, a:A_TYPE=None, t:T_TYPE=None, e:E_TYPE=None):
    """ Univariate skater: forwards only y[0]; y[1:] and a[:] are not passed on """
    first_only = [wrap(y)[0]]
    return nproph_skater_factory(y=first_only, s=s, k=k, a=None, t=t, e=e, method='auto')


# def nproph_exogenous(y:Y_TYPE, s:dict, k:int=1, a:A_TYPE=None, t:T_TYPE=None, e:E_TYPE=None):
#     """ Predict using auto_arima, with both simultaneously observed and known in advance variables
#         This skater has no hyper-parameters
#
#         y: Y_TYPE    scalar or list where y[1:] are interpreted as contemporaneously observed exogenous variables
#         s: state
#         k: Number of steps ahead to predict
#         a: (optional) scalar or list of variables known k-steps in advance.
#            (IMPORTANT: If supplying 'a', provide the known variable k steps ahead, not the contemporaneous one !).
#         t: (optional) Time of observation.
#         e: (optional) Maximum computation time (supply e>60 to give hint to do fitting)
#
#         :returns: x [float] , s', scale [float]
#     """
#     return nproph_skater_factory(y=y, s=s, k=k, a=a, t=t, e=e, method='auto')
#
#
# def nproph_known(y:Y_TYPE, s:dict, k:int=1, a:A_TYPE=None, t:T_TYPE=None, e:E_TYPE=None):
#     """ Uses known-in-advance but not y[1:] """
#     y0 = [wrap(y)[0]]
#     return nproph_skater_factory(y=y0, s=s, k=k, a=a, t=t, e=e, method='auto')


# def nproph_exog_compare(f,k=1):
#     from timemachines.skatertools.evaluation.evaluators import evaluate_mean_absolute_error
#     from timemachines.skatertools.evaluation.evaluators import hospital_with_exog
#     y, a = hospital_with_exog(k=k)
#     y0 = [ yi[0] for yi in y ]
#
#     r = 0.1 # Doesn't matter?
#     err1 = evaluate_mean_absolute_error(f=f, k=k, y=y0, r=r, n_burn=250)
#     err2 = evaluate_mean_absolute_error(f=f, k=k, y=y, r=r, n_burn=250)
#     err3 = evaluate_mean_absolute_error(f=f, k=k, y=y, r=r, a=a, n_burn=250)
#     errlv = evaluate_mean_absolute_error(f=empirical_last_value, k=k, y=y, r=r, a=a, n_burn=250)
#
#
#     print('----------------')
#     print("Error w/o exogenous  = "+str(err1))
#     print("Error w exogenous    = "+str(err2))
#     print("Error w exo + known  = "+str(err3))
#     print("Error last val cache = " + str(errlv))
#
#
# if __name__ == '__main__':
#     f = nproph_exogenous
#     if True:
#         prior_plot_exogenous(f=f, k=1, n=200)
#     if True:
#         prior_plot(f=f,k=1,n=200)
#
def is_opinonated(y, forecast: pd.DataFrame, k: int, n_recent: int, multiple: float) -> bool:
    """ Check if the forecast is far from any recent values, and thus "opinionated"

    :param y:         data used to fit (list of floats, or list of vectors whose first entry is used)
    :param forecast:  dataframe with a 'yhat' column whose last k rows are the k forecasts
    :param k:         number of steps ahead
    :param n_recent:  sets how many recent observations define the reference range
    :param multiple:  number of scaled standard deviations added on either side of that range
    :return:          True as soon as any of the k forecasts falls outside its band
    """
    if isinstance(y[0], float):
        y = [wrap(yj) for yj in y]
    y0 = [yj[0] for yj in y]

    for j in range(1, k + 1):
        # Scale taken from the j-th order difference of the ~50 points preceding the last k
        j_std = np.nanstd(np.diff(y0[-k - 50:-k], j))
        recent_ys = y0[-(k + n_recent):-(k + 1)]
        margin = multiple * j_std * math.sqrt(j)
        upper = np.max(recent_ys) + margin + 0.1
        lower = np.min(recent_ys) - margin - 0.1
        j_x = forecast['yhat'].values[-(1 + k - j)]  # the j-step-ahead forecast
        breached = j_x > upper or j_x < lower
        if breached:
            # The printed distance is always measured from the upper bound
            deviation = abs(j_x - upper)
            print(deviation)
            return True
    return False
def tsa_factory(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                p: int = TSA_P_DEFAULT, d: int = TSA_D_DEFAULT, q: int = TSA_D_DEFAULT) -> ([float], Any, Any):
    """ Extremely simple univariate, fixed p,d,q ARIMA model that is re-fit each time

          y        scalar or vector observation; only y[0] is modelled
          s        state dict (pass {} on the first call)
          k        number of steps ahead to predict; immutable after the first call
          a        known-in-advance variables; buffered in the state but not used by the model
          p, d, q  ARIMA order

        :returns: x [float], x_std [float], s
                  (None, None, s) when y is None
    """
    # NOTE(review): q defaults to TSA_D_DEFAULT, which looks like a copy/paste of d's default -
    # confirm whether a separate q default was intended. Left untouched to preserve behaviour.
    # TODO: FIX THIS TO USE EMPIRICAL STD, OTHERWISE ENSEMBLES ARE DREADFUL
    y = wrap(y)
    a = wrap(a)

    if not s.get('y'):
        s = {'y': list(), 'a': list(), 'k': k, 'p': {}}

    if y is None:
        # Checked before the dimension asserts (which would fail on len(None)), and returned
        # in (x, x_std, s) order so the state stays in the last position.
        return None, None, s

    # Assert immutability of k, dimensions (both are no-ops on a freshly created state)
    if s['y']:
        assert len(y) == len(s['y'][0])
        assert k == s['k']
    if s['a']:
        assert len(a) == len(s['a'][0])

    s['y'].append(y)
    if a is not None:
        s['a'].append(a)

    y0 = y[0]
    if len(s['y']) > max(2 * k + 5, TSA_META['n_warm']):
        y0s = [y_[0] for y_ in s['y']]
        model = ARIMA(y0s, order=(p, d, q))
        try:
            x = list(model.fit().forecast(steps=k))
        except Exception:
            # Best effort: fall back to the last value if the fit or forecast fails,
            # without swallowing KeyboardInterrupt / SystemExit as a bare except would.
            x = [y0] * k
    else:
        # Not warm yet
        x = [y0] * k

    _we_ignore_bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
    x_std_fallback = nonecast(x_std, fill_value=1.0)
    return x, x_std_fallback, s
def regress_level_on_first_known(y:Y_TYPE, s:dict, k, a:A_TYPE=None, t:T_TYPE =None, e:E_TYPE =None)->([float] , Any , Any):
    """ Very basic online regression skater, mostly for testing
           - Only one known in advance variable is utilized
           - Last value is ignored, unless a is None in which case we return 0.0
           - Empirical std is returned

          y   scalar or vector observation; only y[0] is used
          s   state dict (pass {} on the first call)
          k   number of steps ahead that 'a' is supplied; immutable after the first call
          a   known-in-advance variable(s); only a[0] is used

        :returns: x [float], x_std, s
    """
    y0 = wrap(y)[0]  # Ignore contemporaneous, exogenous variables
    # Test against None rather than truthiness: a legitimate value such as 0.0 is falsy and
    # previously left a0 unbound, causing a NameError further down.
    a0 = wrap(a)[0] if a is not None else None  # Ignore all but the first known-in-advance variable

    if not s.get('k'):
        # First invocation
        s = {'p': {}}  # Prediction parade
        s['r'] = {}    # Regression state, not to be confused with hyper-param r
        s['k'] = k
        s['o'] = {}    # The "observance" will quarantine 'a' until it can be matched
    else:
        assert s['k']==k  # Immutability

    if a is None:
        return [0]*k, [1.0]*k, s

    a_t, s['o'] = observance(y=[y0], o=s['o'], k=k, a=[a0])  # Update the observance
    if a_t is None:
        # Nothing to regress on yet: fall back to the last value
        x = [y0]*k
        bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
        return x, x_std, s

    # a_t is the contemporaneous 'a', which was supplied k calls ago.
    if not s['r']:
        # When first calling the online regression algorithm we avoid the degenerate case
        # by sending it two observations.
        y_noise = 0.1*(1e-6+abs(y0))*np.random.randn()
        x_noise = 0.1*(1e-6+abs(a0))*np.random.randn()
        x_pair = [a_t[0]-x_noise, a_t[0]+x_noise]
        y_pair = [y0-y_noise, y0+y_noise]
        s['r'] = regress_one_helper(x=x_pair, y=y_pair, r=s['r'])
    else:
        s['r'] = regress_one_helper(x=a_t, y=[y0], r=s['r'])

    # Predict using the queued known-in-advance values, one per horizon
    x = [s['r']['alpha'] + s['r']['beta']*ak[0] for ak in s['o']['a']]
    # Push prediction into the parade and get the current bias/stderr
    bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
    return x, x_std, s  # TODO: Use the std implied by regression instead
def pmd_known(y: Y_TYPE, s: dict, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Forwards y[0] and the known-in-advance 'a'; contemporaneous exogenous y[1:] are dropped """
    return pmd_skater_factory(y=[wrap(y)[0]], s=s, k=k, a=a, t=t, e=e, method='auto')
def fbprophet_univariate(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Univariate prophet prediction: only y[0] is forwarded, 'a' and y[1:] are dropped """
    target_only = [wrap(y)[0]]
    return fbprophet_skater_factory(y=target_only, s=s, k=k, a=None, t=t, e=e)
def fbprophet_known(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Forwards y[0] together with the known-in-advance 'a'; y[1:] are dropped """
    return fbprophet_skater_factory(y=[wrap(y)[0]], s=s, k=k, a=a, t=t, e=e)
def pmd_univariate(y: Y_TYPE, s: dict, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Univariate skater: forwards only y[0]; y[1:] and a[:] are not passed on """
    target_only = [wrap(y)[0]]
    return pmd_skater_factory(y=target_only, s=s, k=k, a=None, t=t, e=e, method='auto')
def trivial_last_value(y: Y_TYPE, s: dict, k: int = 1, a: A_TYPE = None, t: T_TYPE = None,
                       e: E_TYPE = None) -> ([float], [float], Any):
    """ Last value cache

          y   scalar or vector observation; only y[0] is used
          s   state; returned untouched when y is None, otherwise an empty dict is returned
          k   number of steps ahead

        :returns: x [float], x_std [float], s
                  x repeats y[0] k times and x_std is a constant 1.0 for every horizon
    """
    if y is None:
        return None, None, s
    y0 = wrap(y)[0]  # Ignore the rest
    x = [y0] * k     # What a great prediction !
    # One scale per horizon, matching the [float] annotation and the other skaters;
    # a bare scalar breaks callers that index x_std (e.g. x_std[-1]).
    return x, [1.0] * k, {}
def fbprophet_skater_testor(y :Y_TYPE, s:dict=None, k:int =1, a:A_TYPE =None, t:T_TYPE=None, e:E_TYPE =None, r:R_TYPE =None, freq=None, n_max=None):
    """ A default facebook prophet usage, with no hyper-parameters and no prediction parade

          y      scalar or vector observation
          s      state dict; None or {} on the first call
          k      number of steps ahead; immutable after the first call
          a      known-in-advance variables (buffered, and forwarded to prophet once warm)
          r      accepted for signature compatibility; not used
          freq   pandas frequency string, defaults to PROPHET_META['freq']
          n_max  maximum history length given to prophet, defaults to PROPHET_META['n_max']

        :returns: x [float], x_std [float], s
                  (None, None, s) when y is None
    """
    # For testing
    if freq is None:
        freq = PROPHET_META['freq']
    if n_max is None:
        n_max = PROPHET_META['n_max']
    y = wrap(y)
    a = wrap(a)

    # s defaults to None in the signature, so guard before calling .get()
    if not s or not s.get('y'):
        s = {'y': list(), 'a': list(), 'k': k}

    if y is None:
        # Checked before the dimension asserts, and returned in (x, x_std, s) order
        return None, None, s

    # Assert immutability of k, dimensions (no-ops on a freshly created state)
    if s['y']:
        assert len(y) == len(s['y'][0])
        assert k == s['k']
    if s['a']:
        assert len(a) == len(s['a'][0])

    s['y'].append(y)
    if a is not None:
        s['a'].append(a)

    if len(s['y']) > max(2*k+5, PROPHET_META['n_warm']):
        # NOTE(review): prophet_iskater_factory asserts len(a) == len(y) + k whenever 'a' is
        # non-empty, but y and a are buffered in lock-step here - confirm usage with 'a'.
        x, x_std, _, _ = prophet_iskater_factory(y=s['y'], k=k, a=s['a'], freq=freq, n_max=n_max)
    else:
        # Not warm yet: last value with a unit scale
        x = [y[0]] * k
        x_std = [1.0] * k
    return x, x_std, s
def nprophet_iskater_factory(y: [[float]], k: int, a: List = None, t: List = None, e=None, freq: str = None,
                             n_max=1000, recursive: bool = False, model_params: dict = None,
                             return_forecast=True):
    """ Fit NeuralProphet to the first coordinate of each observation and forecast k steps ahead

        a, t, e, n_max and recursive are accepted but not used.

        :returns: (x, x_std, forecast, model) if return_forecast else (x, x_std)
    """
    # For now we keep it simple. Will add to this over time
    targets = [wrap(yi)[0] for yi in y]
    x, x_std, forecast, m = nprophet_fit_and_predict_simple(
        y=targets, k=k, freq=freq, model_params=model_params)
    if return_forecast:
        return x, x_std, forecast, m
    return x, x_std
def dlm_exogenous_r3(y, s, k, a, t, e, r):
    """ One way to use dlm

          y   observation (y[0] is the target, the rest are exogenous), or None to request an offline fit
          s   state dict; an empty state triggers model construction
          k   number of steps ahead, forwarded to the prediction helper
          r   hyper-parameter, forwarded to dlm_set_exog_hyperparams on the first call
          a, t, e are only forwarded to the recursive fitting call

        :returns: the result of _dlm_exog_prediction_helper when y is not None,
                  otherwise (None, None, s)
    """
    if not s:
        # First call: build the model from trend, seasonality and auto-regressive components
        s = dict()
        s['dim'] = dimension(y)
        s = dlm_set_exog_hyperparams(s=s, r=r)
        y0, exog = split_exogenous(y=y)
        s['n_obs'] = 0
        s['model'] = quietDlm([], printInfo=False) + trend(
            s['trend_degree'], s['discount']) + seasonality(
            s['period'], s['discount'])
        s['model'] = s['model'] + fixedAutoReg(
            degree=s['auto_degree'], name='ar', w=1.0)
        if exog:
            # NaN exogenous values are replaced by None before being handed to the model
            exog_wrapped = [[None if np.isnan(ex0) else ex0 for ex0 in exog]]
            s['model'] = s['model'] + dynamic(features=exog_wrapped,
                                              discount=0.99,
                                              name='exog')  # Set's first exog

    if y is not None:
        y = wrap(y)
        assert dimension(y) == s['dim'], 'Cannot change dimension of data sent'
        s['n_obs'] += 1
        y0, exog = split_exogenous(y=y)
        y0_passed_in = None if np.isnan(
            y0) else y0  # pydlm uses None for missing values
        s['model'].append([y0_passed_in])
        if exog:
            exog_wrapped = [[None if np.isnan(ex0) else ex0 for ex0 in exog]]
            if s['n_obs'] > 1:
                s['model'].append(
                    data=exog_wrapped,
                    component='exog')  # Don't get first exog twice
        num_obs = len(s['model'].data) if s.get('model') else 0
        # Once every n_fit observations, recurse with y=None to run the offline fit below
        if num_obs % s['n_fit'] == s['n_fit'] - 1:
            _, _, s = dlm_exogenous_r3(y=None, s=s, k=k, a=a, t=t, e=10, r=r)
        s['model'].fitForwardFilter()
        return _dlm_exog_prediction_helper(s=s, k=k, y=y)

    if y is None:
        # NOTE(review): dimension(y) is evaluated with y=None here; s['dim'] may have been
        # intended - confirm how dimension() treats None.
        if dimension(y) == 1:
            s['model'].tune(maxit=20)
            # Don't tune if exogenous ... haven't got this to work
        s['model'].fit()
        return None, None, s
def regress_change_on_first_known(y:Y_TYPE, s:dict, k, a:A_TYPE=None, t:T_TYPE =None, e:E_TYPE =None )->([float] , Any , Any):
    """ Very basic modification of the last value cache.
        This looks at the contemporaneous influence of a single known in advance variable.
        Assumes independent increments when estimating the standard deviation.

        This is also intended to illustrate combination of skaters

          y   scalar or vector observation; only y[0] is used
          s   state dict (pass {} on the first call)
          k   number of steps ahead
          a   known-in-advance variable(s), forwarded to regress_level_on_first_known

        :returns: x [float], x_std [float], s
    """
    y0 = wrap(y)[0]  # Ignore contemporaneous, exogenous variables

    # Test for presence rather than truthiness so that a previous value of 0.0 does not
    # reset the state.
    if not s or s.get('prev_y0') is None:
        s = {'prev_y0': y0,
             'd': {}}  # state for difference predicting skater
        # No increment is available yet: last value with a unit scale, one entry per horizon
        return [y0] * k, [1.0] * k, s

    dy0 = y0 - s['prev_y0']
    s['prev_y0'] = y0  # otherwise every increment would be measured from the first observation
    # The level skater returns (x, x_std, state); keep its state for the next call
    dy_hat, dy_hat_std, s['d'] = regress_level_on_first_known(y=[dy0], s=s['d'], k=k, a=a, t=t, e=e)

    # Predicted level = current level + cumulative predicted increments
    x = [y0 + sum_dy for sum_dy in np.cumsum(dy_hat)]

    # Independent increments: variances add, so accumulate squared stds.
    # Missing scales fall back to 1.0, the same fill value used elsewhere in this file.
    if dy_hat_std is None:
        dy_hat_std = [1.0] * len(x)
    variances = [(1.0 if sd is None else sd) ** 2 for sd in dy_hat_std]
    x_std = [math.sqrt(v) for v in np.cumsum(variances)]
    return x, x_std, s
def divinity_univariate_factory(y: Y_TYPE, s, k: K_TYPE, a=None, t=None, e=None, max_buffer_len=1000,
                                n_warm=101, model_params: dict = None):
    """ A partial wrapping of the divinity library with notable limitations:

         - Fits every invocation
         - Ignores exogenous variables
         - State is merely a buffer

        :returns: x, x_std, s     ((None, None, s) when y[0] is None)
    """
    y0 = wrap(y)[0]
    assert n_warm >= 101, ' You must use n_warm'

    if not s:
        s = {'y': []}

    if y0 is None:
        return None, None, s  # Ignore suggestion to fit offline

    # Update buffer; it is only trimmed once it overshoots by 1000
    history = s['y']
    history.append(y0)
    if len(history) > max_buffer_len + 1000:
        s['y'] = history[-max_buffer_len:]

    # Just last value until warm
    still_cold = len(s['y']) < max(n_warm, MIN_N_WARM)
    if still_cold:
        return [y0] * k, [abs(y0)] * k, s

    # Fit and predict
    with no_stdout_stderr():
        settings = deepcopy(DIVINE_MODEL)
        if model_params:
            settings.update(**model_params)
        model = dv.divinity(forecast_length=k, **settings)
        model.fit(np.array(s['y']))
        x = list(model.predict())
        x_std = [1.0] * k  # TODO: fixme
        return x, x_std, s
def empirical_last_value(y: Y_TYPE, s: dict, k: int = 1, a: A_TYPE = None, t: T_TYPE = None,
                         e: E_TYPE = None) -> ([float], Any, Any):
    """ Last value cache, with empirical std

        :returns: x, x_std, s     ((None, None, s) when y is None)
    """
    if not s.get('p'):
        s = {'p': {}}  # Initialize prediction parade

    if y is None:
        return None, None, s

    last = wrap(y)[0]  # Ignore the rest
    x = [last] * k     # What a great prediction !
    bias, x_std, s['p'] = parade(p=s['p'], x=x, y=last)  # update residual queue
    return x, x_std, s
def fbprophet_known_r2(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                       r: R_TYPE = None):
    """ Hyper-parameterized prophet using y[0] and the known-in-advance 'a', but not y[1:]

        r is mapped onto changepoint_prior_scale and seasonality_prior_scale by the factory.
    """
    assert r is not None
    target_only = [wrap(y)[0]]
    return fbprophet_hyperparam_skater_factory(
        y=target_only, s=s, k=k, a=a, t=t, e=e, r=r,
        param_names=['changepoint_prior_scale', 'seasonality_prior_scale'],
        recursive=False)
def fbprophet_univariate_r2(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                            r: R_TYPE = None):
    """ Hyper-parameterized univariate prophet: only y[0] is forwarded, 'a' and y[1:] are dropped

        r is mapped onto changepoint_prior_scale and seasonality_prior_scale by the factory.
    """
    assert r is not None
    target_only = [wrap(y)[0]]
    return fbprophet_hyperparam_skater_factory(
        y=target_only, s=s, k=k, a=None, t=t, e=e, r=r,
        param_names=['changepoint_prior_scale', 'seasonality_prior_scale'],
        recursive=False)
def prior_plot(f, y=None, k=None, t=None, e=None, r=None, x0=np.nan, n=150, n_plot=25):
    """ Apply state machine to univariate series,
        show observations and out of sample predictions

          f       skater
          y       series; a synthetic brownian_with_noise series of length n when omitted
          t       times; 0.0, 1.0, 2.0, ... when omitted
          n_plot  forwarded to plot_with_last_value
    """
    if y is None:
        y = brownian_with_noise(n=n)
    if t is None:
        t = list(map(float, range(len(y))))

    # The times are also supplied as the known-in-advance variable 'a'
    x, x_std = prior(f=f, y=y, k=k, a=t, t=t, e=e, r=r, x0=x0)

    first_coords = [[wrap(obs)[0]] for obs in y]  # target coordinate of each observation
    furthest = [path[-1] for path in x]           # last entry of each prediction vector
    plot_with_last_value(t=t, x=furthest, y=first_coords, k=k, n_plot=n_plot)
def hypocratic_ema_r1(y: Y_TYPE, s, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                      r: R_TYPE = None):
    """ Exponential moving average whose predictions are shrunk towards zero when they are
        small relative to their empirical std

          r : moving average parameter (e.g. 0.75 is fast, 0.95 is slow)
    """
    y0 = wrap(y)[0]
    assert r is not None

    def hypocratic(x: float, x_std: float, confidence=0.5):
        """ Shrink residual prediction towards zero """
        import math
        negligible = abs(x_std) < 1e-6 or abs(x) < 1e-3 * x_std
        if negligible:
            return 0.0
        return x * math.tanh(confidence * abs(x) / (3 * x_std))

    x, x_std, s = empirical_ema_r1(y=y0, s=s, k=k, a=a, t=t, e=e, r=r)

    x_resid = list()
    for level, scale in zip(x, x_std):
        x_resid.append(hypocratic(level, scale))
    return x_resid, x_std, s
def trivial_ema_r1(y: Y_TYPE, s, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                   r: R_TYPE = None):
    """ Exponential moving average

          y      scalar or vector observation; only y[0] is used
          s      state dict (pass {} on the first call)
          k      number of steps ahead; the same EMA level is returned for every horizon
          r      weight to place on existing anchor point, in [0, 1]; immutable after the first call

        :returns: x [float], x_std [float], s    (x_std is a constant 1.0 per horizon)
                  (None, None, s) when there is no observation
    """
    assert r is not None
    # Tolerate y=None rather than failing on wrap(y)[0]
    y0 = None if y is None else wrap(y)[0]

    if not s.get('rho'):
        s = {'x': y0, 'rho': r}
        assert 0 <= s['rho'] <= 1, 'Expecting rho=r to be between 0 and 1'
    else:
        assert abs(r - s['rho']) < 1e-6, 'rho=r is immutable'

    if y0 is None:
        # Keep the (x, x_std, s) ordering so the state stays in the last position
        return None, None, s

    # If the state was created from a missing observation the anchor is still None;
    # seed it with the first real value instead of multiplying None.
    anchor = y0 if s['x'] is None else s['x']
    s['x'] = s['rho'] * anchor + (1 - s['rho']) * y0  # Make me better !
    # k copies of the level - not a single element holding level * k
    x = [s['x']] * k
    return x, [1.0] * k, s
def observance(y: [float], o: dict, k: int, a: [float] = None):
    """ Marshals the k-step ahead vector a and the contemporaneous y[1:] into one vector of
        exogenous variables for the present time step.

        Supplied a's wait in a FIFO queue of length k; the previous y[1:] is cached. Matched
        pairs are accumulated in o['x'] and o['y'].

    :param o:  state
    :param k:  Number of steps ahead that a is provided
    :param y:  observation; y[1:] are the contemporaneously observed exogenous variables
    :param a:  known-in-advance variable(s), if any
    :returns: x_t:[float] vector combining y[1:] with previously supplied a's (None until
              everything required has arrived), and the updated state o
    """
    yw = wrap(y)
    aw = wrap(a)

    if not o:
        o = {'a': [None] * k,
             'z': None,  # Stores the previous value of y[1:]
             'x': list(),
             'y': list()}

    y_t, z = split_exogenous(yw)

    # Previously revealed exogenous variables, if any are being supplied
    z_t = None
    if z:
        z_t = o.get('z')
        o['z'] = z  # Store for next time

    # Known in advance variable pertaining to the present
    a_t = None
    if aw:
        a_t = o['a'].pop(0)
        o['a'].append(aw)  # Put the k-ahead received a value(s) on the queue

    # Combine into exogenous variables ... but only if everything expected has arrived
    if aw and z:
        x_t = (z_t + a_t) if (z_t and a_t) else None
    elif aw:
        x_t = a_t or None
    elif z:
        x_t = z_t or None
    else:
        x_t = None

    if not z and not aw:
        o['y'].append([y_t])  # Special case, no need to wait
    elif x_t:
        o['x'].append(x_t)
        o['y'].append([y_t])
        assert len(o['x']) == len(o['y']), "post-condition"

    return x_t, o
def fbprophet_skater_factory(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                             emp_mass: float = 0.0, emp_std_mass: float = 0.0, freq=None,
                             recursive: bool = False, model_params: dict = None,
                             n_max: int = None) -> ([float], Any, Any):
    """ Prophet skater with running prediction error moments
        Hyper-parameters are explicit here, whereas they are determined from r in actual skaters.

        Params of note:

             a:             value of known-in-advance vars k step in advance (not contemporaneous with y)
             emp_mass:      weight in [0, 1] placed on the bias-corrected prediction
             emp_std_mass:  weight in [0, 1] placed on the empirical std versus prophet's own scale
             t:             epoch time of the observation (must be a float when supplied)

        :returns: x [float], x_std [float], s
                  (None, None, s) when y is None
    """
    assert 0 <= emp_mass <= 1
    assert 0 <= emp_std_mass <= 1

    if freq is None:
        freq = PROPHET_META['freq']
    if n_max is None:
        n_max = PROPHET_META['n_max']

    y = wrap(y)
    a = wrap(a)

    if not s.get('y'):
        s = {'p': {},       # parade
             'y': list(),   # historical y
             'a': list(),   # list of a known k steps in advance
             't': list(),
             'k': k}

    if y is None:
        # Checked before the dimension asserts (which would fail on len(None)), and returned
        # in (x, x_std, s) order so the state stays in the last position.
        return None, None, s

    # Assert immutability of k, dimensions of y,a (no-ops on a freshly created state)
    if s['y']:
        assert len(y) == len(s['y'][0])
        assert k == s['k']
    if s['a']:
        assert len(a) == len(s['a'][0])

    s['y'].append(y)
    if a is not None:
        s['a'].append(a)
    if t is not None:
        assert isinstance(t,float), 'epoch time please'
        s['t'].append(t)

    if len(s['y']) > max(2 * k + 5, PROPHET_META['n_warm']):
        # Offset y, t, a are supplied to prophet interface:
        # dropping the first k of y and t leaves 'a' with k more entries than y
        t_arg = s['t'][k:] if t is not None else None
        a_arg = s['a']
        y_arg = s['y'][k:]
        x, x_std, forecast, model = prophet_iskater_factory(y=y_arg, k=k, a=a_arg, t=t_arg,
                                                            freq=freq, n_max=n_max,
                                                            recursive=recursive, model_params=model_params)
        s['m'] = True  # Flag indicating a model has been fit (there is no point keeping the model itself, however)
    else:
        x = [y[0]] * k
        x_std = None

    # Get running mean prediction errors from the prediction parade
    x_resid, x_resid_std, s['p'] = parade(p=s['p'], x=x, y=y[0])
    # NOTE(review): a missing residual is filled with y[0] rather than 0 - confirm this is intended
    x_resid = nonecast(x_resid,y[0])
    x_resid_std = nonecast(x_resid_std,1.0)

    # Compute center of mass between bias-corrected and uncorrected predictions
    x_corrected = np.array(x_resid) + np.array(x)
    x_center = nonecenter(m=[emp_mass, 1 - emp_mass], x=[x_corrected, x])
    x_std_center = nonecenter(m=[emp_std_mass, 1 - emp_std_mass], x=[x_resid_std, x_std])

    return x_center, x_std_center, s
def ensemble_factory(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                     fs: List = None, rs: List = None, g=None, r=None, include_std=True) -> ([float], Any, Any):
    """ Ensembles *only* the k-step ahead

          fs  - list of skaters
          rs  - list of hyper-params, if any
          g   - exogenous skater
          r   - hyper-param for g, if any
          include_std - bool. If True, will add x_std to the exogenous variables sent to g

        :returns: x, x_std, s     ((None, None, s) when y is None)
    """
    if not s.get('s_fs'):
        s = {'s_fs': [{} for _ in fs],
             's_g': {},
             'n_obs': 0}

    if y is None:
        return None, None, s

    # Apply models, keeping only the point estimate (and optionally the scale) at the furthest horizon
    xjs = list()
    rs = rs or [None for _ in fs]
    # The loop variable is named rj so that it does not clobber the parameter r,
    # which is g's hyper-parameter and is needed below.
    for j, (f, rj) in enumerate(zip(fs, rs)):
        if rj is not None:
            xj, xj_std, s['s_fs'][j] = f(y=y, s=s['s_fs'][j], k=k, a=a, t=t, e=e, r=rj)
        else:
            xj, xj_std, s['s_fs'][j] = f(y=y, s=s['s_fs'][j], k=k, a=a, t=t, e=e)
        xjs.append(xj[-1])
        if include_std:
            xjs.append(xj_std[-1])

    s['n_obs'] += 1
    y0 = wrap(y)[0]
    if s['n_obs'] < 10:
        # Warm-up: g is not consulted yet.
        # NOTE(review): the scale returned here is the observation itself - confirm intent.
        return [y0] * k, [y0] * k, s

    # Feed the constituent predictions to g as contemporaneous exogenous variables
    y_extend = [y0] + xjs
    if r is None:
        x, x_std, s['s_g'] = g(y=y_extend, s=s['s_g'], k=k, a=a, t=t, e=e)
    else:
        x, x_std, s['s_g'] = g(y=y_extend, s=s['s_g'], k=k, a=a, t=t, e=e, r=r)
    return x, x_std, s
def nproph_skater_factory(
        y: Y_TYPE,
        s: dict,
        k: int = 1,
        a: A_TYPE = None,
        t: T_TYPE = None,
        e: E_TYPE = None,
        method: str = 'default',
        n_warm=50,
        model_params: dict = None
) -> (Union[List[float], None], Union[List[float], None], Any):
    """ Predict using both simultaneously observed and known in advance variables

          y: Y_TYPE    scalar or list where y[1:] are interpreted as contemporaneously observed exogenous variables
          s: state
          k: Number of steps ahead to predict
          a: (optional) scalar or list of variables known k-steps in advance.
             When calling, provide the known variable k steps ahead, not the contemporaneous one.
          t: (optional) Time of observation.
          e: (optional) Maximum computation time (supply e>60 to give hint to do fitting)

        :returns: x [float], x_std [float], s'      or (None, None, s') when y is None

        Remarks:
           - Model params cannot be changed after the first invocation.
           - Allows y=None to be used
    """
    y = wrap(y)
    a = wrap(a)

    if not s.get('n_obs'):
        # Initialize
        s['n_obs'] = 0
        s['model'] = None
        s['immutable'] = nproph_set_immutable(k=k, y=y, a=a, n_warm=n_warm)
        s['params'] = nproph_params(method=method)
        if model_params:
            s['params'].update(model_params)
        s['o'] = dict()  # Observance
    else:
        nproph_check_consistent_usage(y=y, s=s, a=a, k=k)

    tick(s)
    if t is not None:
        pass  # Other models might perform an evolution step here. Not applicable to nprophARIMA

    if y is not None:
        # Receive observation y[0], possibly exogenous y[1:] and possibly k-in-advance a[:]
        # Collect from queues the contemporaneous variables
        s['n_obs'] += 1
        y_t, z = split_exogenous(y)
        x_t, s['o'] = observance(y=y, o=s['o'], k=k, a=a)

        # Update the npropharima model itself
        # NOTE(review): update(...) and predict(n_periods=..., return_conf_int=...) below look like
        # the pmdarima interface this code was adapted from - confirm the fitted NeuralProphet
        # object supports them.
        if x_t is not None:
            if s['model'] is not None:
                if x_t:
                    s['model'].update([y_t], [x_t])
                else:
                    s['model'].update([y_t])

        # Predict
        if s['model'] is None:
            # Fall back to last value if there is no model calibrated as yet
            x = [y_t] * k
            if len(s['o']['x']) > 5 + 2 * k:
                # Spread of (x - y shifted by j) over the buffered pairs, one value per horizon j
                Y = s['o']['y'][k + 1:]
                X = s['o']['x'][k + 1:]
                x_std = [
                    np.nanstd([xi[0] - yk[0] for xi, yk in zip(X, Y[j:])])
                    for j in range(1, k + 1)
                ]
            else:
                x_std = [1.0] * k  # Fallback to dreadful estimate
        else:
            # Predict forward, supplying known data if it exists
            if not a and not z:
                z_forward = None
            else:
                if not a:
                    z_forward = [z] * k
                else:
                    z_forward = [list(z) + list(ai) for ai in s['o']['a']
                                 ]  # Add known k-steps ahead
                    # This estimate could be improved by predicting z's and attenuating
                    # It is only really a good idea for k=1
            x, ntvls = s['model'].predict(n_periods=k,
                                          X=z_forward,
                                          return_conf_int=True,
                                          alpha=s['immutable']['alpha'])
            # Width of each returned interval is used as the scale
            x_std = list([ntvl[1] - ntvl[0] for ntvl in ntvls])

    # Fit
    tock(s)
    if nproph_it_is_time_to_fit(s=s, e=e):
        tick(s)
        X = s['o'].get('x') or None
        Y = s['o']['y']
        # s['model'] = pm.auto_arima(y=Y, X=X, **s['params'])
        # X is collected above but is not passed to the NeuralProphet fit below
        s['model'] = NeuralProphet(
            n_lags=s['params']['n_lags'],
            changepoints_range=s['params']['changepoints_range'],
            n_changepoints=s['params']['n_changepoints'],
            weekly_seasonality=s['params']['weekly_seasonality'],
            batch_size=s['params']['batch_size'],
            epochs=s['params']['epochs'],
            learning_rate=s['params']['learning_rate'],
        )
        # Synthetic timestamps: fixed start date and 5 minute spacing
        dummy_freq = '5min'
        dummy_start = '2021-01-01'
        DF = pd.DataFrame(columns=['y'], data=Y)
        DF['ds'] = pd.date_range(start=dummy_start,
                                 periods=len(Y),
                                 freq=dummy_freq)
        s['model'].fit(DF, freq=dummy_freq)
        print(s['model'].data_params)
        pprint(tocks(s))
        tock(s, 'fit')
        pprint(tocks(s))

    if y is not None:
        return list(x), list(x_std), s
    else:
        return None, None, s
def prophet_iskater_factory(y: [[float]], k: int, a: List = None, t: List = None, e=None, freq: str = None,
                            n_max=1000, recursive: bool = False, model_params: dict = None,
                            return_forecast=True):
    """
    :param y:           A list of observations, each a vector (a list of floats is also accepted and wrapped).
    :param k:           Number of steps ahead to predict
    :param a:           Known in advance observations - should be k more of these than y's
    :param t:           Epoch times of observations y. If len(t)=len(y)+k the last k are interpreted as future times.
    :param freq:        'D', '5T' etc, see https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/frequencies.py
    :param n_max:       Maximum number of observations to use, should you wish to prevent prophet from slowing down
    :param recursive:   If True, exogenous variables y[1], y[2],... will be predicted forward in time (obviously this adds to computation time)
    :param model_params: Optional overrides applied on top of PROPHET_MODEL when constructing Prophet
    :param return_forecast: If False, only (x, x_std) are returned
    :returns: x         k-vector of predictions
              x_std     k-vector of yhat_upper - yhat_lower differences, used as a scale
              forecast  full forecast dataframe, familiar to users of fbprophet
              m         the fitted Prophet model
    """
    if a:
        assert len(a) == len(y) + k
    if isinstance(y[0], float):
        y = [wrap(yj) for yj in y]

    # Conversion of epoch times to UTC datetime
    # User must supply times, len(y) or len(y)+k, or a valid frequency str
    if t is None:
        if freq is None or not freq:
            freq = PROPHET_META['freq']  # Just assume away ...
        else:
            assert is_valid_freq(
                freq), 'Freq ' + str(freq) + ' is not a valid frequency'
        dt = pd.date_range(start=EPOCH, periods=len(y), freq=freq)  # UTC
    else:
        freq = infer_freq_from_epoch(t)
        dt = epoch_to_naive_datetime(t)

    if len(dt) == len(y) + k:
        # The caller supplied future times as well: keep them aside and trim dt to the history
        ta = dt
        dt = dt[:len(y)]
    else:
        assert len(dt) == len(
            y), 'Time vector t should be len(y) or len(y)+k'
        ta = None

    # Truncate history so that prophet doesn't take forever to fit
    y_shorter = y[-n_max:]
    a_shorter = a[-(n_max + k):] if a is not None else []  # may be empty
    dt_shorter = dt[-n_max:]

    # Massage data into Prophet friendly dataframe with columns y, y1, ..., yk, a0,...aj
    y_cols = [
        'y' + str(i) if i > 0 else 'y' for i in range(len(y_shorter[-1]))
    ]
    if a:
        a_cols = ['a' + str(i) for i in range(len(a_shorter[-1]))]
        # The last k rows of 'a' pertain to the future, so they are left out of the training frame
        data = [
            list(yi) + list(ai) for yi, ai in zip(y_shorter, a_shorter[:-k])
        ]
        df = pd.DataFrame(columns=y_cols + a_cols, data=data)
    else:
        data = [list(yi) for yi in y_shorter]
        df = pd.DataFrame(columns=y_cols, data=data)
    df['ds'] = dt_shorter

    # Instantiate Prophet model, ensure defaults are what we think they are
    kwargs_used = dict([(k, v) for k, v in PROPHET_MODEL.items()])
    if model_params:
        kwargs_used.update(model_params)
    m = Prophet(**kwargs_used)

    # Add regressors
    for y_col in y_cols[1:]:
        m.add_regressor(name=y_col)
    if a:
        for a_col in a_cols:
            m.add_regressor(name=a_col)

    # Fit the model every invocation ... there isn't any other way
    with no_stdout_stderr():
        m.fit(df)

    # Make future dataframe, adding known-in-advance variables
    future = m.make_future_dataframe(periods=k, freq=freq)
    if a:
        for j, a_col in enumerate(a_cols):
            future[a_col] = [ai[j] for ai in a_shorter]  # Known in advance
    if ta is not None:
        # NOTE(review): ta is not truncated to n_max like the history is; when len(y) > n_max its
        # length may not match `future` - confirm.
        future['ds'] = ta  # override with user supplied future times

    # Next, we wish to add contemporaneously observed variables
    #
    # This is somewhat problematic, for how should we bring exogenously observed variables forward?
    # The simplest answer is, don't use them - only supply 1-vector y observations
    # prophet implicitly assumes all exogenous are known, which is a pretty big shortcoming.
    #
    # However, if we are trying to support y[1:], ...
    #    - It seems consistent to use prophet to predict these forward,
    #    - It also seems likely that this will lead to over-fitting.
    # I'm open to ideas here. Perhaps some hackery could effect attenuation of the coefficients
    # assigned to y[1],... such as jiggling past observations. For now we use prophet on each
    # one individually, feeding them the known in advance 'a' variables.
    n_exog = len(y[0]) - 1
    if n_exog > 0:
        for j, y_col in enumerate(y_cols):
            if j > 0:
                yj = [yi[j] for yi in y_shorter]
                if recursive:
                    # Forecast the exogenous column itself with a non-recursive call
                    yj_hat, yj_hat_std, yj_forecast, yj_m = prophet_iskater_factory(
                        y=yj,
                        k=k,
                        a=a_shorter,
                        freq=freq,
                        n_max=n_max,
                        recursive=False)
                else:
                    # Otherwise carry the last observed value forward
                    yj_hat = [yj[-1]] * k
                future[y_col] = yj + list(yj_hat)

    # Call the prediction function
    forecast = m.predict(future)
    x = list(forecast['yhat'].values[-k:]
             )  # Use m.plot(forecast) to take a peek

    # Interpret confidence level difference as scale to be returned. TODO: set alpha properly so this really is 1-std
    x_std = list([
        u - l for u, l in zip(forecast['yhat_upper'].values[-k:],
                              forecast['yhat_lower'].values[-k:])
    ])
    if return_forecast:
        return x, x_std, forecast, m
    else:
        return x, x_std
def residual_chaser_factory(y :Y_TYPE, s:dict, k:int, a:A_TYPE =None, t:T_TYPE =None, e:E_TYPE =None,
                            f1=None, f2=None, r1=None, r2=None)->([float] , Any , Any):
    """ Second model predicts k=1, k=k residuals of the first, and interpolates

          f1 - A skater making the primary prediction
          f2 - A skater designed to predict residuals ... both 1 step ahead and k-steps ahead
          r1 - hyper-params for f1, if any
          r2 - hyper-params for f2, if any

        It *may* make sense to choose an f2 that shrinks towards zero.

        :returns: x [float], x_std [float], s     ((None, None, s) when y[0] is None)
    """
    # Determines horizons over which residual model is used.
    # We'd rather not call the residual model k-times
    J = [1] if k == 1 else [1, k]

    y0 = wrap(y)[0]
    if not s.get('s1'):
        s = {'sres': {},                 # Residual state ... used to determine the residual
             'x': y0,
             's1': {},                   # First model state
             's2': {j: {} for j in J},   # Residual model states, one per horizon in J
             'n_obs': 0}

    if y0 is None:
        return None, None, s

    # Use the first skater to predict
    if r1 is None:
        x1, x1_std, s['s1'] = f1(y=y, s=s['s1'], k=k, a=a, t=t, e=e)
    else:
        x1, x1_std, s['s1'] = f1(y=y, s=s['s1'], k=k, a=a, t=t, e=e, r=r1)

    resid1, s['sres'] = residual(s['sres'], y=y0, x=x1)
    s['n_obs'] += 1

    # Use the second skater to predict j-step ahead residuals
    # There are two copies of the residual model employed.
    res_j_hat = [None for _ in J]
    res_j_std = [None for _ in J]
    for jpos, j in enumerate(J):
        j_ahead_residual = resid1[j-1]
        if r2 is None:
            _x, _std, s['s2'][j] = f2(y=j_ahead_residual, s=s['s2'][j], k=j, a=a, t=t, e=e)
        else:
            _x, _std, s['s2'][j] = f2(y=j_ahead_residual, s=s['s2'][j], k=j, a=a, t=t, e=e, r=r2)
        # f2 was asked for j steps, so its j-step-ahead value is the LAST entry. Indexing by
        # jpos picked the 2-step-ahead entry for the k-step model, which is wrong when k > 2.
        res_j_hat[jpos] = _x[-1]
        res_j_std[jpos] = _std[-1]

    # Interpolate between the 1-step and k-step residual predictions
    if k == 1:
        res_interp = res_j_hat
        res_interp_std = res_j_std
    else:
        import numpy as np
        ks = list(range(1, k+1))
        res_interp = np.interp(x=ks, xp=J, fp=res_j_hat)
        res_interp_std = list(np.interp(x=ks, xp=J, fp=res_j_std))

    # Residual res = y - x1, so x1+res ~ y .... one hopes
    x_hat = [resj + x1j for resj, x1j in zip(res_interp, x1)]
    return x_hat, res_interp_std, s