def fbprophet_univariate_best_r() -> float:
    """Search for the best fbprophet hyper-parameter point r on a random live stream.

    Picks a random regular (non-'~') stream from www.microprediction.org with
    enough history for a burn-in, then optimizes the 2-dim hyper-parameter cube.

    :return: best r found by the optimizer

    NOTE: takes about 12 hrs to run.
    """
    mr = MicroReader()
    names = mr.get_stream_names()
    okay = False
    while not okay:
        name = random.choice(names)
        n_obs = len(mr.get_lagged_values(name=name))
        # Need enough observations beyond the warm-up period; '~' marks derived streams
        okay = n_obs > PROPHET_META['n_warm'] + 50 and '~' not in name
    url = 'https://www.microprediction.org/stream_dashboard.html?stream=' + name.replace('.json', '')
    print('We will find the best fbprophet hyper-parameters for ' + url)
    print('There are ' + str(n_obs) + ' observations in the series.')
    # (typo fixed: "breathe" -> "breath")
    print("Prophet will be fit for most of them, after a burn_in, and for many different hyper-params. Don't hold your breath.")
    best_r, best_value, info = optimal_r_for_stream(f=fbprophet_univariate_r2,
                                                    name=name,
                                                    k=10,
                                                    optimizer=dlib_default_cube,
                                                    n_burn=PROPHET_META['n_warm'] + 20,
                                                    n_trials=50,
                                                    n_dim=2)
    pprint(info)
    params = prophet_params(r=best_r, dim=2)
    pprint(params)
    # Bug fix: the signature promises a float but the original fell off the end
    # (implicitly returning None); return the optimizer's best point.
    return best_r
def stream_data(name: str, n_obs: int):
    """Fetch a univariate stream's history in chronological (oldest-first) order.

    :param name:  stream name, e.g. 'die.json'
    :param n_obs: number of observations to retrieve
    :return: (values, times) as two lists, oldest first
    """
    reader = MicroReader()
    lagged_values, lagged_times = reader.get_lagged_values_and_times(name=name, count=n_obs)
    # The API returns newest-first; flip both series into chronological order
    return list(reversed(lagged_values)), list(reversed(lagged_times))
def random_regular(min_len=500):
    """Randomly select a regular (non-derived) univariate series with enough history.

    :param min_len: minimum number of lagged observations required
    :return: name, url   (stream name and its dashboard url)

    Doc fix: the original docstring claimed ':return: y, t, url' but the function
    actually returns (name, url).
    """
    mr = MicroReader()
    names = mr.get_stream_names()
    okay = False
    while not okay:
        name = random.choice(names)
        n_obs = len(mr.get_lagged_values(name=name, count=10000))
        # Reject short histories and derived ('~') streams
        okay = n_obs > min_len and '~' not in name
    url = 'https://www.microprediction.org/stream_dashboard.html?stream=' + name.replace('.json', '')
    return name, url
def next_opinionated_forecast(n_train, k, n_recent, multiple, name=None):
    # Searches live microprediction streams (with a hospital-data fallback) for a fit
    # window where prophet's forecast is "opinionated" per is_opinonated, then returns
    # that forecast together with its error versus what subsequently happened.
    #
    # :param n_train:  number of observations used to fit prophet
    # :param k:        forecast horizon
    # :param n_recent: forwarded to is_opinonated (recent-window size, presumably)
    # :param multiple: forwarded to is_opinonated (divergence threshold, presumably)
    # :param name:     fixed stream name; when None, random streams are drawn
    # :return: forecast, model, stream name, error, avg_error, realized future values
    #
    # NOTE(review): the fall-through path returns (None, None, None) while the success
    # path returns a 6-tuple - callers unpacking 6 values would break; confirm intent.
    while True:
        try:
            from microprediction import MicroReader
            n_obs = 0
            # Keep drawing until a stream with at least 1000 observations is found
            while n_obs < 1000:
                mr = MicroReader()
                if name is None:
                    names = mr.get_stream_names()
                    random_name = random.choice(names)
                else:
                    random_name = name
                lag_values, lag_times = mr.get_lagged_values_and_times(
                    name=random_name, count=2000)
                # API returns newest-first; flip to chronological order
                y = list(reversed(lag_values))
                t = list(reversed(lag_times))
                n_obs = len(y)
        except ImportError:
            # Offline fallback: synthetic 15-minute spacing for hospital data.
            # NOTE(review): this branch sets `name` but not `random_name`, which the
            # print/return below use - looks like a latent NameError; confirm.
            from timemachines.skatertools.data import hospital
            y = hospital()
            t = [15 * 60 * i for i in range(len(y))]
            name = 'hospital'
        # Slide the fit window forward in steps of 100 observations
        for i in [100 * j for j in range(10)]:
            print('Looking at ' + random_name + ' ' + str(i) + '/1000')
            if len(y) > i + 2 * k + n_train:
                y_fit = y[i:i + n_train]
                t_fit = t[i:i + n_train]
                y_hats, _, forecast, m = prophet_iskater_factory(y=y_fit, k=k, t=t_fit)
                if is_opinonated(y=y_fit, forecast=forecast, k=k,
                                 n_recent=n_recent, multiple=multiple):
                    y_3avg = np.mean(
                        y[i + n_train - 3:i + n_train])  # avg of last three points
                    y_true_mean = np.mean(
                        y[i + n_train + k - 1:i + n_train + k + 1])  # avg of 3 future points
                    # Relative errors; 0.01 guards against division by ~zero levels
                    error = (y_hats[-1] - y_true_mean) / abs(0.01 + y_3avg)
                    avg_error = (y_3avg - y_true_mean) / abs(0.01 + y_3avg)
                    return forecast, m, random_name, error, avg_error, y[
                        i + n_train:i + n_train + k]
        # No opinionated window found on this stream; try another
        print(random_name + ' is okay.')
    # NOTE(review): unreachable after `while True` - original (whitespace-collapsed)
    # indentation is ambiguous; confirm intended placement.
    return None, None, None
def random_stream_name(min_len=500, exclude_str=None, include_str=None):
    """Pick a random stream name satisfying length and substring constraints.

    :param min_len:     minimum required number of lagged observations
    :param exclude_str: reject names containing this substring (when given)
    :param include_str: reject names NOT containing this substring (when given)
    :return: name, url
    """
    mr = MicroReader()
    candidates = mr.get_stream_names()
    while True:
        name = random.choice(candidates)
        n_obs = len(mr.get_lagged_values(name=name, count=10000))
        acceptable = n_obs >= min_len
        if exclude_str is not None and exclude_str in name:
            acceptable = False
        if include_str is not None and include_str not in name:
            acceptable = False
        if acceptable:
            break
    url = 'https://www.microprediction.org/stream_dashboard.html?stream=' + name.replace('.json', '')
    return name, url
def optimal_r_for_stream(f,
                         name: str,
                         k: int,
                         evaluator=None,
                         optimizer=None,
                         n_trials=None,
                         n_dim=None,
                         n_burn: int = None,
                         test_objective_first=True) -> (float, float, dict):  # best_r, best_val, info
    """Find the best hyper-parameters for a univariate skater using live data
    from www.microprediction.org.

    :param f:
    :param name: Choose from https://www.microprediction.org/browse_streams.html but add '.json' to the end
    :param k:
    :param evaluator:
    :param optimizer:
    :param n_trials:
    :param n_dim:
    :param n_burn:
    :param test_objective_first:
    :return: best_r, best_value, info
    """
    reader = MicroReader()
    lagged_values, lagged_times = reader.get_lagged_values_and_times(name=name)
    # Flip newest-first lags into chronological order before optimizing
    chronological_y = list(reversed(lagged_values))
    chronological_t = list(reversed(lagged_times))
    return optimal_r(f=f, y=chronological_y, k=k, a=None, t=chronological_t, e=None,
                     evaluator=evaluator, optimizer=optimizer,
                     n_trials=n_trials, n_dim=n_dim, n_burn=n_burn,
                     test_objective_first=test_objective_first)
from microprediction import MicroReader
import random

# Live tests against the dev API
mr = MicroReader(base_url='https://devapi.microprediction.org')
TEST_STREAM = random.choice(list(mr.get_streams().items()))[0]
DIE = 'die.json'
print(TEST_STREAM)


def test_getters():
    """ Not a great test :) """
    assert mr.get_current_value(DIE)
    assert len(mr.get_lagged_values(DIE)) > 10
    # Each quarantine delay should expose a lagged discrete pdf
    for delay in mr.DELAYS:
        p1 = mr.get_discrete_pdf_lagged(name=DIE, delay=delay)
        assert p1 is not None
        if p1.get('x'):
            # A fair die should price each face near 1/6
            assert abs(p1['y'][0] - 0.1666) < 0.1, "Oh man this die market is so inefficient!"


def test_z_getters():
    """Bivariate z-stream getters return the requested count of 2-d points."""
    stream = 'z2~copula_x~copula_y~70.json'
    zs = mr.get_lagged_zvalues(name=stream, count=3)
    ps = mr.get_lagged_copulas(name=stream, count=3)
    for sample in (zs, ps):
        assert len(sample) == 3
        assert len(sample[0]) == 2
    # Copula coordinates live on the unit interval
    assert 0 <= ps[0][0] <= 1
def plot_helicopter_lags():
    """ Plot a subset of the SciML helicopter challenge data .. psi only """
    reader = MicroReader()
    psi_lags = reader.get_lagged_values('helicopter_psi.json')
    plt.plot(psi_lags)
# A collection of mostly standalone functions illustrating copula functionality at dev.microprediction.org # See also https://github.com/microprediction/PDCI/blob/master/helicopula.ipynb # See article at https://www.linkedin.com/pulse/helicopulas-peter-cotton-phd/ import pandas as pd import matplotlib.pyplot as plt from microprediction import MicroReader, MicroWriter, new_key from copulas.multivariate import GaussianMultivariate import numpy as np from pprint import pprint mr = MicroReader() def get_wind_z2(): """ Retrieve bivariate wind data lagged values """ HELISTREAM = 'z2~seattle_wind_direction~seattle_wind_speed~70.json' lagged_values = mr.get_lagged_values(name=HELISTREAM) return lagged_values def plot_helicopter_data(): """ Plot SciML helicopter challenge data """ pd.read_csv( 'https://raw.githubusercontent.com/SciML/HelicopterSciML.jl/master/data/Lab-Helicopter_Experimental-data.csv' ).plot() def plot_helicopter_lags(): """ Plot a subset of the SciML helicopter challenge data .. psi only """
def test_internet():
    """Smoke test: the sponsors endpoint is reachable and returns a non-trivial list."""
    reader = MicroReader()
    sponsors = reader.get_sponsors()
    assert len(sponsors) > 10
def test_imports():
    """Smoke test: lagged values for 'cop.json' come back with plenty of history."""
    reader = MicroReader()
    history = reader.get_lagged_values(name='cop.json')
    assert len(history) > 500
from microprediction.univariate.expnormdist import ExpNormDist, DEFAULT_EXPNORM_LOWER
import os
import numpy as np
from copy import deepcopy
import time
from microprediction import MicroReader

# Stream names fetched once at import time (requires network access)
mr = MicroReader()
STREAMS = mr.get_stream_names()

# Default parameter point for the exp-norm distribution fit
DEFAULT_EXPNORM_PARAMS = {'g1': 0.5, 'g2': 5.0, 'logK': -2., 'loc': 0.0, 'logScale': 0.0}
# NOTE(review): DEFAULT_EXPNORM_LOWER is imported above and then shadowed by this
# local definition - confirm the override is intentional.
DEFAULT_EXPNORM_LOWER = {'g1': 0.001, 'g2': 0.001, 'logK': -5, 'loc': -0.15, 'logScale': -4}
DEFAULT_EXPNORM_UPPER = {'g1': 1.0, 'g2': 15.0, 'logK': 1, 'loc': 0.15, 'logScale': 4.0}
# Hyper-parameter search configuration; deep copies keep the default bounds untouched
OFFLINE_EXPNORM_HYPER = {'lower_bounds': deepcopy(DEFAULT_EXPNORM_LOWER),
                         'upper_bounds': deepcopy(DEFAULT_EXPNORM_UPPER),
                         'space': None, 'algo': None, 'max_evals': 3}


class ExpNormAccumulator(ExpNormDist):
    """ExpNormDist variant exposing replay-based diagnostics (anchors, manual loss)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def anchors(self, lagged_values, lagged_times):
        """Replay history and collect the 'anchor' entry of the running state at each step."""
        def post_getter(state,value):
            return state['anchor']
        return self.replay(lagged_values=lagged_values, lagged_times=lagged_times,
                           post_getter=post_getter)

    def manual_loss(self, lagged_values, lagged_times, params, state, burn_in=10):
        """ Loss function for a series of values, calculated manually as check """
        # NOTE(review): docstring-only here - the implementation appears to be
        # truncated from this view; confirm against the full file.
def test_delays_1():
    """The reader should expose exactly three quarantine delays."""
    reader = MicroReader()
    assert len(reader.DELAYS) == 3
from microprediction import MicroReader
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt


def to_density(cdf):
    """Convert a CDF dict with 'x' and 'y' lists into a normalized density.

    The CDF is a fast, noisy O(1) approximation, so the density derived from
    it isn't the greatest either.
    """
    # Prepend a synthetic origin point so diff yields one slope per knot
    cdf_y = [0] + cdf['y']
    cdf_x = [cdf['x'][0] - 1.0] + cdf['x']
    slopes = [dy / dx for dx, dy in zip(np.diff(cdf_x), np.diff(cdf_y))]
    total = sum(slopes)
    return [s / total for s in slopes]


if __name__ == "__main__":
    mr = MicroReader()
    # NOTE(review): assumes DELAYS has at least 4 entries and index 3 is ~1 hour;
    # another test in this project asserts len(DELAYS) == 3 - confirm which is current.
    HOUR = mr.DELAYS[3]
    cdf = mr.get_cdf(name='altitude.json', delay=HOUR)
    # plt.plot(cdf['x'], cdf['y'])
    plt.plot(cdf['x'], to_density(cdf))
    plt.show()
    print(
        'https://www.microprediction.org/stream_dashboard.html?stream=altitude.json'
    )