Exemple #1
0
 def __init__(self, variable: str, param: op.Param,
              dloader: xp.DataLoader) -> None:
     """Store the inputs, fetch the data, then train and validate.

     The annotations name the classes themselves; annotating with
     ``op.Param()`` / ``xp.DataLoader()`` would build throwaway objects
     every time the enclosing class body is executed.

     Args:
         variable: Name of the variable this object works on.
         param: Parameter object kept for the later training steps.
         dloader: Data loader; its ``table`` attribute is cached here.
     """
     self.__variable = variable
     self.__param = param
     self.__dloader = dloader
     # Derived state. fetch_data() runs while only the three attributes
     # above exist, so it must not depend on anything assigned below.
     self.__data = self.fetch_data()
     self.__table = self.__dloader.table
     # Placeholders that exist before training starts.
     self.__model = None
     self.__forecast = None
     self.__cv_metrics = cv.CVMetrics()
     # Training and validation run eagerly as part of construction; the
     # return values of trainer() / validator() are kept as-is.
     self.__trained = self.trainer()
     self.__validated = self.validator()
Exemple #2
0
    df[col + 'k'] = smooth(df[col + 'k'], w, 3)

    #doubling time
    df[col + 'T'] = np.log(2) / df[col + 'k']
    #df.loc[df[col+'T'] > 100, col+'T'] = 0

    # 2nd Derivative
    df[col + 'D2'] = np.gradient(np.gradient(df[col]))
    df[col + 'D2'] = smooth(df[col + 'D2'], w, 5)

    #growth factor
    df[col + 'GF'] = growth_factor(df[col])
    df[col + 'GF'] = smooth(df[col + 'GF'], w, 3)

    # df[i]/df[i-1] = growth ratio
    df[col + 'GR'] = growth_ratio(df[col])
    df[col + 'GR'] = smooth(df[col + 'GR'], w, 5)

    return df


## Driver script: build, augment, plot and print the "Confirmed" view of one country.
country = "Germany"
top = 10  # forwarded to the loader as `top`; presumably a top-N country cut — confirm in DataLoader
ld = xp.DataLoader(top=top)
df = ld.covid_data
# NOTE(review): `cv` is rebound twice below; if this module also imports
# something under the alias `cv`, these assignments would shadow it — verify.
cv = country_view(df, country)
cv = augment_view(cv, "Confirmed")
# The meaning of the trailing positional True is not visible here — check plot_view's signature.
plot_view(cv, "Confirmed", country, True)
print(country, "\n", cv)
Exemple #3
0

import numpy as np
import proc as xp
import optparam as op
import prophet_trainer as pt

# Training data set and table, each read from its own freshly built loader.
# NOTE(review): DataLoader is constructed twice here; a single shared
# instance would presumably give the same attributes — confirm before merging.
ds = xp.DataLoader().train_ds_confirmed
tb = xp.DataLoader().table
floor_points = [0]#, 10e3, 20e3, 50e3]
# Capacity scan range; the bounds and step are plain floats.
cap_lower = 700e3
cap_upper = 1200e3
cap_step = 100e3
# The "+1" on the stop value makes cap_upper itself part of the scan,
# since np.arange excludes its stop argument.
cap_benchpoints = np.arange(cap_lower,
                            cap_upper+1,
                            cap_step).tolist()
# Candidate values for the remaining scan dimensions.
seasonality_modes = ['multiplicative', 'additive']
future_periods = [21]
changepoint_prior_scales = [0.05, 0.5]
interval_widths = [0.90, 0.95]

print("Logistic scan points:", cap_benchpoints)

# Accumulator for the parameter combinations built by the nested loops that follow.
optparams = []
for cap in cap_benchpoints:
    for floor in floor_points:
        for smode in seasonality_modes:
            for periods in future_periods:
                for cpps in changepoint_prior_scales:
                    for iw in interval_widths:
Exemple #4
0
import proc as xp

import xquery as xq

# Script: plot the instantaneous doubling time of each selected country.
q0 = xq.Query("All Period", "Confirmed > 0 and Date < '2021-01-01'")

ld = xp.DataLoader(query=q0)

ld.reporter()
# query_raw is handed to the per-country loaders below; query_derived is
# applied afterwards to the loaded frame, and only when its text is non-empty.
query_raw = xq.Query("Base", "Date > '2020-03-01'")
query_derived = xq.Query("Recent", "")  #RecentDays <=15 ")

# Column of the `leaders` frame used as the y series.
y_data_label = 'Confirmed All'

asia = ['Mainland China', 'South Korea', 'Iran']
europe = ['Germany', 'UK', 'Italy', 'Spain',
          'France']  #, 'Greece']#, 'Cyprus']
amerika = ['US']
countries = europe + amerika  # + asia
#countries = ['Germany']

# NOTE(review): `fig` and `ax` are not used in the lines visible here; the
# plotting below goes through the pyplot state machine (plt.plot / plt.gca).
fig, ax = plt.subplots(figsize=(15, 7))

for country in countries:
    # One loader per country, restricted by query_raw.
    dloader = xp.DataLoader(query=query_raw, countries=[country])
    df = dloader.leaders
    if query_derived.query:
        df = df.query(query_derived.query)
    x = df['Days'].to_numpy()
    # Sample spacing taken from the first two points only.
    # NOTE(review): assumes at least two rows and uniformly spaced 'Days' — confirm.
    dx = x[1] - x[0]
    y = df[y_data_label].to_numpy()
    dy = np.gradient(y, dx)
    # Relative growth rate k = (dy/dx) / y and doubling time T = ln(2) / k.
    # NOTE(review): entries with y == 0 or k == 0 give inf/nan here.
    k = dy / y
    T = np.log(2) / k

    # Negative doubling times (series shrinking) are clamped to zero.
    T[T < 0] = 0

    plt.plot(x, T, label=country)

    axes = plt.gca()
Exemple #6
0
import proc as xp
import pandas as pd
import tools as xt
import xquery as xq
# Output directory for the prediction images.
odir = 'images/predictions'

# Available selections: the whole period, or Germany only.
qAll = xq.Query("All Period", "Confirmed > 0 and Date < '2021-01-01'")
qGerm = xq.Query("Germany", "Confirmed > 0 and Country == 'Germany'")

# Active selection; the commented line is the all-period alternative.
#query = qAll; tag = ""
query = qGerm
tag = "Germany"

# Data loader built with the arima flag switched on.
dloader = xp.DataLoader(query=query, arima=True)

# Only the 'Confirmed' column of the training set is kept.
# NOTE(review): the name `df` suggests a frame, but selecting a single column
# presumably yields a one-dimensional series — confirm against DataLoader.
df = dloader.train_ds_confirmed['Confirmed']

model = pm.auto_arima(
    df.values,
    start_p=1,
    start_q=1,
    test='adf',  # use adftest to find optimal 'd'
    max_p=4,  # maximum p
    max_q=4,  # maximum q
    m=1,  # frequency of series
    d=1,  # let model determine 'd'
    seasonal=False,  # No Seasonality
    start_P=1,
    D=0,
Exemple #7
0
def fit(country='',
        query_raw=None,
        query_der=None,
        do_1st_order=True, do_2nd_order=False, show=True):
    """Fit growth curves to one country's 'Confirmed All' series and plot them.

    The country's data is loaded through ``xp.DataLoader``, optionally
    narrowed with ``query_der``, and fitted with ``fitfunc1`` and/or
    ``fitfunc2`` via ``optimize.curve_fit``. The data points, the nominal
    fits and a band obtained by shifting all parameters by +/- ``nstd``
    parameter errors are drawn and saved under ``images/doubling_time``.

    Args:
        country: Country handed to the loader; with spaces replaced by
            underscores it is also the name of the saved figure.
        query_raw: Query object forwarded to ``xp.DataLoader(query=...)``.
        query_der: Optional object whose ``query`` string is applied to the
            loaded ``leaders`` frame. ``None`` or an empty string skips the
            extra filtering.
        do_1st_order: Run the ``fitfunc1`` fit and compute the doubling time.
        do_2nd_order: Run the ``fitfunc2`` fit and print its doubling times.
        show: Call ``plt.show()`` before the figures are closed.

    Returns:
        Tuple ``(r, dr)``: the doubling time ``ln(2) / b`` of the first-order
        fit and its propagated error. Both are ``nan`` when ``do_1st_order``
        is false, so callers can always unpack the result.
    """
    odir = 'images/doubling_time'
    y_data_label = 'Confirmed All'
    nstd = 1  # half-width of the drawn band, in parameter standard errors

    _countries = [country]
    print(_countries)
    dloader = xp.DataLoader(query=query_raw, countries=_countries)
    df = dloader.leaders
    # The default query_der=None has no ``query`` attribute, and an empty
    # expression cannot be evaluated, so filter only on a real query string.
    if query_der is not None and query_der.query:
        df = df.query(query_der.query)

    print("Fit:", df.head())

    # Both series are reversed before fitting.
    # NOTE(review): presumably `leaders` is ordered newest-first — confirm.
    x_data = np.flip(df['Days'].to_numpy())
    y_data = np.flip(df[y_data_label].to_numpy())

    for x_val, y_val in zip(x_data, y_data):
        print(x_val, y_val)

    # Defined up front so the final return also works when the first-order
    # fit is switched off.
    r = dr = np.nan

    fig, ax = plt.subplots(figsize=(15, 7))

    if do_1st_order:
        params_opt1, params_cov1 = optimize.curve_fit(f=fitfunc1,
                                                      xdata=x_data,
                                                      ydata=y_data,
                                                      p0=[1, 0.1])

        b1 = params_opt1[1]
        params_err1 = perrors(params_cov1)
        da1, db1 = errors(params_cov1)

        params_opt_up1 = params_opt1 + nstd * params_err1
        params_opt_down1 = params_opt1 - nstd * params_err1

        fit_nom1 = fitfunc1(x_data, *params_opt1)
        fit_up1 = fitfunc1(x_data, *params_opt_up1)
        fit_down1 = fitfunc1(x_data, *params_opt_down1)

        # Doubling time from the second fit parameter, with its error
        # propagated as the same relative error as b1.
        r = np.log(2) / b1
        dr = r * db1 / b1

        print("1st order")
        print("Opt params", params_opt1)
        print("Opt param errors", da1, db1)
        print("Opt params up", params_opt_up1)
        print("Opt params down", params_opt_down1)
        print("Doubling time with 1st order")
        print("%.2f +/- %.2f" % (r, dr))

        # Raw string: "\s" is not a valid escape sequence in a normal literal.
        ax.fill_between(x=x_data,
                        y1=np.array(fit_up1),
                        y2=np.array(fit_down1),
                        alpha=.25,
                        color='red',
                        label=r"%d-$\sigma$ interval" % nstd)

        plt.plot(x_data,
                 fit_nom1,
                 label='fit: a=%5.3f, b=%5.3f' % tuple(params_opt1),
                 color='red')

    if do_2nd_order:
        params_opt2, params_cov2 = optimize.curve_fit(f=fitfunc2,
                                                      xdata=x_data,
                                                      ydata=y_data,
                                                      p0=[1, 0.1, 0.001],
                                                      maxfev=1000)

        a2, b2 = params_opt2[0], params_opt2[1]
        params_err2 = perrors(params_cov2)
        da2, db2, dc2 = errors(params_cov2)

        params_opt_up2 = params_opt2 + nstd * params_err2
        params_opt_down2 = params_opt2 - nstd * params_err2

        fit_nom2 = fitfunc2(x_data, *params_opt2)
        fit_up2 = fitfunc2(x_data, *params_opt_up2)
        fit_down2 = fitfunc2(x_data, *params_opt_down2)

        # Roots of b2*t**2 + a2*t - ln(2) = 0.
        # NOTE(review): the roots use a2 and b2 while the third parameter is
        # not involved; whether that matches fitfunc2's exponent cannot be
        # checked from here — confirm against fitfunc2's definition.
        discriminant_root = np.sqrt(a2**2 + 4*b2*np.log(2))
        r1 = (-a2 + discriminant_root) / (2*b2)
        r2 = (-a2 - discriminant_root) / (2*b2)

        print("2nd order")
        print("Opt params", params_opt2)
        print("Opt param errors", da2, db2, dc2)
        print("Opt params up", params_opt_up2)
        print("Opt params down", params_opt_down2)
        print("Doubling times with 2nd order")
        print(r1)
        print(r2)

        ax.fill_between(x=x_data,
                        y1=np.array(fit_up2),
                        y2=np.array(fit_down2),
                        alpha=.25,
                        color='blue',
                        label=r"%d-$\sigma$ interval" % nstd)

        plt.plot(x_data,
                 fit_nom2,
                 label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(params_opt2),
                 color='blue')

    plt.scatter(x_data,
                y_data,
                label="Data",
                color='black')

    plt.ylabel(y_data_label)
    plt.xlabel('Day')

    # A single legend call: an earlier loc='best' legend would be replaced
    # by this one anyway.
    plt.legend(loc='upper left', fontsize=18)
    plt.tight_layout()
    xt.save(fig, xt.name(odir, country.replace(" ", "_")))
    if show:
        plt.show()
    plt.close('all')

    return r, dr
# Output directory for the prediction images.
odir = 'images/predictions'

# Queries (cuts) available to this script.
# NOTE(review): q1 and qMort share the label "Subperiod" and differ only in
# the start date; neither is selected by the active line below.
q1 = xq.Query("Subperiod", "Confirmed > 0 and Date > '2020-02-15' and Date < '2021-01-01'")

qMort = xq.Query("Subperiod", "Confirmed > 0 and Date > '2020-02-20' and Date < '2021-01-01'")
qAll = xq.Query("All Period", "Confirmed > 0 and Date < '2021-01-01'")
qGerm = xq.Query("Germany", "Confirmed > 0 and Country == 'Germany'")

# Exactly one of the three configuration lines is meant to be active; each
# sets the tag, the query and the logistic parameter set together.
#tag = ""; query = qMort; logparams = logparamsGlobal
tag = ""; query = qAll; logparams = logparamsGlobal
#tag = "Germany"; query = qGerm; logparams = logparamsGerm

# Data loader built with the selected query, the logistic parameters and the
# prophet flag switched on.
dloader = xp.DataLoader(query = query, logistic_params = logparams, prophet = True)

# Number of forecasting periods handed to op.Param below.
periods = 21

# Prediction for "Confirmed" with logistic growth; floor and cap come from
# the 'Confirmed' entry of the selected logistic parameter set.
if confirmed_logistic:
    param = op.Param(growth = 'logistic',
                     floor = logparams['Confirmed'].floor,
                     cap =  logparams['Confirmed'].cap,
                     smode = "additive",
                     periods = periods,
                     cpps = 0.05,
                     iw = 0.95)
 
    train =  pt.ProphetTrainer("Confirmed", param, dloader)
    plot_acf(df, ax=axes[0, 1])

    # 1st Differencing
    axes[1, 0].plot(df.diff())
    axes[1, 0].set_title('1st Order Differencing')
    plot_acf(df.diff().dropna(), ax=axes[1, 1])

    # 2nd Differencing
    axes[2, 0].plot(df.diff().diff())
    axes[2, 0].set_title('2nd Order Differencing')
    plot_acf(df.diff().diff().dropna(), ax=axes[2, 1])
    return fig, axes


odir = 'images/predictions'

# Training data set taken from a loader built with the arima flag on.
ld = xp.DataLoader(arima=True)
data = ld.train_ds_confirmed
print("arima data:\n", data)

# Stationarity test statistic on the 'Confirmed' column.
_confirmed = data['Confirmed']
stationary_test_stat(_confirmed)

# Positional 90 % / 10 % split; the row at the split index opens the
# validation part.
_split_at = int(data.shape[0] * 0.90)
data_train = data.iloc[:_split_at]
data_valid = data.iloc[_split_at:]
print('Training %d, Validation %d' % (len(data_train), len(data_valid)))

# Log of the training target, plus a separate copy of the validation rows.
data_train_log = np.log(data_train["Confirmed"])
data_pred = data_valid.copy()