def transform_1d_w_der(self, eval_point):
    """Evaluate the first derivative of each cubic B-spline basis function (w-basis).

    Parameters
    ----------
    eval_point : ndarray, shape (N,)
        Points at which the basis derivatives are evaluated.

    Returns
    -------
    ndarray, shape (N, self.n_w_knots + 2)
        Column ``i`` holds the derivative of the basis function whose only
        non-zero coefficient sits at position ``i``. Unlike the
        non-derivative transform there is no leading column with the raw
        evaluation points.
    """
    derivs = np.zeros((eval_point.shape[0], self.n_w_knots + 2))
    coef_positions = np.arange(len(self.w_knots) + 2)
    for col in range(self.n_w_knots + 2):
        # A one-hot coefficient vector isolates a single basis function.
        one_hot = (coef_positions == col).astype(float)
        # extrapolate=False: SciPy yields NaN outside the base interval.
        basis_fn = BSpline.construct_fast(
            self.w_np_knots, one_hot, 3, extrapolate=False)
        derivs[:, col] = basis_fn.derivative()(eval_point)
    return derivs
def transform_1d_l_der(self, eval_point):
    """Evaluate the first derivative of each cubic B-spline basis function (l-basis).

    Parameters
    ----------
    eval_point : ndarray, shape (N,)
        Points at which the basis derivatives are evaluated.

    Returns
    -------
    ndarray, shape (N, self.n_l_knots + 2)
        One column per basis function; column ``i`` is the derivative of the
        spline whose coefficient vector is one-hot at position ``i``.
    """
    n_points = eval_point.shape[0]
    result = np.zeros((n_points, self.n_l_knots + 2))
    selector = np.arange(len(self.l_knots) + 2)
    for basis_idx in range(self.n_l_knots + 2):
        coeffs = (selector == basis_idx).astype(float)
        # extrapolate=False: SciPy yields NaN outside the base interval.
        spline = BSpline.construct_fast(
            self.l_np_knots, coeffs, 3, extrapolate=False)
        d_spline = spline.derivative()
        result[:, basis_idx] = d_spline(eval_point)
    return result
def transform_1d_l(self, eval_point):
    """Expand ``eval_point`` into the cubic B-spline basis of the l-knots.

    Parameters
    ----------
    eval_point : ndarray, shape (N,)
        Points to transform.

    Returns
    -------
    ndarray, shape (N, self.n_l_knots + 3)
        Column 0 is ``eval_point`` itself; columns ``1 ..`` hold the values
        of the individual basis functions (evaluated with extrapolation
        enabled).
    """
    n_basis = self.n_l_knots + 2
    # One extra leading column carries the untransformed evaluation points.
    features = np.zeros((eval_point.shape[0], n_basis + 1))
    features[:, 0] = eval_point
    positions = np.arange(len(self.l_knots) + 2)
    for j in range(n_basis):
        one_hot = (positions == j).astype(float)
        basis_fn = BSpline.construct_fast(
            self.l_np_knots, one_hot, 3, extrapolate=True)
        features[:, j + 1] = basis_fn(eval_point)
    return features
def colloc_matrix(x, knots, order, deriv=None):
    """
    Build the B-spline collocation matrix for the points `x`.

    Every column is one B-spline basis function of order `k` (or its
    `deriv`-th derivative) evaluated at all points; the evaluation itself is
    delegated to :class:`scipy.interpolate.BSpline`.

    Parameters
    ----------
    x : ndarray, shape (N,)
        `N` points to evaluate the B-spline at.
    knots : ndarray, shape >= (k+1,)
        Knot vector derived from the breaks (with appropriate endpoint
        multiplicity).
    order : int, positive
        Order `k` of the B-spline (4 = cubic); the polynomial degree is
        ``order - 1``.
    deriv : int, positive, optional
        Which derivative of the basis to evaluate (``None`` means 0).

    Returns
    -------
    collmat : ndarray, shape (N, n-k)
        Collocation matrix, `n` is the size of `knots`. Points outside the
        base interval are evaluated with ``extrapolate=False``.

    See Also
    --------
    augment_breaks
    """
    nu = 0 if deriv is None else deriv
    n_rows = max(x.size, 1)
    n_basis = knots.size - order

    # Derivatives of order >= k of a piecewise polynomial of degree k-1 vanish.
    if nu >= order:
        return np.zeros((n_rows, n_basis))

    collmat = np.empty((n_rows, n_basis))
    # Row j of the identity is the coefficient vector selecting basis j.
    unit_coeffs = np.eye(n_basis)
    for j in range(n_basis):
        basis_fn = BSpline.construct_fast(
            knots, unit_coeffs[j], order - 1, extrapolate=False)
        collmat[:, j] = basis_fn(x, nu=nu)
    return collmat
def splev(
    x_new: np.ndarray,
    t: np.ndarray,
    c: np.ndarray,
    k: int = 3,
    extrapolate: bool | str = True,
) -> np.ndarray:
    """Evaluate, at ``x_new``, the B-spline defined by knots ``t``,
    coefficients ``c`` and degree ``k``.

    A throw-away :class:`scipy.interpolate.BSpline` is built along axis 0 and
    called immediately; see that class for the meaning of all parameters.
    All three arrays are first passed through ``_memoryview_safe``, in the
    order ``t``, ``c``, ``x_new``.
    """
    knots, coeffs, points = (
        _memoryview_safe(arr) for arr in (t, c, x_new)
    )
    return BSpline.construct_fast(
        knots, coeffs, k, axis=0, extrapolate=extrapolate
    )(points)
def fit(self, X, y=None):
    """Compute knot positions of splines.

    Validates the hyper-parameters, derives the base knots (either computed
    from ``X`` for ``knots`` in {"uniform", "quantile"} or taken from the
    user-supplied array), pads them with ``degree`` extra knots on each side
    and builds one ``BSpline`` object per feature.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        The data.

    y : None
        Ignored.

    Returns
    -------
    self : object
        Fitted transformer. ``self.bsplines_`` holds one BSpline per feature
        and ``self.n_features_out_`` the number of output columns.

    Raises
    ------
    ValueError
        If ``degree`` is not a non-negative integer, ``n_knots`` is not an
        integer >= 2, a user-supplied ``knots`` array has fewer than 2 rows,
        the wrong number of columns or is not strictly increasing,
        ``extrapolation`` is not one of the supported strings,
        ``include_bias`` is not a bool, or periodic splines are requested
        with ``degree >= n_knots``.
    """
    X = self._validate_data(
        X,
        reset=True,
        accept_sparse=False,
        ensure_min_samples=2,
        ensure_2d=True,
    )
    n_samples, n_features = X.shape

    if not (
        isinstance(self.degree, numbers.Integral) and self.degree >= 0
    ):
        raise ValueError("degree must be a non-negative integer.")

    if not (
        isinstance(self.n_knots, numbers.Integral) and self.n_knots >= 2
    ):
        raise ValueError("n_knots must be a positive integer >= 2.")

    if isinstance(self.knots, str) and self.knots in [
        "uniform",
        "quantile",
    ]:
        # Knot placement strategy given by name: derive positions from X.
        base_knots = self._get_base_knot_positions(
            X, n_knots=self.n_knots, knots=self.knots
        )
    else:
        # Anything else is treated as an explicit 2-D array of knot
        # positions with one column per feature.
        base_knots = check_array(self.knots, dtype=np.float64)
        if base_knots.shape[0] < 2:
            raise ValueError(
                "Number of knots, knots.shape[0], must be >= " "2."
            )
        elif base_knots.shape[1] != n_features:
            raise ValueError("knots.shape[1] == n_features is violated.")
        elif not np.all(np.diff(base_knots, axis=0) > 0):
            raise ValueError("knots must be sorted without duplicates.")

    if self.extrapolation not in (
        "error",
        "constant",
        "linear",
        "continue",
        "periodic",
    ):
        raise ValueError(
            "extrapolation must be one of 'error', "
            "'constant', 'linear', 'continue' or 'periodic'."
        )

    if not isinstance(self.include_bias, (bool, np.bool_)):
        raise ValueError("include_bias must be bool.")

    # number of knots for base interval
    n_knots = base_knots.shape[0]

    if self.extrapolation == "periodic" and n_knots <= self.degree:
        raise ValueError(
            "Periodic splines require degree < n_knots. Got n_knots="
            f"{n_knots} and degree={self.degree}."
        )

    # number of spline basis functions
    if self.extrapolation != "periodic":
        n_splines = n_knots + self.degree - 1
    else:
        # periodic splines have self.degree fewer degrees of freedom
        n_splines = n_knots - 1

    degree = self.degree
    n_out = n_features * n_splines
    # We have to add degree number of knots below, and degree number of
    # knots above the base knots in order to make the spline basis complete.
    if self.extrapolation == "periodic":
        # For periodic splines the spacing of the first / last degree knots
        # needs to be a continuation of the spacing of the last / first
        # base knots.
        period = base_knots[-1] - base_knots[0]
        knots = np.r_[
            base_knots[-(degree + 1): -1] - period,
            base_knots,
            base_knots[1: (degree + 1)] + period
        ]
    else:
        # Eilers & Marx in "Flexible smoothing with B-splines and
        # penalties" https://doi.org/10.1214/ss/1038425655 advise
        # against repeating first and last knot several times, which
        # would have inferior behaviour at boundaries if combined with
        # a penalty (hence P-Spline). We follow this advice even if our
        # splines are unpenalized. Meaning we do not:
        # knots = np.r_[
        #     np.tile(base_knots.min(axis=0), reps=[degree, 1]),
        #     base_knots,
        #     np.tile(base_knots.max(axis=0), reps=[degree, 1])
        # ]
        # Instead, we reuse the distance of the 2 first/last knots.
        dist_min = base_knots[1] - base_knots[0]
        dist_max = base_knots[-1] - base_knots[-2]

        knots = np.r_[
            linspace(
                base_knots[0] - degree * dist_min,
                base_knots[0] - dist_min,
                num=degree,
            ),
            base_knots,
            linspace(
                base_knots[-1] + dist_max,
                base_knots[-1] + degree * dist_max,
                num=degree,
            ),
        ]

    # With a diagonal coefficient matrix, we get back the spline basis
    # elements, i.e. the design matrix of the spline.
    # Note, BSpline appreciates C-contiguous float64 arrays as c=coef.
    coef = np.eye(n_splines, dtype=np.float64)
    if self.extrapolation == "periodic":
        # Append the first `degree` coefficient rows again after the last
        # one, so the padded knot vector gets n_splines + degree rows.
        coef = np.concatenate((coef, coef[:degree, :]))

    # Only "periodic" and "continue" are forwarded to BSpline's own
    # extrapolation; the other modes are not handled in this method.
    extrapolate = self.extrapolation in ["periodic", "continue"]

    # One BSpline per feature, each built on that feature's knot column.
    bsplines = [
        BSpline.construct_fast(
            knots[:, i], coef, self.degree, extrapolate=extrapolate
        )
        for i in range(n_features)
    ]
    self.bsplines_ = bsplines

    # (1 - include_bias) is 1 when include_bias is False, so one output
    # column per feature is subtracted in that case.
    self.n_features_out_ = n_out - n_features * (1 - self.include_bias)
    return self
def _make_random_spline(n=35, k=3):
    """Return a reproducible pseudo-random B-spline.

    The global NumPy RNG is re-seeded with 123 on every call, so the same
    ``(n, k)`` always yields the same spline. The knot vector has
    ``n + k + 1`` sorted entries drawn uniformly from [0, 1); the ``n``
    coefficients are drawn afterwards from the same stream.
    """
    np.random.seed(123)
    knots = np.random.random(n + k + 1)
    knots.sort()
    coeffs = np.random.random(n)
    return BSpline.construct_fast(knots, coeffs, k)
def _make_random_spline(n=35, k=3):
    """Create a deterministic random ``BSpline`` of degree ``k`` with ``n`` coefficients.

    Seeds the global NumPy random state with 123, draws ``n + k + 1`` values
    and sorts them to form the knot vector, then draws ``n`` coefficients.
    """
    np.random.seed(123)
    n_knots = n + k + 1
    sorted_knots = np.sort(np.random.random(n_knots))
    coefficients = np.random.random(n)
    spline = BSpline.construct_fast(sorted_knots, coefficients, k)
    return spline
class Fitting:
    """
    Fit the SEIAQR model to the confirmed-cases and deaths curves, using the
    initial days to estimate the state on the first day (by default
    March 16th, 2020).

    p: (tau, sigma, rho, delta, gamma1, gamma2), parameters fixed beforehand.
    time_varying: dict with keys 'beta' and 'mu'; each value is a dict with
        'coefficients' (number of B-spline coefficients) and 'bspline_order'
        (passed to BSpline as its degree argument).
    initial_day, final_day: labels slicing the data index (inclusive).
    hmax: maximum step size handed to the ODE integrator.
    init_cond: dict with 'x0' and 'bounds' forwarded to the initial-condition
        estimator; None selects the built-in defaults.

    The state vector is ordered as it is assembled in `initial_conditions`:
    [E, I, A, Q, S, R, D, T].
    """

    def __init__(self,
                 p,
                 time_varying,
                 initial_day='2020-03-16',
                 final_day='2020-07-15',
                 hmax=0.15,
                 init_cond=None):

        # A dict literal as default would be shared between all instances,
        # so the default is materialised per call instead.
        if init_cond is None:
            init_cond = {
                'x0': [0.8, 0.3, 0.00001, 0.00001, 0.00001],
                'bounds': [(0, 1), (0, 1), (0, 0.0001), (0, 0.0001),
                           (0, 0.0001)]
            }

        # parameters pre-determined
        self.p = np.array(p, dtype=np.float64)
        self.hmax = hmax
        self.init_cond = init_cond
        self.initial_day = initial_day
        self.final_day = final_day

        # Reading data (path is relative to this source file)
        ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
        filename = os.path.join(ROOT_DIR, "../data/covid_data_organized.csv")
        df = pd.read_csv(filename, index_col=0)

        self.T = df['confirmed'].loc[initial_day:final_day].to_numpy()
        self.D = df['deaths'].loc[initial_day:final_day].to_numpy()

        self.tf = len(self.T)

        # time-varying hyperparameters
        self.sbeta = time_varying['beta']['coefficients']
        self.order_beta = time_varying['beta']['bspline_order']
        self.smu = time_varying['mu']['coefficients']
        self.order_mu = time_varying['mu']['bspline_order']

        # Uniform knots over [0, tf]; a spline with s coefficients of
        # degree k needs s + k + 1 knots.
        self.knots_beta = np.linspace(0, self.tf,
                                      self.sbeta + self.order_beta + 1)
        self.knots_mu = np.linspace(0, self.tf, self.smu + self.order_mu + 1)

        # define the time-varying parameters (zero until `integrate` runs)
        self.beta = BSpline(self.knots_beta, np.zeros(self.sbeta),
                            self.order_beta)
        self.mu = BSpline(self.knots_mu, np.zeros(self.smu), self.order_mu)

        # Calculate initial conditions
        print('Model SEIAQR for Covid-19')
        print('-------------------------')
        print('Estimating initial Conditions...')
        self.initial_conditions()
        print('Initiation done!')

    def derivative(self, x, t, alpha, beta_, mu_, tau, sigma, rho, delta,
                   gamma1, gamma2):
        """
        Right-hand side of the simplified ODE system.

        x is the state [E, I, A, Q, S, R, D, T]; beta_ and mu_ are callables
        of time whose values are clipped at zero from below.
        Returns an array with the same length as x.
        """
        beta = max(beta_(t), 0)
        mu = max(mu_(t), 0)

        dx = np.zeros(shape=(len(x), ))
        dx[4] = -beta * x[4] * (x[1] + x[2])
        dx[0] = -dx[4] - (rho * delta + tau) * x[0]
        dx[1] = tau * x[0] - (sigma + rho) * x[1]
        dx[2] = sigma * alpha * x[1] - (gamma1 + rho) * x[2]
        dx[5] = gamma1 * x[2] + gamma2 * x[3]
        dx[6] = mu * x[3]
        dx[7] = sigma * (1 - alpha) * x[1] + rho * (delta * x[0] + x[1] + x[2])
        # dx[3] depends on dx[6] and dx[7], hence it is computed last.
        dx[3] = dx[7] - gamma2 * x[3] - dx[6]

        return dx

    def integrate(self, theta, p, time=None):
        """
        Integrate the system given a tuple of parameters.

        theta = (alpha, beta_1, ..., beta_s, mu_1, ..., mu_r).
        p is the parameters estimated by the literature.
        time is the sequence of time points, starting at 0, where the
        solution is wanted; None or an empty sequence means range(self.tf).

        Returns the array of states (one row per time point) and stores it
        in self.states; self.beta and self.mu are replaced by the splines
        built from theta.
        """
        if time is None or len(time) == 0:
            time = range(self.tf)

        # construct_fast performs no validation, so make sure it receives
        # contiguous float arrays even when theta is a plain list.
        coef_beta = np.ascontiguousarray(theta[1:1 + self.sbeta],
                                         dtype=np.float64)
        coef_mu = np.ascontiguousarray(theta[-self.smu:], dtype=np.float64)
        self.beta = BSpline.construct_fast(self.knots_beta, coef_beta,
                                           self.order_beta)
        self.mu = BSpline.construct_fast(self.knots_mu, coef_mu,
                                         self.order_mu)

        self.states = odeint(func=self.derivative,
                             y0=self.y0,
                             t=time,
                             args=(theta[0], self.beta, self.mu, *p),
                             hmax=self.hmax)
        return self.states

    def rt_calculation(self, theta):
        """
        Calculate the reproduction number based on the model.

        Must be called after `integrate` over the default time grid, because
        it reads self.states. Stores a (2, tf) array in self.repro_number:
        row 0 uses beta alone, row 1 additionally multiplies by the S
        component of the state.
        """
        S = self.states[:self.tf, 4]

        beta_ = BSpline.construct_fast(
            self.knots_beta,
            np.ascontiguousarray(theta[1:1 + self.sbeta], dtype=np.float64),
            self.order_beta)
        alpha = theta[0]
        tau, sigma, rho, delta, gamma1, _ = self.p

        beta = np.maximum(beta_(np.arange(self.tf)), 0)
        # difference between R0 and Rt
        varphi = np.array([beta * tau, beta * tau * S])
        varphi /= ((rho * delta + tau) * (sigma + rho))
        self.repro_number = 1 / 2 * (
            varphi + np.sqrt(varphi**2 + varphi * (4 * sigma * alpha) /
                             (rho + gamma1)))

    def initial_conditions(self):
        """
        Estimate Initial Conditions

        Delegates to FittingInitial and stores the resulting state in
        self.y0, ordered [E0, I0, A0, Q0 - D0, S0, R0, D0, T0].
        """
        parameters = self.p[[0, 1, 4, 5]]
        model = FittingInitial(parameters, self.initial_day, self.hmax)
        E0, I0, A0, _, Q0, R0 = model.get_initial_values(
            self.init_cond['x0'], self.init_cond['bounds'])
        self.initial_phase = model.y
        T0 = self.T[0]
        D0 = self.D[0]
        S0 = 1 - E0 - I0 - A0 - Q0 - R0
        self.y0 = [E0, I0, A0, Q0 - D0, S0, R0, D0, T0]

    def objective(self, theta, psi):
        """
        Weighted least-squares objective.

        theta = (alpha, beta_1, ..., beta_s, mu_1, ..., mu_r); psi weights
        the deaths term against the confirmed-cases term.
        """
        states = self.integrate(theta, self.p)
        res_T = self.T - states[:, 7]
        res_D = self.D - states[:, 6]
        obj1 = res_T @ self.weights @ res_T
        obj2 = res_D @ self.weights @ res_D
        return 100 * (obj1 + psi * obj2)

    def fit(self, psi, x0, bounds, algorithm='L-BFGS-B'):
        """
        Fits the model to the data and recover the estimated parameters.

        Returns the optimal theta and stores the optimisation result,
        variance estimates and information criteria on the instance.
        """
        # Weight matrix: inverse of the matrix with entries min(i, j) + 15.
        idx = np.arange(self.tf)
        self.weights = np.linalg.inv(np.minimum.outer(idx, idx) + 15)

        print('Starting estimation!')
        t0 = time.time()
        res = minimize(fun=self.objective,
                       x0=x0,
                       method=algorithm,
                       bounds=bounds,
                       args=(psi, ))
        self.counter = time.time() - t0
        print('Estimation finished. It took {} seconds'.format(self.counter))

        curve = self.integrate(res.x, self.p)

        # Rt calculation
        self.rt_calculation(res.x)

        # Store important values
        self.obj = res.fun
        self.res = res
        self.theta = res.x
        self.psi = psi
        self.x0 = x0
        self.bounds = bounds
        self.algorithm = algorithm

        n = self.tf
        K = len(self.theta)

        # Estimate variances
        res_T = self.T - curve[:, 7]
        res_D = self.D - curve[:, 6]
        self.sigma2_1 = res_T @ self.weights @ res_T / (n - K)
        self.sigma2_2 = res_D @ self.weights @ res_D / (n - K)

        # Information Criterion
        common = n * np.log(self.obj / n)
        self.aic = common + 2 * K
        self.bic = common + np.log(n) * K
        self.aicc = common + 2 * K * n / (n - K - 1)

        return res.x

    def check_residuals(self):
        """
        Simple residual analysis for the fitting.
        It must be called after the function fit.

        Returns the first differences of the residuals of T and of D.
        """
        diary_curves = self.integrate(self.theta, self.p)
        T = diary_curves[:, 7]
        D = diary_curves[:, 6]

        errorT = np.diff(self.T - T)
        errorD = np.diff(self.D - D)

        return errorT, errorD

    def correlation_matrix(self):
        """
        Calculate the correlation matrix with an estimated parameter.
        It must be called after the function fit.
        """
        def f(parameters, t_end, curve):
            # Value of state component `curve` at time `t_end`.
            theta = parameters[0:len(self.theta)]
            return self.integrate(theta, self.p, [0, t_end])[1, curve]

        K = len(self.theta)
        J1 = np.zeros((self.tf, K))
        J2 = np.zeros((self.tf, K))
        parameters = self.theta
        step = np.ones_like(parameters) * 1e-5
        for i in range(self.tf):
            J1[i, :] = approx_fprime(parameters, f, step, i, 7)
            J2[i, :] = approx_fprime(parameters, f, step, i, 6)

        # Fisher Information matrix
        FIM = (J1.transpose() @ self.weights @ J1 / self.sigma2_1 +
               J2.transpose() @ self.weights @ J2 / self.sigma2_2)

        # Covariance matrix
        C = np.linalg.inv(FIM)

        # Correlation matrix: entry [j, i] is C[i, j] / sqrt(C[i, i] C[j, j])
        diag = np.diag(C)
        return (C / np.sqrt(np.outer(diag, diag))).T

    @staticmethod
    def _array_to_field(values):
        """Render an array as a single-line string for one CSV field."""
        # str(ndarray) wraps long arrays over several lines, which would
        # split a CSV row; an effectively unlimited width prevents that.
        return np.array2string(np.asarray(values), max_line_width=10**9)

    def _get_exp(self, pathname):
        """
        Return the id for the next experiment stored in `pathname`.

        The id is one more than the first field of the last non-blank line;
        a file holding only the header (or nothing at all) yields 1.
        """
        last = ''
        with open(pathname, 'r') as f:
            for line in f:
                if line.strip():
                    last = line
        tag = last.split(';', 1)[0].strip()
        if tag in ('exp', ''):
            return 1
        return int(tag) + 1

    def save_experiment(self, objective_function):
        """
        Save information about the experiment.
        objective_function: name given to compare, like quadratic and divided.

        Appends one ';'-separated row to
        '../experiments/<objective_function>.csv' (relative to the current
        working directory), creating the file with a header row first when
        it does not exist. It must be called after the function fit.
        """
        pathname = '../experiments/' + objective_function + '.csv'
        if not os.path.exists(pathname):
            with open(pathname, 'w') as f:
                f.write(
                    'exp;tau;sigma;rho;delta;gamma1;gamma2;sbeta;order_beta;smu;order_mu;'
                )
                f.write('initial_day;final_day;hmax;psi;x0;bounds;algorithm;')
                # y0 holds eight values (S0 included), so the header names
                # all eight to keep columns aligned.
                f.write('E0;I0;A0;Q0;S0;R0;D0;T0;alpha;beta;mu;obj;time')
                f.write('\n')

        # The row is written even when the header was just created, so the
        # first experiment is not lost.
        exp = self._get_exp(pathname)
        tau, sigma, rho, delta, gamma1, gamma2 = self.p
        fields = [
            exp, tau, sigma, rho, delta, gamma1, gamma2,
            self.sbeta, self.order_beta, self.smu, self.order_mu,
            self.initial_day, self.final_day, self.hmax, self.psi,
            self.x0, self.bounds, self.algorithm,
            *self.y0,
            self.theta[0],
            self._array_to_field(self.theta[1:1 + self.sbeta]),
            self._array_to_field(self.theta[-self.smu:]),
            self.obj, self.counter,
        ]
        with open(pathname, 'a') as f:
            # Joining all fields at once guarantees a separator between
            # every pair, including the last y0 entry and alpha.
            f.write(';'.join(map(str, fields)))
            f.write('\n')


# if __name__ == '__main__':

#     p = [0.3125, 0.5, 2e-5, 1, 1/9.5, 1/18]
#     beta = {'sbeta': 4, 'bspline_order': 3}
#     model = Fitting(p, beta)
#     psi = 0
#     bounds = [(0.7,0.95), (0.05,0.3), (0.05,0.3), (0.05,0.3), (0.05,0.3), (0, 0.2)] # bound the parameters
#     x0 = [0.9, 0.1, 0.1, 0.1, 0.1, 0.12/14] # initial guess
#     theta = model.fit(psi, x0, bounds)