Example #1
    def transform_1d_w_der(self, eval_point):
        # one column per cubic B-spline basis function (n_w_knots + 2 in
        # total); unlike transform_1d_l there is no leading eval_point column
        y_first = np.zeros((eval_point.shape[0], self.n_w_knots + 2))
        for i in range(self.n_w_knots + 2):
            y_first[:, i] = BSpline.construct_fast(
                self.w_np_knots,
                (np.arange(len(self.w_knots) + 2) == i).astype(float),
                3,
                extrapolate=False).derivative()(eval_point)
        return y_first
Example #2
    def transform_1d_l_der(self, eval_point):

        y_first = np.zeros((eval_point.shape[0], self.n_l_knots + 2))

        for i in range(self.n_l_knots + 2):
            y_first[:, i] = BSpline.construct_fast(
                self.l_np_knots,
                (np.arange(len(self.l_knots) + 2) == i).astype(float),
                3,
                extrapolate=False).derivative()(eval_point)
        return y_first
Example #3
    def transform_1d_l(self, eval_point):
        # the first element of the transform is the original eval_point
        # hence +1 on the next line
        y_py = np.zeros((eval_point.shape[0], self.n_l_knots + 2 + 1))
        y_py[:, 0] = eval_point
        for i in range(self.n_l_knots + 2):
            y_py[:, i + 1] = BSpline.construct_fast(
                self.l_np_knots,
                (np.arange(len(self.l_knots) + 2) == i).astype(float),
                3,
                extrapolate=True)(eval_point)
        return y_py
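
The three methods above share one pattern: each B-spline basis function is obtained by handing `BSpline.construct_fast` a coefficient vector that is 1 at a single index and 0 elsewhere. A minimal standalone sketch of that pattern (names and knot values here are illustrative, not taken from the original class):

import numpy as np
from scipy.interpolate import BSpline

knots = np.r_[[0.0] * 3, np.linspace(0.0, 1.0, 5), [1.0] * 3]  # clamped cubic knots
n_basis = len(knots) - 3 - 1                                   # 7 basis functions
x = np.linspace(0.0, 1.0, 11)

basis = np.zeros((x.size, n_basis))
for i in range(n_basis):
    coef = (np.arange(n_basis) == i).astype(float)  # unit coefficient vector
    basis[:, i] = BSpline.construct_fast(knots, coef, 3, extrapolate=False)(x)
# basis[:, i] now holds the i-th cubic B-spline basis function evaluated on x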
Example #4
import numpy as np
from scipy.interpolate import BSpline


def colloc_matrix(x, knots, order, deriv=None):
    """
    Create collocation matrix of a univariate function on `x` in terms of a
    B-spline representation of order `k`.

    The computation of the splines is based on the scipy-package.

    Parameters
    ----------
    x : ndarray, shape (N,)
        `N` points to evaluate the B-spline at.
    knots : ndarray, shape >= (k+1,)
        Vector of knots derived from breaks (with appropriate endpoint
        multiplicity).
    order : int, positive
        Order `k` of the B-spline (4 = cubic).
    deriv : int, non-negative, optional
        Order of the derivative of the B-spline partition to evaluate
        (defaults to 0).

    Returns
    -------
    collmat : ndarray, shape (N, n-k)
        Collocation matrix, `n` is the size of `knots`.

    See Also
    --------
    augment_breaks

    """

    if deriv is None:
        deriv = 0

    if deriv >= order:
        return np.zeros((max(x.size, 1), knots.size - order))

    else:
        # create spline using scipy.interpolate
        coll = np.empty((max(x.size, 1), knots.size - order))

        for n in range(knots.size - order):
            c = np.zeros(knots.size - order)
            c[n] = 1.

            b = BSpline.construct_fast(knots, c, order-1, extrapolate=False)

            coll[:, n] = b(x, nu=deriv)

        return coll
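
A brief, hypothetical call of `colloc_matrix`; the clamped-knot construction below stands in for the `augment_breaks` helper referenced in the docstring, which is not shown here:

breaks = np.linspace(0.0, 1.0, 5)
order = 4                                      # cubic B-splines (order = degree + 1)
knots = np.r_[[breaks[0]] * (order - 1), breaks, [breaks[-1]] * (order - 1)]
x = np.linspace(0.0, 1.0, 7)

B = colloc_matrix(x, knots, order)             # shape (7, knots.size - order) = (7, 7)
dB = colloc_matrix(x, knots, order, deriv=1)   # first-derivative collocation matrix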
Example #5
def splev(
    x_new: np.ndarray,
    t: np.ndarray,
    c: np.ndarray,
    k: int = 3,
    extrapolate: bool | str = True,
) -> np.ndarray:
    """Generate a BSpline object on the fly from knots and coefficients and
    evaluate it on x_new.

    See :class:`scipy.interpolate.BSpline` for all parameters.
    """
    t = _memoryview_safe(t)
    c = _memoryview_safe(c)
    x_new = _memoryview_safe(x_new)
    spline = BSpline.construct_fast(t, c, k, axis=0, extrapolate=extrapolate)
    return spline(x_new)
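
A hypothetical call sketch, assuming the module's `_memoryview_safe` helper is importable alongside `splev`; the knot and coefficient values are made up for illustration:

import numpy as np

t = np.array([0., 0., 0., 0., 1., 2., 3., 3., 3., 3.])  # clamped cubic knots
c = np.array([1., 2., 0., -1., 1., 3.])                  # len(t) - k - 1 = 6 coefficients
y = splev(np.linspace(0., 3., 50), t, c, k=3)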
Example #6
    def fit(self, X, y=None):
        """Compute knot positions of splines.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data.

        y : None
            Ignored.

        Returns
        -------
        self : object
            Fitted transformer.
        """
        X = self._validate_data(
            X,
            reset=True,
            accept_sparse=False,
            ensure_min_samples=2,
            ensure_2d=True,
        )
        n_samples, n_features = X.shape

        if not (
            isinstance(self.degree, numbers.Integral) and self.degree >= 0
        ):
            raise ValueError("degree must be a non-negative integer.")

        if not (
            isinstance(self.n_knots, numbers.Integral) and self.n_knots >= 2
        ):
            raise ValueError("n_knots must be a positive integer >= 2.")

        if isinstance(self.knots, str) and self.knots in [
            "uniform",
            "quantile",
        ]:
            base_knots = self._get_base_knot_positions(
                X, n_knots=self.n_knots, knots=self.knots
            )
        else:
            base_knots = check_array(self.knots, dtype=np.float64)
            if base_knots.shape[0] < 2:
                raise ValueError(
                    "Number of knots, knots.shape[0], must be >= " "2."
                )
            elif base_knots.shape[1] != n_features:
                raise ValueError("knots.shape[1] == n_features is violated.")
            elif not np.all(np.diff(base_knots, axis=0) > 0):
                raise ValueError("knots must be sorted without duplicates.")

        if self.extrapolation not in (
            "error",
            "constant",
            "linear",
            "continue",
            "periodic",
        ):
            raise ValueError(
                "extrapolation must be one of 'error', "
                "'constant', 'linear', 'continue' or 'periodic'."
            )

        if not isinstance(self.include_bias, (bool, np.bool_)):
            raise ValueError("include_bias must be bool.")

        # number of knots for base interval
        n_knots = base_knots.shape[0]

        if self.extrapolation == "periodic" and n_knots <= self.degree:
            raise ValueError(
                "Periodic splines require degree < n_knots. Got n_knots="
                f"{n_knots} and degree={self.degree}."
            )

        # number of splines basis functions
        if self.extrapolation != "periodic":
            n_splines = n_knots + self.degree - 1
        else:
            # periodic splines have self.degree fewer degrees of freedom
            n_splines = n_knots - 1

        degree = self.degree
        n_out = n_features * n_splines
        # We have to add `degree` knots below and `degree` knots above the
        # base knots in order to make the spline basis complete.
        if self.extrapolation == "periodic":
            # For periodic splines the spacing of the first / last degree knots
            # needs to be a continuation of the spacing of the last / first
            # base knots.
            period = base_knots[-1] - base_knots[0]
            knots = np.r_[
                base_knots[-(degree + 1): -1] - period,
                base_knots,
                base_knots[1: (degree + 1)] + period
            ]

        else:
            # Eilers & Marx in "Flexible smoothing with B-splines and
            # penalties" https://doi.org/10.1214/ss/1038425655 advice
            # against repeating first and last knot several times, which
            # would have inferior behaviour at boundaries if combined with
            # a penalty (hence P-Spline). We follow this advice even if our
            # splines are unpenalized. Meaning we do not:
            # knots = np.r_[
            #     np.tile(base_knots.min(axis=0), reps=[degree, 1]),
            #     base_knots,
            #     np.tile(base_knots.max(axis=0), reps=[degree, 1])
            # ]
            # Instead, we reuse the distance of the 2 first/last knots.
            dist_min = base_knots[1] - base_knots[0]
            dist_max = base_knots[-1] - base_knots[-2]

            knots = np.r_[
                np.linspace(
                    base_knots[0] - degree * dist_min,
                    base_knots[0] - dist_min,
                    num=degree,
                ),
                base_knots,
                np.linspace(
                    base_knots[-1] + dist_max,
                    base_knots[-1] + degree * dist_max,
                    num=degree,
                ),
            ]

        # With a diagonal coefficient matrix, we get back the spline basis
        # elements, i.e. the design matrix of the spline.
        # Note, BSpline appreciates C-contiguous float64 arrays as c=coef.
        coef = np.eye(n_splines, dtype=np.float64)
        if self.extrapolation == "periodic":
            coef = np.concatenate((coef, coef[:degree, :]))

        extrapolate = self.extrapolation in ["periodic", "continue"]

        bsplines = [
            BSpline.construct_fast(
                knots[:, i], coef, self.degree, extrapolate=extrapolate
            )
            for i in range(n_features)
        ]
        self.bsplines_ = bsplines

        self.n_features_out_ = n_out - n_features * (1 - self.include_bias)
        return self
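
This `fit` appears to be scikit-learn's `SplineTransformer.fit`; assuming that, a minimal usage sketch of the fitted transformer (parameter values are illustrative):

import numpy as np
from sklearn.preprocessing import SplineTransformer

X = np.linspace(0.0, 10.0, 20).reshape(-1, 1)
spl = SplineTransformer(n_knots=5, degree=3).fit(X)
# each feature gets n_knots + degree - 1 = 7 basis columns (include_bias=True)
print(spl.transform(X).shape)   # (20, 7)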
Example #7
def _make_random_spline(n=35, k=3):
    np.random.seed(123)
    t = np.sort(np.random.random(n+k+1))
    c = np.random.random(n)
    return BSpline.construct_fast(t, c, k)
Example #8
def _make_random_spline(n=35, k=3):
    np.random.seed(123)
    t = np.sort(np.random.random(n + k + 1))
    c = np.random.random(n)
    return BSpline.construct_fast(t, c, k)
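
A quick sketch of how such a random test spline might be evaluated (usage assumed, not taken from the original test file); the base interval runs from `t[k]` to `t[-k - 1]`:

import numpy as np

spl = _make_random_spline()
xx = np.linspace(spl.t[spl.k], spl.t[-spl.k - 1], 11)  # stay inside the base interval
print(spl(xx))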
Example #9
class Fitting:
    """
    Fitting of the curve in the initial days to obtain the estimated values
    for day day march, 16th, 2020, of the pandemic.
    p: (tau, sigma, rho, delta, gamma1, gamma2).
    tyme_varying: definitions about beta and mu bspline (knots, number of parameters and order)
    hmax: max value Runge Kutta integration method. 
    """
    def __init__(
        self,
        p,
        time_varying,
        initial_day='2020-03-16',
        final_day='2020-07-15',
        hmax=0.15,
        init_cond={
            'x0': [0.8, 0.3, 0.00001, 0.00001, 0.00001],
            'bounds': [(0, 1), (0, 1), (0, 0.0001), (0, 0.0001), (0, 0.0001)]
        }):

        # parameters pre-determined
        self.p = np.array(p, dtype=np.float64)
        self.hmax = hmax
        self.init_cond = init_cond
        self.initial_day = initial_day
        self.final_day = final_day

        # Reading data
        ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
        filename = os.path.join(ROOT_DIR, "../data/covid_data_organized.csv")
        df = pd.read_csv(filename, index_col=0)
        self.T = df['confirmed'].loc[initial_day:final_day].to_numpy()
        self.D = df['deaths'].loc[initial_day:final_day].to_numpy()
        self.tf = len(self.T)

        # time-varying hyperparameters
        self.sbeta = time_varying['beta']['coefficients']
        self.order_beta = time_varying['beta']['bspline_order']
        self.smu = time_varying['mu']['coefficients']
        self.order_mu = time_varying['mu']['bspline_order']
        self.knots_beta = np.linspace(0, self.tf,
                                      self.sbeta + self.order_beta + 1)
        self.knots_mu = np.linspace(0, self.tf, self.smu + self.order_mu + 1)
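        # a degree-k B-spline with s coefficients needs s + k + 1 knots, hence
        # the sbeta + order_beta + 1 and smu + order_mu + 1 points above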
        # define the time-varying parameters
        self.beta = BSpline(self.knots_beta, np.zeros(self.sbeta),
                            self.order_beta)
        self.mu = BSpline(self.knots_mu, np.zeros(self.smu), self.order_mu)

        # Calculate initial conditions
        print('Model SEIAQR for Covid-19')
        print('-------------------------')
        print('Estimating initial Conditions...')
        self.initial_conditions()
        print('Initiation done!')

    def derivative(self, x, t, alpha, beta_, mu_, tau, sigma, rho, delta,
                   gamma1, gamma2):
        """
        System of derivatives simplified.
        """

        beta = max(beta_(t), 0)
        mu = max(mu_(t), 0)

        dx = np.zeros(shape=(len(x), ))
        dx[4] = -beta * x[4] * (x[1] + x[2])
        dx[0] = -dx[4] - (rho * delta + tau) * x[0]
        dx[1] = tau * x[0] - (sigma + rho) * x[1]
        dx[2] = sigma * alpha * x[1] - (gamma1 + rho) * x[2]
        dx[5] = gamma1 * x[2] + gamma2 * x[3]
        dx[6] = mu * x[3]
        dx[7] = sigma * (1 - alpha) * x[1] + rho * (delta * x[0] + x[1] + x[2])
        dx[3] = dx[7] - gamma2 * x[3] - dx[6]

        return dx

    def integrate(self, theta, p, time=[]):
        """
        Integrate the system given a tuple of parameters.
        p holds the parameters taken from the literature; time is a list of
        time points at which to integrate, always starting at 0.
        """
        if len(time) == 0:
            time = range(self.tf)
        self.beta = self.beta.construct_fast(self.knots_beta,
                                             theta[1:1 + self.sbeta],
                                             self.order_beta)
        self.mu = self.mu.construct_fast(self.knots_mu, theta[-self.smu:],
                                         self.order_mu)
        self.states = odeint(func=self.derivative,
                             y0=self.y0,
                             t=time,
                             args=(theta[0], self.beta, self.mu, *p),
                             hmax=self.hmax)
        return self.states

    def rt_calculation(self, theta):
        """
        Calculate the reproduction number based on the model.
        """
        S = self.states[:, 4]
        self.repro_number = np.zeros(shape=(2, self.tf))

        beta_ = self.beta.construct_fast(self.knots_beta,
                                         theta[1:1 + self.sbeta],
                                         self.order_beta)
        mu_ = self.mu.construct_fast(self.knots_mu, theta[-self.smu:],
                                     self.order_mu)
        alpha = theta[0]
        tau, sigma, rho, delta, gamma1, _ = self.p

        for t in range(self.tf):
            beta = max(beta_(t), 0)
            mu = max(mu_(t), 0)
            varphi = np.array([beta * tau, beta * tau * S[t]
                               ])  # difference between R0 and Rt
            varphi /= ((rho * delta + tau) * (sigma + rho))
            r0_rt = 1 / 2 * (varphi + np.sqrt(varphi**2 + varphi *
                                              (4 * sigma * alpha) /
                                              (rho + gamma1)))
            self.repro_number[:, t] = r0_rt

    def initial_conditions(self):
        """
        Estimate Initial Conditions
        """
        parameters = self.p[[0, 1, 4, 5]]
        model = FittingInitial(parameters, self.initial_day, self.hmax)
        E0, I0, A0, _, Q0, R0 = model.get_initial_values(
            self.init_cond['x0'], self.init_cond['bounds'])
        self.initial_phase = model.y
        T0 = self.T[0]
        D0 = self.D[0]
        S0 = 1 - E0 - I0 - A0 - Q0 - R0
        self.y0 = [E0, I0, A0, Q0 - D0, S0, R0, D0, T0]

    def objective(self, theta, psi):
        # theta = (alpha, beta_1, ..., beta_s, mu_1, ..., mu_r)
        integrate = self.integrate(theta, self.p)
        obj1 = (self.T - integrate[:, 7]) @ self.weights @ (self.T -
                                                            integrate[:, 7])
        obj2 = (self.D - integrate[:, 6]) @ self.weights @ (self.D -
                                                            integrate[:, 6])

        obj = 100 * (obj1 + psi * obj2)

        return obj

    def fit(self, psi, x0, bounds, algorithm='L-BFGS-B'):
        """
        Fit the model to the data and recover the estimated parameters.
        """
        self.weights = np.array([[min(i, j) + 15 for i in range(self.tf)]
                                 for j in range(self.tf)])
        self.weights = np.linalg.inv(self.weights)

        print('Starting estimation!')
        t0 = time.time()
        res = minimize(fun=self.objective,
                       x0=x0,
                       method=algorithm,
                       bounds=bounds,
                       args=(psi, ))
        self.counter = time.time() - t0
        print('Estimation finished. It took {} seconds'.format(self.counter))

        curve = self.integrate(res.x, self.p)

        # Rt calculation
        self.rt_calculation(res.x)

        # Store important values
        self.obj = res.fun
        self.res = res
        self.theta = res.x
        self.psi = psi
        self.x0 = x0
        self.bounds = bounds
        self.algorithm = algorithm

        n = self.tf
        K = len(self.theta)

        # Estimate variances
        self.sigma2_1 = (self.T - curve[:, 7]) @ self.weights @ (
            self.T - curve[:, 7]) / (n - K)
        self.sigma2_2 = (self.D - curve[:, 6]) @ self.weights @ (
            self.D - curve[:, 6]) / (n - K)

        # Information Criterion
        common = n * np.log(self.obj / n)
        self.aic = common + 2 * K
        self.bic = common + np.log(n) * K
        self.aicc = common + 2 * K * n / (n - K - 1)

        return res.x

    def check_residuals(self):
        """
        Simple residual analysis of the fit. It must be called after the
        fit method.
        """
        diary_curves = self.integrate(self.theta, self.p)

        T = diary_curves[:, 7]
        D = diary_curves[:, 6]

        errorT = np.diff(self.T - T)
        errorD = np.diff(self.D - D)

        return errorT, errorD

    def correlation_matrix(self):
        """
        Calculate the correlation matrix of the estimated parameters. It must
        be called after the fit method.
        """
        def f(parameters, time, curve):
            theta = parameters[0:len(self.theta)]
            #p = parameters[len(self.theta):]
            return self.integrate(theta, self.p, [0, time])[1, curve]

        K = len(self.theta)  #+ len(self.p)
        J1 = np.zeros((self.tf, K))
        J2 = np.zeros((self.tf, K))
        parameters = self.theta  #np.concatenate([self.theta, self.p])
        for i in range(self.tf):
            J1[i, :] = approx_fprime(parameters, f,
                                     np.ones_like(parameters) * 1e-5, i, 7)
            J2[i, :] = approx_fprime(parameters, f,
                                     np.ones_like(parameters) * 1e-5, i, 6)

        # Fisher Information matrix
        FIM = (J1.transpose() @ self.weights @ J1 / self.sigma2_1
               + J2.transpose() @ self.weights @ J2 / self.sigma2_2)
        # Covariance matrix
        C = np.linalg.inv(FIM)
        # Correlation matrix
        R = [[C[i, j] / np.sqrt(C[i, i] * C[j, j]) for i in range(K)]
             for j in range(K)]

        return np.array(R)

    def _get_exp(self, pathname):
        """Return the next experiment number, read from the file's last line."""

        with open(pathname, 'r') as f:
            line = f.readline()
            while line != '':
                lineold = line
                line = f.readline()
        exp = lineold[:lineold.find(';')]
        exp = 1 if exp == 'exp' else int(exp) + 1

        return exp

    def save_experiment(self, objective_function):
        """
        Save information about the experiment.
        objective_function: label used to group comparable runs, e.g.
        'quadratic' or 'divided'.
        """
        pathname = '../experiments/' + objective_function + '.csv'
        if not os.path.exists(pathname):
            with open(pathname, 'w') as f:
                f.write(
                    'exp;tau;sigma;rho;delta;gamma1;gamma2;sbeta;order_beta;smu;order_mu;'
                )
                f.write('initial_day;final_day;hmax;psi;x0;bounds;algorithm;')
                f.write('E0;I0;A0;Q0;S0;R0;D0;T0;alpha;beta;mu;obj;time')
                f.write('\n')
        # append the current experiment as a new row
        with open(pathname, 'a') as f:
            exp = self._get_exp(pathname)
            tau, sigma, rho, delta, gamma1, gamma2 = self.p

            info = [
                exp, tau, sigma, rho, delta, gamma1, gamma2, self.sbeta,
                self.order_beta, self.smu, self.order_mu
            ]
            info2 = [self.initial_day, self.final_day, self.hmax, self.psi]

            f.write(';'.join(map(str, info)))
            f.write(';')
            f.write(';'.join(map(str, info2)))
            f.write(';')
            f.write(str(self.x0))
            f.write(';')
            f.write(str(self.bounds))
            f.write(';')
            f.write(self.algorithm + ';')
            f.write(';'.join(map(str, self.y0)) + ';')
            f.write(str(self.theta[0]) + ';')
            f.write(str(self.theta[1:1 + self.sbeta]))
            f.write(';')
            f.write(str(self.theta[-self.smu:]))
            f.write(';')
            f.write(str(self.obj) + ';')
            f.write(str(self.counter))
            f.write('\n')


# if __name__ == '__main__':

#     p = [0.3125, 0.5, 2e-5, 1, 1/9.5, 1/18]
#     # time-varying settings; the mu entry here is illustrative (a constant mu)
#     time_varying = {
#         'beta': {'coefficients': 4, 'bspline_order': 3},
#         'mu': {'coefficients': 1, 'bspline_order': 0}
#     }
#     model = Fitting(p, time_varying)

#     psi = 0
#     bounds = [(0.7,0.95), (0.05,0.3), (0.05,0.3), (0.05,0.3), (0.05,0.3), (0, 0.2)] # bound the parameters
#     x0 = [0.9, 0.1, 0.1, 0.1, 0.1, 0.12/14]  # initial guess
#     theta = model.fit(psi, x0, bounds)