def estimate_movement_std(position_info):
    '''Estimate the movement standard deviation from lagged positions.

    Fits a Gaussian GLM of `position` on `lagged_position` without an
    intercept and returns the square root of the fitted scale.

    Parameters
    ----------
    position_info : data container understood by patsy
        Must provide the `position` and `lagged_position` variables
        referenced by the model formula.

    Returns
    -------
    movement_std : square root of the fitted model's scale

    '''
    formula = 'position ~ lagged_position - 1'
    endog, exog = dmatrices(formula, position_info)
    gaussian_model = GLM(endog, exog, family=families.Gaussian())
    return np.sqrt(gaussian_model.fit().scale)
# Example #2
# 0
def estimate_movement_variance(position, lagged_position, speed):
    '''Estimate the spread of movement from position and lagged position.

    Fits a Gaussian GLM of `position` on `lagged_position` without an
    intercept.

    NOTE: despite the function name, the value returned is the square
    root of the fitted scale, not the scale itself.

    Parameters
    ----------
    position : array_like
    lagged_position : array_like
    speed : unused
        Accepted for interface compatibility; not referenced by the fit.

    Returns
    -------
    sqrt_scale : square root of the fitted model's scale

    '''
    response, design_matrix = dmatrices(
        'position ~ lagged_position - 1',
        dict(position=position, lagged_position=lagged_position))
    gaussian_fit = GLM(
        response, design_matrix, family=families.Gaussian()).fit()
    return np.sqrt(gaussian_fit.scale)
def estimate_movement_std(position):
    '''Estimates the movement standard deviation based on position.

    For each position dimension, a Gaussian GLM without an intercept is fit
    with the current position as the response and the previous position as
    the single predictor. The square root of the fitted scale is reported
    as the movement standard deviation for that dimension.

    WARNING: Need to use on original position, not interpolated position.

    Parameters
    ----------
    position : ndarray, shape (n_time, n_position_dim)

    Returns
    -------
    movement_std : ndarray, shape (n_position_dim,)

    '''
    position = atleast_2d(position)
    # Drop every time point that has a NaN in any dimension. Samples on
    # either side of a dropped row are subsequently treated as consecutive.
    is_nan = np.any(np.isnan(position), axis=1)
    position = position[~is_nan]

    movement_std = []
    for p in position.T:
        # GLM takes (endog, exog): the response is the current position
        # p[1:], the predictor is the lagged position p[:-1].
        fit = GLM(p[1:], p[:-1], family=families.Gaussian()).fit()
        movement_std.append(np.sqrt(fit.scale))
    return np.array(movement_std)
def run_regression(x, y, feature_df, model=families.Gaussian()):
    """Fit a gaussian GLM through the `stats` R bindings and score the fit.

    `x` is converted to an R matrix and `y` to an R array, then passed to
    `stats.glm_fit` with `feature_df["features"]` as the factors. The
    fitted values are compared with `y` by `compute_rpy2_rsquares`.

    NOTE(review): `model` is never used; the family is hard-coded to
    `stats.gaussian()`. The coefficient table is assembled into a
    DataFrame but is neither attached to `feature_df` nor returned.

    Parameters
    ----------
    x : 2-D array with a `.shape` of (n_rows, n_columns)
    y : response values
    feature_df : object indexable by "features"; returned unchanged
    model : unused

    Returns
    -------
    feature_df : the object passed in
    r2 : result of `compute_rpy2_rsquares(y, fitted_y)`

    """
    n_rows, n_cols = x.shape
    design = robjects.r.matrix(x, nrow=n_rows, ncol=n_cols)
    target = robjects.r.array(y)

    glm_result = stats.glm_fit(
        design,
        target,
        family=stats.gaussian(),
        factors=feature_df["features"],
    )
    fitted_y = list(stats.fitted(glm_result))

    # Coefficient summary: column labels first, then the numeric table.
    column_names = list(
        stats.summary_glm(glm_result).rx2('coefficients').colnames)
    coefficient_table = pandas2ri.ri2py(
        stats.summary_glm(glm_result).rx2('coefficients'))

    dataset = pd.DataFrame({
        name: coefficient_table[:, column]
        for column, name in enumerate(column_names)
    })

    r2 = compute_rpy2_rsquares(y, fitted_y)
    return feature_df, r2
"""Calculates the evidence of being in a replay state based on the
current speed and the speed in the previous time step.

"""
from functools import partial

import numpy as np
from patsy import dmatrices
from statsmodels.api import GLM, families
from statsmodels.tsa.tsatools import lagmat

from .core import scale_likelihood

# Gaussian GLM family using a log link.
FAMILY = families.Gaussian(link=families.links.log())
# Patsy formula: `speed` modeled on `lagged_speed`; `- 1` drops the intercept.
FORMULA = 'speed ~ lagged_speed - 1'


def speed_likelihood(speed,
                     lagged_speed,
                     replay_coefficients,
                     replay_scale,
                     no_replay_coefficients,
                     no_replay_scale,
                     speed_threshold=4.0):
    """Calculates the evidence of being in a replay state based on the
    current speed and the speed in the previous time step.

    Parameters
    ----------
    speed : ndarray, shape (n_time,)
    lagged_speed : ndarray, shape (n_time,)
# Example #6
# 0
def penalized_IRLS(design_matrix,
                   response,
                   sqrt_penalty_matrix=None,
                   penalty=_EPS,
                   family=families.Gaussian(),
                   max_iterations=25,
                   prior_weights=None,
                   offset=None,
                   tolerance=1E-8):
    '''Estimate coefficients and associated statistics of models in the
    exponential family.

    The fit is done by penalized iteratively reweighted least squares:
    on every iteration the pseudo-data and working weights are recomputed
    from the current prediction, the design matrix is augmented with the
    scaled square-root penalty matrix (paired with zero responses and unit
    weights), and the weighted least squares problem is solved. Iteration
    stops when the norm of the coefficient change drops below `tolerance`,
    when the least squares solve fails, or after `max_iterations`.

    Parameters
    ----------
    design_matrix : ndarray, shape (n_observations, n_covariates)
        A 1-D array is treated as a single covariate column.
    response : ndarray, shape (n_observations,)
    sqrt_penalty_matrix : ndarray, optional,
                          shape (n_covariates, n_covariates)
        Defaults to the identity matrix.
    penalty : float or ndarray, optional
        Its square root multiplies `sqrt_penalty_matrix`.
    family : statsmodels.api.family instance, optional
    max_iterations : int, optional
        Must be at least 1.
    prior_weights : ndarray, optional, shape (n_observations,)
        Defaults to ones.
    offset : ndarray, optional, shape (n_observations,)
        Defaults to zeros.
    tolerance : float, optional

    Returns
    -------
    results : Results
        With fields `coefficients` (squeezed; all NaN if the least
        squares solve failed), `is_converged`, `coefficient_covariance`,
        `AIC`, `deviance`, `degrees_of_freedom` and `scale`.

    Raises
    ------
    ValueError
        If `max_iterations` is less than 1.

    '''
    if max_iterations < 1:
        # The statistics computed after the loop need the working weights
        # produced by at least one iteration.
        raise ValueError('max_iterations must be at least 1.')

    if design_matrix.ndim < 2:
        design_matrix = design_matrix[:, np.newaxis]
    if response.ndim < 2:
        response = response[:, np.newaxis]

    n_covariates = design_matrix.shape[1]

    # The response is a column, so 1-D weights/offsets must be columns too;
    # otherwise the arithmetic below would broadcast to (n_obs, n_obs).
    if prior_weights is None:
        prior_weights = np.ones_like(response)
    elif np.ndim(prior_weights) == 1:
        prior_weights = np.asarray(prior_weights)[:, np.newaxis]

    if offset is None:
        offset = np.zeros_like(response)
    elif np.ndim(offset) == 1:
        offset = np.asarray(offset)[:, np.newaxis]

    if sqrt_penalty_matrix is None:
        sqrt_penalty_matrix = np.eye(n_covariates, dtype=design_matrix.dtype)

    is_converged = False

    predicted_response = family.starting_mu(response)
    linear_predictor = family.link(predicted_response)

    sqrt_penalty_matrix = np.sqrt(penalty) * sqrt_penalty_matrix

    # Rows appended for the penalty: unit weights and zero responses.
    augmented_weights = np.ones_like(response[:n_covariates])
    full_design_matrix = np.concatenate((design_matrix, sqrt_penalty_matrix))
    augmented_response = np.zeros_like(response[:n_covariates])
    # Column shaped like the least squares solution so that the first
    # convergence check compares like with like instead of broadcasting.
    coefficients = np.zeros((n_covariates, 1))

    for _ in range(max_iterations):
        link_derivative = family.link.deriv(predicted_response)
        pseudo_data = (linear_predictor +
                       (response - predicted_response) * link_derivative -
                       offset)
        weights = prior_weights / (family.variance(predicted_response) *
                                   link_derivative**2)

        full_response = np.concatenate((pseudo_data, augmented_response))
        full_weights = np.concatenate((np.sqrt(weights), augmented_weights))

        coefficients_old = coefficients.copy()
        try:
            coefficients = np.linalg.lstsq(full_design_matrix * full_weights,
                                           full_response * full_weights,
                                           rcond=None)[0]
        except (np.linalg.LinAlgError, ValueError):
            logger.warning(
                'Weighted least squares failed. Returning NaN coefficients.')
            coefficients *= np.nan
            break

        linear_predictor = offset + design_matrix @ coefficients
        predicted_response = family.link.inverse(linear_predictor)

        # use deviance change instead?
        coefficients_change = np.linalg.norm(coefficients - coefficients_old)
        if coefficients_change < tolerance:
            is_converged = True
            break

    # `weights` holds the working weights from the last iteration entered.
    U, singular_values, Vt = _weighted_design_matrix_svd(
        design_matrix, sqrt_penalty_matrix, weights)

    degrees_of_freedom = get_effective_degrees_of_freedom(U)
    scale, is_estimated_scale = estimate_scale(family, response,
                                               predicted_response,
                                               prior_weights,
                                               degrees_of_freedom)
    coefficient_covariance = get_coefficient_covariance(
        U, singular_values, Vt, scale)
    deviance = family.deviance(response, predicted_response, prior_weights,
                               scale)
    log_likelihood = family.loglike(response, predicted_response,
                                    prior_weights, scale)
    aic = estimate_aic(log_likelihood, degrees_of_freedom)

    return Results(coefficients=np.squeeze(coefficients),
                   is_converged=is_converged,
                   coefficient_covariance=coefficient_covariance,
                   AIC=aic,
                   deviance=deviance,
                   degrees_of_freedom=degrees_of_freedom,
                   scale=scale)
def fit_speed_model(speed, lagged_speed):
    '''Fit a Gaussian GLM with a log link of speed on lagged speed.

    The model has no intercept (`- 1` in the formula).

    Parameters
    ----------
    speed : array_like
    lagged_speed : array_like

    Returns
    -------
    fit : fitted GLM results, as returned by `GLM(...).fit()`

    '''
    FORMULA = 'speed ~ lagged_speed - 1'
    response, design_matrix = dmatrices(
        FORMULA, dict(speed=speed, lagged_speed=lagged_speed))
    # The family expects a link *instance*; passing the link class itself
    # is deprecated/rejected by statsmodels.
    family = families.Gaussian(link=families.links.log())
    return GLM(response, design_matrix, family=family).fit()