Example 1
import aesara.tensor as at
from pymc import Deterministic, Model, Normal, Potential


class CustomModel(Model):
    def __init__(self, mean=0, sigma=1, name="", model=None):
        super().__init__(name, model)
        # register_rv attaches a distribution to the model under the name "v1"
        self.register_rv(Normal.dist(mu=mean, sigma=sigma), "v1")
        # inside __init__ we are already in this model's context,
        # so the usual named-variable syntax works too
        Normal("v2", mu=mean, sigma=sigma)
        Normal("v3", mu=mean, sigma=Normal("sd", mu=10, sigma=1, initval=1.0))
        Deterministic("v3_sq", self.v3 ** 2)
        Potential("p1", at.constant(1))
Example 2
import theano.tensor as tt
from pymc3 import Deterministic, HalfCauchy, Model, Normal, Potential


class CustomModel(Model):
    def __init__(self, mean=0, sigma=1, name='', model=None):
        super().__init__(name, model)
        # the Var method registers a distribution on the model by name
        self.Var('v1', Normal.dist(mu=mean, sigma=sigma))
        Normal('v2', mu=mean, sigma=sigma)
        Normal('v3', mu=mean, sigma=HalfCauchy('sd', beta=10, testval=1.0))
        Deterministic('v3_sq', self.v3 ** 2)
        Potential('p1', tt.constant(1))
Example 3
import aesara.tensor as aet
from pymc3 import Deterministic, HalfCauchy, Model, Normal, Potential


class CustomModel(Model):
    def __init__(self, mean=0, sigma=1, name="", model=None):
        super().__init__(name, model)
        self.Var("v1", Normal.dist(mu=mean, sigma=sigma))
        Normal("v2", mu=mean, sigma=sigma)
        Normal("v3", mu=mean, sigma=HalfCauchy("sd", beta=10, testval=1.0))
        Deterministic("v3_sq", self.v3 ** 2)
        Potential("p1", aet.constant(1))
Example 4
import numpy as np
import theano.tensor as tt
from pymc3 import Deterministic, Flat, Model, Normal
from pymc3.glm.utils import any_to_tensor_and_labels


class LinearComponent(Model):
    """Creates linear component, y_est is accessible via attribute

    Parameters
    ----------
    name: str - name, associated with the linear component
    x: pd.DataFrame or np.ndarray
    y: pd.Series or np.array
    intercept: bool - fit with intercept or not?
    labels: list - replace variable names with these labels
    priors: dict - priors for coefficients
        use `Intercept` key for defining Intercept prior
            defaults to Flat.dist()
        use `Regressor` key for defining default prior for all regressors
            defaults to Normal.dist(mu=0, tau=1.0E-6)
    vars: dict - random variables instead of creating new ones
    offset: scalar, or numpy/theano array with the same shape as y
        this can be used to specify an a priori known component to be
        included in the linear predictor during fitting.
    """

    default_regressor_prior = Normal.dist(mu=0, tau=1.0e-6)
    default_intercept_prior = Flat.dist()

    def __init__(
        self,
        x,
        y,
        intercept=True,
        labels=None,
        priors=None,
        vars=None,
        name="",
        model=None,
        offset=0.0,
    ):
        super().__init__(name, model)
        if len(y.shape) > 1:
            err_msg = ("Only one-dimensional observed variable objects (i.e."
                       " of shape `(n, )`) are supported")
            raise TypeError(err_msg)
        if priors is None:
            priors = {}
        if vars is None:
            vars = {}
        x, labels = any_to_tensor_and_labels(x, labels)
        # now we have x, shape and labels
        if intercept:
            x = tt.concatenate([tt.ones((x.shape[0], 1), x.dtype), x], axis=1)
            labels = ["Intercept"] + labels
        coeffs = list()
        for name in labels:
            if name == "Intercept":
                if name in vars:
                    v = Deterministic(name, vars[name])
                else:
                    v = self.Var(name=name,
                                 dist=priors.get(name,
                                                 self.default_intercept_prior))
                coeffs.append(v)
            else:
                if name in vars:
                    v = Deterministic(name, vars[name])
                else:
                    v = self.Var(
                        name=name,
                        dist=priors.get(
                            name,
                            priors.get("Regressor",
                                       self.default_regressor_prior)),
                    )
                coeffs.append(v)
        self.coeffs = tt.stack(coeffs, axis=0)
        self.y_est = x.dot(self.coeffs) + offset

    @classmethod
    def from_formula(cls,
                     formula,
                     data,
                     priors=None,
                     vars=None,
                     name="",
                     model=None,
                     offset=0.0,
                     eval_env=0):
        """Creates linear component from `patsy` formula.

        Parameters
        ----------
        formula: str - a patsy formula
        data: a dict-like object that can be used to look up variables referenced
            in `formula`
        eval_env: either a `patsy.EvalEnvironment` or else a depth represented as
            an integer which will be passed to `patsy.EvalEnvironment.capture()`.
            See `patsy.dmatrix` and `patsy.EvalEnvironment` for details.
        Other arguments are documented in the constructor.
        """
        import patsy

        eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1)
        y, x = patsy.dmatrices(formula, data, eval_env=eval_env)
        labels = x.design_info.column_names
        return cls(
            np.asarray(x),
            np.asarray(y)[:, -1],
            intercept=False,
            labels=labels,
            priors=priors,
            vars=vars,
            name=name,
            model=model,
            offset=offset,
        )
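A hedged usage sketch for LinearComponent (the data, priors, and observation noise below are illustrative, not part of the original source):

import numpy as np
import pymc3 as pm

X = np.random.randn(100, 2)
y = X @ np.array([1.0, -2.0]) + 0.5 * np.random.randn(100)

with pm.Model() as model:
    # Override the documented `Intercept`/`Regressor` prior keys.
    lc = LinearComponent(
        X, y,
        priors={"Intercept": pm.Normal.dist(mu=0, sigma=10),
                "Regressor": pm.Normal.dist(mu=0, sigma=1)},
    )
    sigma = pm.HalfCauchy("sigma", beta=10)
    pm.Normal("y_obs", mu=lc.y_est, sigma=sigma, observed=y)

With a DataFrame, `LinearComponent.from_formula("y ~ x1 + x2", df)` builds the same component from a patsy design matrix.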
Example 5
import numpy as np
import pymc3 as pm
import theano
import theano.tensor as tt
from pymc3 import Deterministic, Gamma, Model, Normal
from pymc3.glm.utils import any_to_tensor_and_labels


class FrailtyIndependentComponent_Fix(Model):
    """
    General Class for transformation multivariate frailty model
    Hack of pm.GLM.LinearComponent
    Parameters
    ----------
    name : str - name, associated with the linear component
    x : pd.DataFrame or np.ndarray
    y : pd.Series or np.array
    e: pd.Series or np.array
    intercept : bool - fit with intercept or not?
    labels : list - replace variable names with these labels
    priors : dict - priors for coefficients
        use `Intercept` key for defining Intercept prior
            defaults to Flat.dist()
        use `Regressor` key for defining default prior for all regressors
            defaults to Normal.dist(mu=0, tau=1.0E-6)
    vars : dict - random variables instead of creating new ones
    """
    # First thing we need to do is define default priors for the model parameters
    default_regressor_prior = Normal.dist(mu=0, tau=1 / 100)
    default_lambda_prior = Gamma.dist(0.001, 0.001, testval=1.)
    default_rho_prior = Gamma.dist(0.001, 0.001, testval=1.)
    default_theta_prior = Gamma.dist(0.001, 0.001, testval=1.)

    def __init__(self,
                 time,
                 event,
                 x,
                 rs,
                 minibatch=1,
                 labels=None,
                 priors=None,
                 vars=None,
                 name='',
                 model=None):

        super(FrailtyIndependentComponent_Fix, self).__init__(name, model)
        if priors is None:
            priors = {}
        if vars is None:
            vars = {}

        # Determine whether we received theano shared variables or plain
        # arrays; shared variables expose their data via get_value().
        if hasattr(time, "get_value"):
            data_tensor = True
            self.k = k = time.get_value().shape[1]  # outcome dimensionality
            self.n = n = time.get_value().shape[0]  # total number of observations
            self.p = p = x.get_value().shape[1]  # number of covariates
        else:
            data_tensor = False
            self.k = k = time.shape[1]  # outcome dimensionality
            self.n = n = time.shape[0]  # total number of observations
            self.p = p = x.shape[1]  # number of covariates

        x, labels = any_to_tensor_and_labels(x, labels)

        # Build the secondary event indicator used by the gamma_frac part of
        # the likelihood: row-wise, the first s entries are 1, where s is the
        # number of observed events in that row.
        if data_tensor:
            self.event_change = event_change = theano.shared(
                np.array([np.append(np.repeat(1, s), np.repeat(0, k - s))
                          for s in np.sum(event.get_value(), axis=1)]),
                borrow=True)
        else:
            self.event_change = event_change = np.array(
                [np.append(np.repeat(1, s), np.repeat(0, k - s))
                 for s in np.sum(event, axis=1)])

        # Keep track of the total dataset size for minibatching.
        # If minibatching, the data must be wrapped in Minibatch generators
        # rather than passed as plain tensors; by this step x is already in
        # tensor form.
        if minibatch >= 2:
            minibatch = int(minibatch)  # guard against a float batch size
            # Minibatch instances of the design matrix and outcome arrays
            x_mini = pm.Minibatch(data=x.get_value(), batch_size=minibatch)
            time_mini = pm.Minibatch(data=time.get_value(), batch_size=minibatch)
            event_mini = pm.Minibatch(data=event.get_value(), batch_size=minibatch)
            event_change_mini = pm.Minibatch(data=event_change.get_value(),
                                             batch_size=minibatch)

            # assign attributes used later to parameterize the logp function
            self.x = x_mini
            self.time = time_mini
            self.event = event_mini
            self.event_change = event_change_mini

        else:
            # if not minibatching, just pass the tensors as they are
            self.x = x
            self.time = time
            self.event = event
            self.event_change = event_change

        # now we have x, shape and labels

        # init a list to store all of the parameters that go into our likelihood
        coeffs_all = list()
        lams = list()
        rhos = list()

        # For each outcome dimension, instantiate a covariate effect for
        # each predictor.
        for level in range(k):
            labels_this = [s + "_" + str(level) for s in labels]

            coeffs_this = list()
            for name in labels_this:
                if name in vars:
                    v = Deterministic(name, vars[name])
                else:
                    v = self.Var(name=name,
                                 dist=priors.get(
                                     name,
                                     priors.get('Regressor',
                                                self.default_regressor_prior)))
                coeffs_this.append(v)
            coeffs_this = tt.stack(coeffs_this, axis=0)
            coeffs_all.append(coeffs_this)

            ### Now for the baseline hazard portions

            lam_name = 'lam_' + str(level)
            lam = self.Var(name=lam_name,
                           dist=priors.get(
                               lam_name,
                               priors.get('lam', self.default_lambda_prior))
                           )  # create labels for the lambdas
            lams.append(lam)
            # rhos
            rho_name = 'rho_' + str(level)
            rho = self.Var(name=rho_name,
                           dist=priors.get(
                               rho_name,
                               priors.get('rho', self.default_rho_prior)))
            rhos.append(rho)

        # frailty parameter (the transformation parameters r would follow
        # here, but are not created in this snippet)
        theta = self.Var(name='theta',
                         dist=priors.get(
                             'theta',
                             priors.get('Theta', self.default_theta_prior)))
        # make self attribute for the coefficients
        self.coeffs_all = coeffs_all

        self.theta = theta
        self.lams = lams = tt.stack(lams, axis=0)
        self.rhos = rhos = tt.stack(rhos, axis=0)
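A hedged usage sketch (the shared-variable shapes follow the snippet above: time and event are (n, k), x is (n, p); the data and explicit labels are illustrative):

import numpy as np
import pymc3 as pm
import theano

n, k, p = 200, 2, 3
time = theano.shared(np.abs(np.random.randn(n, k)), borrow=True)
event = theano.shared(np.random.binomial(1, 0.7, (n, k)), borrow=True)
X = theano.shared(np.random.randn(n, p), borrow=True)

with pm.Model() as model:
    # rs is accepted but not consumed in the snippet above;
    # minibatch=50 exercises the pm.Minibatch branch.
    frailty = FrailtyIndependentComponent_Fix(
        time, event, X, rs=None, minibatch=50,
        labels=["x" + str(j) for j in range(p)],
    )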
Example 6
import theano.tensor as tt
from pymc3 import Deterministic, Flat, Model, Normal
from pymc3.glm.utils import any_to_tensor_and_labels


class IndependentComponent(Model):
    """Creates independent component for independent variables, y_est is accessible via attribute
    Need to be able to hack this for usage with non-linear component
    Will be compatible with non-linear components as well
    Hack of pm.GLM.LinearComponent
    Parameters
    ----------
    name : str - name, associated with the linear component
    x : pd.DataFrame or np.ndarray
    y : pd.Series or np.array
    e: pd.Series or np.array
    intercept : bool - fit with intercept or not?
    labels : list - replace variable names with these labels
    priors : dict - priors for coefficients
        use `Intercept` key for defining Intercept prior
            defaults to Flat.dist()
        use `Regressor` key for defining default prior for all regressors
            defaults to Normal.dist(mu=0, tau=1.0E-6)
    vars : dict - random variables instead of creating new ones
    """
    default_regressor_prior = Normal.dist(mu=0, tau=1.0E-6)
    default_intercept_prior = Flat.dist()

    def __init__(self,
                 x,
                 y,
                 e,
                 intercept=True,
                 labels=None,
                 priors=None,
                 vars=None,
                 name='',
                 model=None):
        super(IndependentComponent, self).__init__(name, model)
        if priors is None:
            priors = {}
        if vars is None:
            vars = {}
        x, labels = any_to_tensor_and_labels(x, labels)

        # now we have x, shape and labels
        if intercept:
            x = tt.concatenate([tt.ones((x.shape[0], 1), x.dtype), x], axis=1)
            labels = ['Intercept'] + labels
        self.x = x
        coeffs = list()
        for name in labels:
            if name == 'Intercept':
                if name in vars:
                    v = Deterministic(name, vars[name])
                else:
                    v = self.Var(name=name,
                                 dist=priors.get(name,
                                                 self.default_intercept_prior))
                coeffs.append(v)
            else:
                if name in vars:
                    v = Deterministic(name, vars[name])
                else:
                    v = self.Var(name=name,
                                 dist=priors.get(
                                     name,
                                     priors.get('Regressor',
                                                self.default_regressor_prior)))
                coeffs.append(v)
        self.coeffs = tt.stack(coeffs, axis=0)
        self.y_est = x.dot(self.coeffs)
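Usage mirrors LinearComponent, with an extra event-indicator argument; a sketch with made-up survival-style data (e is accepted for use by subclasses and is not consumed in the snippet above):

import numpy as np
import pymc3 as pm

X = np.random.randn(100, 2)
t = np.abs(np.random.randn(100))      # outcomes (y)
e = np.random.binomial(1, 0.7, 100)   # event indicators

with pm.Model() as model:
    comp = IndependentComponent(X, t, e)
    # comp.y_est is the intercept-augmented X dotted with the coefficients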
Example 7
import theano.tensor as tt
from pymc3 import Deterministic, Model, Normal
from pymc3.glm.utils import any_to_tensor_and_labels


class CopulaIndependentComponent(Model):
    """Creates independent component for independent variables, y_est is accessible via attribute
    Need to be able to hack this for usage with non-linear component
    Will be compatible with non-linear components as well
    Hack of pm.GLM.LinearComponent
    Parameters
    ----------
    name : str - name, associated with the linear component
    x : pd.DataFrame or np.ndarray
    y : pd.Series or np.array
    e: pd.Series or np.array
    intercept : bool - fit with intercept or not?
    labels : list - replace variable names with these labels
    priors : dict - priors for coefficients
        use `Intercept` key for defining Intercept prior
            defaults to Flat.dist()
        use `Regressor` key for defining default prior for all regressors
            defaults to Normal.dist(mu=0, tau=1.0E-6)
    vars : dict - random variables instead of creating new ones
    """
    default_regressor_prior = Normal.dist(mu=0, tau=1 / 100)

    def __init__(self,
                 time_1,
                 time_2,
                 e_1,
                 e_2,
                 x,
                 labels=None,
                 priors=None,
                 vars=None,
                 name='',
                 model=None):
        super(CopulaIndependentComponent, self).__init__(name, model)
        if priors is None:
            priors = {}
        if vars is None:
            vars = {}
        # we need two sets of coefficients, one per margin
        x, labels = any_to_tensor_and_labels(x, labels)
        # now we have x, shape and labels
        self.x = x
        labels_1 = [s + "_1" for s in labels]
        ### First dimension
        coeffs_1 = list()
        for name in labels_1:
            if name in vars:
                v = Deterministic(name, vars[name])
            else:
                v = self.Var(name=name,
                             dist=priors.get(
                                 name,
                                 priors.get('Regressor',
                                            self.default_regressor_prior)))
            coeffs_1.append(v)
        self.coeffs_1 = tt.stack(coeffs_1, axis=0)
        ### Second dimension
        labels_2 = [s + "_2" for s in labels]
        coeffs_2 = list()
        for name in labels_2:
            if name in vars:
                v = Deterministic(name, vars[name])
            else:
                v = self.Var(name=name,
                             dist=priors.get(
                                 name,
                                 priors.get('Regressor',
                                            self.default_regressor_prior)))
            coeffs_2.append(v)
        self.coeffs_2 = tt.stack(coeffs_2, axis=0)
        ### create independent components as attributes of self.
        self.indep_1 = x.dot(self.coeffs_1)
        self.indep_2 = x.dot(self.coeffs_2)
        self.labels_1 = labels_1
        self.labels_2 = labels_2
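A hedged usage sketch (the bivariate survival-style data below are illustrative):

import numpy as np
import pymc3 as pm

n = 150
X = np.random.randn(n, 2)
t1, t2 = np.abs(np.random.randn(n)), np.abs(np.random.randn(n))
e1, e2 = np.random.binomial(1, 0.8, n), np.random.binomial(1, 0.8, n)

with pm.Model() as model:
    comp = CopulaIndependentComponent(t1, t2, e1, e2, X)
    # one linear predictor per margin, sharing the design matrix
    eta_1, eta_2 = comp.indep_1, comp.indep_2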