Python GLS Examples

Programming Language: Python

Namespace/Package Name: statsmodels.api

Class/Type: GLS

Examples at hotexamples.com: 8

Python GLS - 8 examples found. These are the top rated real world Python examples of statsmodels.api.GLS extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

GLS(3)

t_test(2)

predict(1)

Example #1

Show file

    def fit(self, norm="zscore"):
        """ Calculate the regression parameters and statistics. """

        bold = self.bold.copy()
        dm = self.dm.copy()

        # Normalize both the bold and dm
        if norm != None:
            bold = self._normalize_array(bold, norm)
            dm = self._normalize_array(dm, norm)

        # Add movement regressors... if present
        try:
            dm_movement = self.data["movement"]
            dm = np.vstack((dm, dm_movement))
        except KeyError:
            pass

        # Append a dummy predictor and run the regression
        #
        # Dummy is added at the last minute so it does not
        # interact with normalization or smoothing routines.
        dm_dummy = np.ones((dm.shape[0], dm.shape[1] + 1))
        dm_dummy[0:dm.shape[0], 0:dm.shape[1]] = dm

        # Truncate bold or dm_dummy if needed, and Go!
        try:
            bold = bold[0:dm_dummy.shape[0]]
            dm_dummy = dm_dummy[0:len(bold), :]
        except IndexError:
            pass

        self.glm = GLS(bold, dm_dummy).fit()

Example #2

Show file

File: base.py Project: parenthetical-e/roi

    def fit(self, norm='zscore'):
        """ Calculate the regression parameters and statistics. """
        
        bold = self.bold.copy()
        dm = self.dm.copy()
        
        # Normalize both the bold and dm
        if norm != None:
            bold = self._normalize_array(bold, norm)
            dm = self._normalize_array(dm, norm)

        # Add movement regressors... if present
        try:
            dm_movement = self.data['movement']
            dm = np.vstack((dm, dm_movement))
        except KeyError:
            pass
        
        # Append a dummy predictor and run the regression
        #
        # Dummy is added at the last minute so it does not
        # interact with normalization or smoothing routines.
        dm_dummy = np.ones((dm.shape[0], dm.shape[1] + 1))
        dm_dummy[0:dm.shape[0], 0:dm.shape[1]] = dm
        
        # Truncate bold or dm_dummy if needed, and Go!
        try:
            bold = bold[0:dm_dummy.shape[0]]
            dm_dummy = dm_dummy[0:len(bold),:]
        except IndexError:
            pass

        self.glm = GLS(bold, dm_dummy).fit()

Example #3

Show file

    def _orth_dm(self):
        """ Orthgonalize (by regression) each col in self.dm with respect to 
        its left neighbor. """

        dm = self.dm
        ## Rename for brevity

        # Make sure conds and ncol dm
        # are divisors
        conds = list(set(self.trials))
        nconds = len(conds) - 1  ## Drop baseline
        ncols = dm.shape[1] - 1
        if ncols % nconds:
            raise ValueError(
                "The number of condtions and shape of the dm are incompatible."
            )

        # If these are the same size there is nothing to
        # orthgonalize.
        if ncols != nconds:
            orth_dm = np.zeros_like(dm)
            orth_dm[:, 0] = dm[:, 0]
            ## Move baseline data over

            # Use num_col_per_cond, along with nconds
            # to find the strides we need to take along
            # the DM to orthgonalize each set of col(s)
            # belonging to each cond.
            num_col_per_cond = ncols / nconds
            for cond in conds:
                # Skip baseline
                if cond == 0:
                    continue

                left = cond
                right = cond + nconds

                # Rolling loop over the cols_per_cond
                # orthgonalizing as we go.
                # Note: we never use cnt directly...
                for cnt in range(num_col_per_cond - 1):
                    # Orthgonalize left col to right....
                    glm = GLS(dm[:, right], dm[:, left]).fit()  ## GLS(y, x)
                    orth_dm[:, right] = glm.resid
                    orth_dm[:, left] = dm[:, left]

                    # Shift indices for next iteration.
                    left = deepcopy(right)
                    right = right + nconds

            self.dm = orth_dm
        else:
            print("Nothing to orthgonalize.")

Example #4

Show file

def gls_model(X, y, test_size=0.25):

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_size)

    gls = GLS(y_train, X_train).fit(cov_type='HC1')

    coeffs = (gls.params.to_frame().rename(columns={
        0: "coeff"
    }).merge(
        gls.pvalues.to_frame().rename(columns={0: "p-value"}),
        left_index=True,
        right_index=True,
    ).assign(coeff=lambda df: df.apply(
        lambda r: r["coeff"] if r["p-value"] < 0.05 else np.nan, axis=1))[[
            "coeff"
        ]])

    mse = gls.predict(X_test).pipe(lambda s: (s - y_test)**2).mean()

    return {"model": gls, "coeffs": coeffs, "mse": mse}

Example #5

Show file

class Exp():
    """
    A template class for running easily parallelizable event-related fMRI
    simulations.  
    
    Note: Exp() can"t be run as is.  For simple run-able experiments see 
        simfMRI.bin.examples.*
    """
    def __init__(self, TR=2, ISI=2, prng=None):
        # ----
        # These need to be set during subclassing
        # and before use.
        self.trials = None
        self.durations = None
        self.prng = None
        ## Hang a RandomState object off
        ## of self.

        self.data = {}
        self.data["meta"] = {}
        ## meta is for model metadata

        # ----
        # Other needed functions you
        # might want to override
        self.noise_f = white
        self.hrf_params = {
            "width": 32,
            "TR": 1,
            "a1": 6.0,
            "a2": 12.,
            "b1": 0.9,
            "b2": 0.9,
            "c": 0.35
        }
        self.hrf = double_gamma(**self.hrf_params)
        # ----

        # ----
        # Setup globals,
        self.TR = TR
        self.ISI = ISI

        # --
        # and intialize the simulation"s (private)
        # data structues
        self.dm = None  ## The design matrix (create_dm, create_dm_param).
        self.bold = None  ## The bold signal (create_bold).
        self.results = {}  ## Simulation results go here.
        ## After a save_state() call.
        self.glm = None  ## Where the GLM object is stored after a fit call.
        # ----

        # ----
        # Config:
        # --
        # Move from ISI to ITI time, if needed.
        if (self.ISI % self.TR) > 0.0:
            raise ValueError("ISI must a even multiple of the TR.")
        elif self.ISI > self.TR:
            # Use multplier to transform data and trials into units
            # of TR from their native ISI.
            mult = self.ISI / self.TR

            trials_copy = deepcopy(self.trials)
            ## Needed to prevent circular updates

            trials_mult = []
            [
                trials_mult.extend([
                    t,
                ] + [
                    0,
                ] * (mult - 1)) for t in trials_copy
            ]

            data_copy = deepcopy(self.data)
            data_mult = defaultdict(list)
            for k, vals in data_copy.items():
                [
                    data_mult[k].extend([
                        v,
                    ] + [
                        0,
                    ] * (mult - 1)) for v in vals
                ]

            # Finally replace results with the
            # expanded version
            self.trials = trials_mult
            self.data = data_mult

    def _normalize_array(self, arr, function_name):
        """ Normalize the <arr>ay using the <function_name> 
        of one of the functions in roi.norm """

        return getattr(norm, function_name)(arr)

    def _orth_dm(self):
        """ Orthgonalize (by regression) each col in self.dm with respect to 
        its left neighbor. """

        dm = self.dm
        ## Rename for brevity

        # Make sure conds and ncol dm
        # are divisors
        conds = list(set(self.trials))
        nconds = len(conds) - 1  ## Drop baseline
        ncols = dm.shape[1] - 1
        if ncols % nconds:
            raise ValueError(
                "The number of condtions and shape of the dm are incompatible."
            )

        # If these are the same size there is nothing to
        # orthgonalize.
        if ncols != nconds:
            orth_dm = np.zeros_like(dm)
            orth_dm[:, 0] = dm[:, 0]
            ## Move baseline data over

            # Use num_col_per_cond, along with nconds
            # to find the strides we need to take along
            # the DM to orthgonalize each set of col(s)
            # belonging to each cond.
            num_col_per_cond = ncols / nconds
            for cond in conds:
                # Skip baseline
                if cond == 0:
                    continue

                left = cond
                right = cond + nconds

                # Rolling loop over the cols_per_cond
                # orthgonalizing as we go.
                # Note: we never use cnt directly...
                for cnt in range(num_col_per_cond - 1):
                    # Orthgonalize left col to right....
                    glm = GLS(dm[:, right], dm[:, left]).fit()  ## GLS(y, x)
                    orth_dm[:, right] = glm.resid
                    orth_dm[:, left] = dm[:, left]

                    # Shift indices for next iteration.
                    left = deepcopy(right)
                    right = right + nconds

            self.dm = orth_dm
        else:
            print("Nothing to orthgonalize.")

    def _convolve_hrf(self, arr):
        """
        Convolves hrf basis with a 1 or 2d (column-oriented) array.
        """

        # self.hrf may or may not exist yet
        if self.hrf == None:
            raise ValueError("No hrf is defined. Try self.create_hrf()?")

        arr = np.asarray(arr)  ## Just in case

        # Assume 2d (or really N > 1 d),
        # fall back to 1d.
        arr_c = np.zeros_like(arr)
        try:
            for col in range(arr.shape[1]):
                arr_c[:, col] = np.convolve(arr[:, col],
                                            self.hrf)[0:arr.shape[0]]
                ## Convolve and truncate to length
                ## of arr
        except IndexError:
            arr_c = np.convolve(arr[:], self.hrf)[0:arr.shape[0]]

        return arr_c

    def _reformat_model(self):
        """
        Use save_state() to store the simulation"s state. This private 
        method just extracts relevant data from the regression model 
        into a dict.
        """

        tosave = {
            "beta": "params",
            "t": "tvalues",
            "fvalue": "fvalue",
            "p": "pvalues",
            "r": "rsquared",
            "ci": "conf_int",
            "resid": "resid",
            "aic": "aic",
            "bic": "bic",
            "llf": "llf",
            "mse_model": "mse_model",
            "mse_resid": "mse_resid",
            "mse_total": "mse_total"
        }

        # Try to get each attr (a value in the dict above)
        # first as function (without args) then as a regular
        # attribute.  If both fail, silently move on.
        model_results = {}
        for k, v in tosave.items():
            try:
                model_results[k] = deepcopy(getattr(self.glm, v)())
            except TypeError:
                model_results[k] = deepcopy(getattr(self.glm, v))
            except AttributeError:
                continue

        return model_results

    def _generate_doc(self, name, bold, dm, dm_params):
        """ Generate a doc string for the current model. """

        if dm_params.get("box"):
            doc = """ {0}. Bold: {1}. DM: {2}. """.format(
                name, bold, ["baseline", "box"] + dm)
        else:
            doc = """ {0}. Bold: {1}. DM: {2}. """.format(
                name, bold, [
                    "baseline",
                ] + dm)
            ## baseline is added automigcally
            ## during create_dm... so we add
            ## it here too
        return doc

    def _template_model(self, bold, dm, bold_params, dm_params, norm):
        """ A template model used by populate_models() to create all
        the regression models used during a run().
        
        Note: __doc__ for this function gets redone dynamically. """

        self.data["meta"]["bold"] = deepcopy(bold)

        # If were dealing with a parametric set
        # and a univariate box is bieng used
        if dm_params.get("box"):
            self.data["meta"]["dm"] = ["baseline", "box"] + dm
        else:
            # otherwise just account for the baseline
            self.data["meta"]["dm"] = [
                "baseline",
            ] + dm

        # Try to unpack dm_params into create_dm
        # first, but if there are too many args
        # ("TypeError") try create_dm_param
        if len(dm_params) == 2:
            # Setup the dm,
            self.create_dm(**dm_params)

            # and the univariate bold.
            boldcol = [dm.index(b) + 1 for b in bold]
            ## +1 for baseline

            self.create_bold(self.dm[:, boldcol], **bold_params)
        elif len(dm_params) == 4:

            # Setup the dm,
            self.create_dm_param(names=dm, **dm_params)

            # then the parametric bold
            # from self.data[]
            boldarr = np.array(self.data[bold.pop()])  ## Init

            # Only goes if len(bold) > 1, see pop above.
            for b in bold:
                boldarr = np.vstack((boldarr, np.array(self.data[b])))

            self.create_bold(boldarr, **bold_params)
        else:
            raise ValueError("dm_params has the wrong number of arguments.")

        self.fit(norm=norm)

    def create_dm(self, drop=None, convolve=True):
        """ Create a unit (boxcar-only) DM with one columns for each 
        condition in self.trials.  
        
         If <convolve> the dm is convolved with the HRF (self.hrf). """

        cond_levels = sorted(list(set(self.trials)))
        ## Find and sort conditions in trials

        # Some useful counts...
        num_conds = len(cond_levels)
        num_tr = np.sum(self.durations)

        # Map each condition in trials to a
        # 2d binary 2d array.  Each row is a trial
        # and each column is a condition.
        dm_unit = np.zeros((num_tr, num_conds))
        for col, cond in enumerate(cond_levels):
            # Create boolean array use it to
            # populate the dm with ones...
            # which must be in tr time.
            mask_in_tr = dtime(self.trials == cond, self.durations, drop,
                               False)

            dm_unit[mask_in_tr, col] = 1

        self.dm = dm_unit

        if convolve:
            self.dm = self._convolve_hrf(self.dm)

    def create_dm_param(self,
                        names,
                        drop=None,
                        box=True,
                        orth=False,
                        convolve=True):
        """ Create a parametric design matrix based on <names> in self.data. 
        
        If <box> a univariate dm is created that fills the leftmost
        side of the dm.

        If <orth> each regressor is orthgonalized with respect to its
        left-hand neighbor (excluding the baseline).
        
        If <convolve> the dm is convolved with the HRF (self.hrf). """

        cond_levels = sorted(list(set(self.trials)))
        ## Find and sort conditions in trials

        # Some useful counts...
        num_names = len(names)
        num_tr = np.sum(self.durations)
        dm_param = None
        ## Will eventually hold the
        ## parametric DM.

        for cond in cond_levels:
            if cond == 0:
                continue
                ## We add the baseline
                ## in at the end

            # Create a temp dm to hold this
            # condition"s data
            dm_temp = np.zeros((num_tr, num_names))
            mask_in_tr = dtime(self.trials == cond, self.durations, drop,
                               False)

            # Get the named data, convert to tr time
            # then add to the temp dm using the mask
            for col, name in enumerate(names):
                data_in_tr = dtime(self.data[name], self.durations, drop, 0)

                dm_temp[mask_in_tr, col] = data_in_tr[mask_in_tr]

            # Store the temporary DM in
            # the final DM.
            if dm_param == None:
                dm_param = dm_temp  ## reinit
            else:
                dm_param = np.hstack((dm_param, dm_temp))  ## adding

        # Create the unit DM too, then combine them.
        # defining self.dm in the process
        self.create_dm(convolve=False)
        dm_unit = self.dm.copy()
        self.dm = None
        ## Copy and reset

        if box:
            self.dm = np.hstack((dm_unit, dm_param))
        else:
            baseline = dm_unit[:, 0]
            baseline = baseline.reshape(baseline.shape[0], 1)
            self.dm = np.hstack((baseline, dm_param))
            ## If not including the boxcar,
            ## we still need the baseline model.

        # Orthgonalize the regessors?
        if orth:
            self._orth_dm()

        # Convolve with self.hrf?
        if convolve:
            self.dm = self._convolve_hrf(self.dm)

    def create_bold(self, arr, convolve=False):
        """ 
        The provided <arr>ay becomes a noisy bold signal. 
        
        <convolve> - if True, the dm is convolved with the HRF defined by
        self.hrf().
        """

        arr = np.array(arr)
        try:
            self.bold = arr.sum(1)
            ## Sum cols, arr might
            ## have been 2d, need 1d.
        except ValueError:
            self.bold = arr

        # HRF?
        if convolve:
            self.bold = self._convolve_hrf(self.bold)

        # And add noise.
        noise, self.prng = self.noise_f(N=self.bold.shape[0], prng=self.prng)
        self.bold += noise

    def save_state(self, name):
        """
        Saves most of the state of the current simulation to results, keyed
        on <name>.  Saves greedily, trading storage space for security and
        redundancy.
        """

        tosave = {
            "TR": "TR",
            "ISI": "ISI",
            "trials": "trials",
            "data": "data",
            "dm": "dm",
            "bold": "bold"
        }
        ## This list is only for attr hung directly off
        ## of self.

        # Add a name to results
        self.results[name] = {}

        # Try to get each attr (a value in the dict above)
        # first as function (without args) then as a regular
        # attribute.  If both fail, silently move on.
        for k, v in tosave.items():
            try:
                self.results[name][k] = deepcopy(getattr(self, v)())
            except TypeError:
                self.results[name][k] = deepcopy(getattr(self, v))
            except AttributeError:
                continue

        # Now add the reformatted data from the current model,
        # if any.
        self.results[name].update(self._reformat_model())

    def populate_models(self, model_config):
        """ Use <model_config> to populate the experiment with models. 
        
        Note:
        Models become methods that must follow the naming convention,
        model_XX.., where XX.. is more than two integers [0-9].  
        
        For example: model_01, model_010, and model_69 are valid, 
        while model_A1, model_1 are not. """

        # Read in the model_config and loop
        # over its
        conf = ConfigParser.ConfigParser()
        readresult = conf.read(model_config)
        ## If conf.read() can't find model_config
        ## it (annoyingly) returns an empty list
        ## in python empty lists are false, so...

        if readresult:
            print(conf.sections())
            ## Tell the user about the models.

            for sec in conf.sections():
                # Get the config data for sec
                dm_params = eval(conf.get(sec, "dm_params"))
                bold_params = eval(conf.get(sec, "bold_params"))
                bold = eval(conf.get(sec, "bold"))
                dm = eval(conf.get(sec, "dm"))
                norm = eval(conf.get(sec, "norm"))
                ## Note: Using eval is slow and REALLY unsafe

                # Close on _template_model, update its
                # __doc__ and hang it on self as <sec>.
                parmodel = partial(self._template_model, bold, dm, bold_params,
                                   dm_params, norm)
                parmodel.__doc__ = self._generate_doc(sec, bold, dm, dm_params)

                print("Created:{0}".format(parmodel.__doc__))
                setattr(self, sec, parmodel)
                ## setattr magic to add
                ## <parmodel> to self as <sec>
        else:
            raise IOError("No such file: '{0}'".format(model_config))

    def fit(self, norm="zscore"):
        """ Calculate the regression parameters and statistics. """

        bold = self.bold.copy()
        dm = self.dm.copy()

        # Normalize both the bold and dm
        if norm != None:
            bold = self._normalize_array(bold, norm)
            dm = self._normalize_array(dm, norm)

        # Add movement regressors... if present
        try:
            dm_movement = self.data["movement"]
            dm = np.vstack((dm, dm_movement))
        except KeyError:
            pass

        # Append a dummy predictor and run the regression
        #
        # Dummy is added at the last minute so it does not
        # interact with normalization or smoothing routines.
        dm_dummy = np.ones((dm.shape[0], dm.shape[1] + 1))
        dm_dummy[0:dm.shape[0], 0:dm.shape[1]] = dm

        # Truncate bold or dm_dummy if needed, and Go!
        try:
            bold = bold[0:dm_dummy.shape[0]]
            dm_dummy = dm_dummy[0:len(bold), :]
        except IndexError:
            pass

        self.glm = GLS(bold, dm_dummy).fit()

    def contrast(self, contrast):
        """ Uses the current model to statistically compare predictors 
        (t-test), returning df, t and p values.
        
        <contrast> - a 1d list of [1,0,-1] the same length as the number
            of predictors in the model (sans the dummy, which is added
            silently). """

        if self.glm == None:
            raise ValueError("No glm present.  Try self.fit()?")

        contrast = self.glm.t_test(contrast)
        ## This a thin wrapper for
        ## statsmodels contrast() method

        return contrast.df_denom, contrast.tvalue, contrast.pvalue

    def print_model_summary(self):
        """ Prints all defined model names and their docstrings. """

        # find all self.model_N attritubes and run them.
        all_attr = dir(self)
        all_attr.sort()
        past_models = []
        model_count = 0
        for attr in all_attr:
            a_s = re.split("_", attr)

            # Match only model_N where N is an integer
            if len(a_s) == 2:
                if (a_s[0] == "model") and (re.match("\A\d+\Z", a_s[1])):

                    model_count += 1
                    model = attr
                    ## Rename for clarity

                    # Model name must be unique.
                    if model in past_models:
                        raise AttributeError(
                            "{0} was not unique.".format(model))
                    past_models.append(model)

                    # Now call the model and
                    # print its info out.
                    print("{0}. {1}:".format(model_count, model))
                    try:
                        func = getattr(self, model)
                        print(func.im_func.func_doc)
                    except KeyError:
                        print("Data not Found.  Moving on.")
                        continue

    def run(self, code):
        """
        Run all defined models in order, returning their tabulated results.
        
        <code> - the unique batch or run code for this experiment.
        
        Models are any method of the form "model_N" where N is an
        integer (e.g. model_2, model_1012 or model_666).  Models take no
        arguments.
        """

        self.results["batch_code"] = code

        # find all self.model_N attritubes and run them.
        all_attr = dir(self)
        all_attr.sort()
        past_models = []
        for attr in all_attr:
            a_s = re.split("_", attr)

            # Match only model_N where N is an integer
            if len(a_s) == 2:
                if (a_s[0] == "model") and (re.match("\A\d+\Z", a_s[1])):

                    model = attr
                    ## Rename for clarity

                    # Model name must be unique.
                    if model in past_models:
                        raise AttributeError(
                            "{0} was not unique.".format(model))
                    past_models.append(model)

                    # Now call the model and
                    # save its results.
                    print("Fitting {0}.".format(model))
                    try:
                        getattr(self, model)()
                    except KeyError:
                        # Missing model should just be skipped.
                        print("Data not Found.  Moving on.")
                        continue

                    self.save_state(name=model)

        return self.results

Example #6

Show file

File: base.py Project: parenthetical-e/roi

class Roi():
    """
    The basic. It does OLS regression. 
    """
    
    def __init__(self, TR, roi_name, trials, durations, data):
        # ---
        # User defined variables
        self.TR = TR
        self.roi_name = roi_name
        self.trials = np.array(trials)
        self.durations = np.array(durations)

        self.data = data
        if self.data == None:
            self.data = {}

        self.data['meta'] = {}  ## meta is for model metadata

        # ---
        # Intialize model data structues,
        # these take on values by calling:
        self.hrf = None     ## self.create_hrf()
        self.dm = None      ## self.create_dm() or create_dm_pararm()
        self.bold = None    ## self.create_bold()

        # --- 
        # The two flavors of results
        self.glm = None     ## self.fit(), fit a regrssion model
        self.results = {}   ## self.extract_results() into a dict


    def _convolve_hrf(self, arr):
        """
        Convolves hrf basis with a 1 or 2d (column-oriented) array.
        """
        
        # self.hrf may or may not exist yet
        if self.hrf == None:
            raise ValueError('No hrf is defined. Try self.create_hrf()?')
        
        arr = np.asarray(arr)   ## Just in case 

        # Assume 2d (or really N > 1 d), 
        # fall back to 1d.
        arr_c = np.zeros_like(arr)
        try:
            for col in range(arr.shape[1]):
                arr_c[:,col] = np.convolve(
                        arr[:,col], self.hrf)[0:arr.shape[0]]
                    ## Convolve and truncate to length
                    ## of arr
        except IndexError:
            arr_c = np.convolve(arr[:], self.hrf)[0:arr.shape[0]]
        
        return arr_c


    def _reformat_model(self):
        """
        Use extract_results() to store the simulation's state.
        
        This private method just extracts relevant data from the regression
        model into a dict.
        """
        
        tosave = {
            'beta':'params',
            't':'tvalues',
            'f_pvalue':'f_pvalue',
            'fvalue':'fvalue',
            'p':'pvalues',
            'r':'rsquared',
            'r_adj':'rsquared_adj',
            'ci':'conf_int',
            'resid':'resid',
            'wresid':'wresid',
            'aic':'aic',
            'bic':'bic',
            'llf':'llf',
            'bse':'bse',
            'nobs':'nobs',
            'df_model':'df_model',
            'df_resid':'df_resid',
            'ess':'ess',
            'ssr':'ssr',
            'mse_model':'mse_model',
            'mse_resid':'mse_resid',
            'mse_total':'mse_total',
            'pretty_summary':'summary'}
        
        # Try to get each attr (a value in the dict above)
        # first as function (without args) then as a regular
        # attribute.  If both fail, silently move on.
        model_results = {}
        for key, val in tosave.items():
            try:
                model_results[key] = deepcopy(getattr(self.glm, val)())
            except TypeError:
                model_results[key] = deepcopy(getattr(self.glm, val))
            except AttributeError:
                continue
        
        return model_results


    def _filter_array(self, arr):
        """ Filter and smooth the 1 or 2 d <arr>ay. """    
        
        if len(arr.shape) > 2:
            raise ValueError("<arr> must be 1 or 2d.")

        # Then use nitime to 
        # high pass filte using FIR
        # (~1/128 s, same cutoff as SPM8's default)

        # FIR did well in:
        #
        # Comparison of Filtering Methods for fMRI Datasets
        # F. Kruggela, D.Y. von Cramona, X. Descombesa
        # NeuroImage 10 (5), 1999, 530 - 543.
        filtered = np.ones_like(arr)
        try:
            # Try 2d first...
            for col in range(arr.shape[1]):         
                tsi = nitime.TimeSeries(arr[:,col], 1, self.TR)
                fsi = nitime.analysis.FilterAnalyzer(tsi, ub=None, lb=0.008)
                filtered[...,col] = fsi.fir.data
        except IndexError:
            # Fall back to 1d
            tsi = nitime.TimeSeries(arr, 1, self.TR)
            fsi = nitime.analysis.FilterAnalyzer(tsi, ub=None, lb=0.008)
            filtered = fsi.fir.data

        return filtered


    def _normalize_array(self, arr, function_name):
        """ Normalize the <arr>ay using the <function_name> 
        of one of the functions in roi.norm """

        return getattr(roi.norm, function_name)(arr)


    def _orth_dm(self):
        """ Orthgonalize (by regression) each col in self.dm with respect to 
        its left neighbor. """
        
        dm = self.dm  
            ## Rename for brevity
        
        # Make sure conds and ncol dm
        # are divisors
        conds = list(set(self.trials))
        nconds = len(conds) - 1;     ## Drop baseline
        ncols = dm.shape[1] - 1
        if ncols % nconds:
            raise ValueError(
                "The number of condtions and shape of the dm are incompatible.")

        # If these are the same size there is nothing to
        # orthgonalize.
        if ncols != nconds:
            orth_dm = np.zeros_like(dm)
            orth_dm[:,0] = dm[:,0]
                ## Move baseline data over

            # Use num_col_per_cond, along with nconds 
            # to find the strides we need to take along
            # the DM to orthgonalize each set of col(s) 
            # belonging to each cond.
            num_col_per_cond = ncols / nconds
            for cond in conds:
                # Skip baseline
                if cond == 0: continue
                left = cond
                right = cond + nconds
                
                # Rolling loop over the cols_per_cond
                # orthgonalizing as we go.
                for cnt in range(num_col_per_cond-1):
                    # Orthgonalize left col to right....
                    glm = GLS( dm[:,right], dm[:,left]).fit()  ## GLS(y, x)
                    orth_dm[:,right] = glm.resid
                    orth_dm[:,left] = dm[:,left]
                   
                    # Shift indices for next iteration.
                    left = deepcopy(right)
                    right = right + nconds

            self.dm = orth_dm
        else:
            print("Nothing to orthgonalize.")


    def _write_bold(self):
        """ Write the bold signal to a file in ./bold. """
        
        no_exten = re.split('\.', self.roi_name)[0]
        pathsplit = re.split('/', no_exten)
        drop_path = pathsplit[-1]
        name = drop_path + '_bold.txt'
            ## Grab the non-file extension, non-path, 
            ## parts of roi_name and
            ## append a new extention.
        
        # Create a bold dir nested where roi_name is stored, 
        # if needed that is.
        bold_path = '/'.join(pathsplit[0:-1] + ['bold', ])
        if not os.path.exists(bold_path):
            os.mkdir(bold_path)

        # Write!
        fid = open('/'.join([bold_path, name]), 'w')
        self.bold.tofile(fid, sep='\n')
        fid.close()


    def create_hrf(self, function_name, params=None):
        """ Creates an hemodynamic response model using the 
        function named <function_name> in roi.hrfs using
        <params> which should be a dictionary of parameters that 
        function_name will accept when called.
        
        If <params> is None the default parameters are used, if 
        possible. 
        
        hrf function_names are 'mean_fir' and 'double_gamma'. """

        if params == None:
            self.hrf = getattr(roi.hrfs, function_name)(self)
        else:
            self.hrf = getattr(roi.hrfs, function_name)(self, **params)


    def create_dm(self, drop=None, convolve=True):
        """ Create a unit (boxcar-only) DM with one columns for each 
        condition in self.trials.  
        
         If <convolve> the dm is convolved with the HRF (self.hrf). """

        cond_levels = sorted(list(set(self.trials)))
            ## Find and sort conditions in trials

        # Some useful counts...
        num_conds = len(cond_levels)
        num_tr = np.sum(self.durations)
        num_trials = len(self.trials)

        # Map each condition in trials to a
        # 2d binary 2d array.  Each row is a trial
        # and each column is a condition.
        dm_unit = np.zeros((num_tr, num_conds))
        for col, cond in enumerate(cond_levels):
            # Create boolean array use it to 
            # populate the dm with ones... 
            # which must be in tr time.
            mask_in_tr = roi.timing.dtime(
                    self.trials == cond, self.durations, None, False)

            dm_unit[mask_in_tr,col] = 1

        self.dm = dm_unit
        
        if convolve:
            self.dm = self._convolve_hrf(self.dm)


    def create_dm_param(self, names, drop=None, box=True, 
            orth=False, convolve=True):
        """ Create a parametric design matrix based on <names> in self.data. 
        
        If <box> a univariate dm is created that fills the leftmost
        side of the dm.

        If <orth> each regressor is orthgonalized with respect to its
        left-hand neighbor (excluding the baseline).
        
        If <convolve> the dm is convolved with the HRF (self.hrf). """

        cond_levels = sorted(list(set(self.trials)))
            ## Find and sort conditions in trials
       
        # Some useful counts...
        num_trials = len(self.trials)
        num_names = len(names)
        num_tr = np.sum(self.durations)
        
        dm_param = None
            ## Will eventually hold the 
            ## parametric DM.

        for cond in cond_levels:
            if cond == 0:
                continue
                    ## We add the baseline 
                    ## in at the end

            # Create a temp dm to hold this
            # condition's data
            dm_temp = np.zeros((num_tr, num_names))

            mask_in_tr = roi.timing.dtime(
                    self.trials == cond, self.durations, drop, False)

            # Get the named data, convert to tr time 
            # then add to the temp dm using the mask
            for col, name in enumerate(names):
                data_in_tr = roi.timing.dtime(
                            self.data[name], self.durations, drop, 0)

                dm_temp[mask_in_tr,col] = data_in_tr[mask_in_tr]
                    
            # Store the temporary DM in 
            # the final DM.
            if dm_param == None:
                dm_param = dm_temp  ## reinit
            else:
                dm_param = np.hstack((dm_param, dm_temp))  ## adding

        # Create the unit DM too, then combine them.
        # defining self.dm in the process
        self.create_dm(convolve=False)
        dm_unit = self.dm.copy(); self.dm = None  
            ## Copy and reset

        if box:
            self.dm = np.hstack((dm_unit, dm_param))
        else:
            baseline = dm_unit[:,0]
            baseline = baseline.reshape(baseline.shape[0], 1)
            self.dm = np.hstack((baseline, dm_param))
                ## If not including the boxcar,
                ## we still need the baseline model.

        # Orthgonalize the regessors?
        if orth: 
            self._orth_dm()

        # Convolve with self.hrf?
        if convolve: 
            self.dm = self._convolve_hrf(self.dm)


    def create_bold(self, preprocess=True):
        """ Extract the fMRI data from roi_name and <preprocess> it,
        if True. """
        
       # TODO - test 
        nifti = nibabel.nifti1.load(self.roi_name)

        # Isolate non-zero timecourses 
        data = nifti.get_data()
        mask = data < 0.1
            ## Bold data will alway be greater than 0.1
            ## but we want an inverted mask...
        
        # Create a masked array...
        mdata = np.ma.MaskedArray(data=data, mask=mask)

        # Use masked array to average over x, y, z
        # only for non-zero fMRI data, 
        # resulting in a 1d times series
        self.bold = np.asarray(mdata.mean(0).mean(0).mean(0).data)
        
        # Now archive the bold signal, 
        # prior to any preprocessing
        self._write_bold()

        if preprocess:
            self.bold = self._filter_array(self.bold)
        

    def fit(self, norm='zscore'):
        """ Calculate the regression parameters and statistics. """
        
        bold = self.bold.copy()
        dm = self.dm.copy()
        
        # Normalize both the bold and dm
        if norm != None:
            bold = self._normalize_array(bold, norm)
            dm = self._normalize_array(dm, norm)

        # Add movement regressors... if present
        try:
            dm_movement = self.data['movement']
            dm = np.vstack((dm, dm_movement))
        except KeyError:
            pass
        
        # Append a dummy predictor and run the regression
        #
        # Dummy is added at the last minute so it does not
        # interact with normalization or smoothing routines.
        dm_dummy = np.ones((dm.shape[0], dm.shape[1] + 1))
        dm_dummy[0:dm.shape[0], 0:dm.shape[1]] = dm
        
        # Truncate bold or dm_dummy if needed, and Go!
        try:
            bold = bold[0:dm_dummy.shape[0]]
            dm_dummy = dm_dummy[0:len(bold),:]
        except IndexError:
            pass

        self.glm = GLS(bold, dm_dummy).fit()
    

    def contrast(self, contrast, name):
        """ Uses the current model to statistically compare predictors 
        (t-test), returning df, t and p values.
        
        <contrast> - a 1d list of [1,0,-1] the same length as the number
            of predictors in the model (sans the dummy, which is added
            silently). """
        
        if self.glm == None:
            raise ValueError("No glm present.  Try self.fit()?")
        
        contrast = self.glm.t_test(contrast)
            ## This a thin wrapper for 
            ## statsmodels contrast() method

        return contrast.df_denom, contrast.tvalue, contrast.pvalue


    def model_00(self):
        """ The simplest model, a univariate analysis of all conditions in 
        trials. """

        self.data['meta']['bold'] = self.roi_name
        self.data['meta']['dm'] = [str(cond) for cond in set(self.trials)]

        self.create_dm(convolve=True)

        self.fit(norm='zscore')


    def print_model_summary(self):
        """ Prints all defined model names and their docstrings. """

        # find all self.model_N attritubes and run them.
        all_attr = dir(self)
        all_attr.sort()
        past_models = []
        model_count = 0
        for attr in all_attr:
            a_s = re.split('_', attr)
            
            # Match only model_N where N is an integer
            if len(a_s) == 2:
                if (a_s[0] == 'model') and (re.match('\A\d+\Z', a_s[1])):
                    
                    model_count += 1
                    model = attr
                        ## Rename for clarity

                    # Model name must be unique.
                    if model in past_models:
                        raise AttributeError(
                                '{0} was not unique.'.format(model))
                    past_models.append(model)

                    # Now call the model and
                    # print its info out.
                    print("{0}. {1}:".format(model_count, model))
                    try:
                        func = getattr(self, model)
                        print(func.im_func.func_doc)                    
                    except KeyError:
                        print("Data not Found.  Moving on.")
                        continue

    
    def run(self, code):
        """
        Run all defined models in order, returning their tabulated results.
        
        <code> - the unique batch or run code for this experiment.
        
        Models are any method of the form 'model_N' where N is an
        integer (e.g. model_2, model_1012 or model_666).  
        Models take no arguments (besides self).
        """
        
        self.results['batch_code'] = code
        
        # find all self.model_N attritubes and run them.
        all_attr = dir(self)
        all_attr.sort()
        past_models = []
        for attr in all_attr:
            a_s = re.split('_', attr)
            
            # Match only model_N where N is an integer
            if len(a_s) == 2:
                if (a_s[0] == 'model') and (re.match('\A\d+\Z', a_s[1])):
                    
                    model = attr
                        ## Rename for clarity

                    # Model name must be unique.
                    if model in past_models:
                        raise AttributeError(
                                '{0} was not unique.'.format(model))
                    past_models.append(model)

                    # Now call the model and
                    # save its results.
                    print('Fitting {0}.'.format(model))
                    try:
                        getattr(self, model)()
                    except KeyError:
                        print("Data not Found.  Moving on.")
                        continue

                    self.extract_results(name=model)
        
        return self.results
        

    def extract_results(self, name):
        """
        Saves most of the state of the current model to results, keyed
        on <name>.  Saves greedily, trading storage space for security 
        and redundancy.
        """

        tosave = {
            'roi_name':'roi_name',
            'TR':'TR',
            'trials':'trials',
            'duration;fs':'durations',
            'data':'data',
            'dm':'dm',
            'hrf':'hrf',
            'bold':'bold'
        }
            ## This list is only for attr hung directly off
            ## of self.
        
        # Add a name to results
        self.results[name] = {}
        
        # Try to get each attr (a value in the dict above)
        # first as function (without args) then as a regular
        # attribute.  If both fail, silently move on.
        for key, val in tosave.items():
            try:
                self.results[name][key] = deepcopy(getattr(self, val)())
            except TypeError:
                self.results[name][key] = deepcopy(getattr(self, val))
            except AttributeError:
                continue

        # Now add the reformatted data from the current model,
        # if any.
        self.results[name].update(self._reformat_model())

Example #7

Show file

File: base.py Project: parenthetical-e/roi

class Roi():
    """
    The basic. It does OLS regression. 
    """
    def __init__(self, TR, roi_name, trials, durations, data):
        # ---
        # User defined variables
        self.TR = TR
        self.roi_name = roi_name
        self.trials = np.array(trials)
        self.durations = np.array(durations)

        self.data = data
        if self.data == None:
            self.data = {}

        self.data['meta'] = {}  ## meta is for model metadata

        # ---
        # Intialize model data structues,
        # these take on values by calling:
        self.hrf = None  ## self.create_hrf()
        self.dm = None  ## self.create_dm() or create_dm_pararm()
        self.bold = None  ## self.create_bold()

        # ---
        # The two flavors of results
        self.glm = None  ## self.fit(), fit a regrssion model
        self.results = {}  ## self.extract_results() into a dict

    def _convolve_hrf(self, arr):
        """
        Convolves hrf basis with a 1 or 2d (column-oriented) array.
        """

        # self.hrf may or may not exist yet
        if self.hrf == None:
            raise ValueError('No hrf is defined. Try self.create_hrf()?')

        arr = np.asarray(arr)  ## Just in case

        # Assume 2d (or really N > 1 d),
        # fall back to 1d.
        arr_c = np.zeros_like(arr)
        try:
            for col in range(arr.shape[1]):
                arr_c[:, col] = np.convolve(arr[:, col],
                                            self.hrf)[0:arr.shape[0]]
                ## Convolve and truncate to length
                ## of arr
        except IndexError:
            arr_c = np.convolve(arr[:], self.hrf)[0:arr.shape[0]]

        return arr_c

    def _reformat_model(self):
        """
        Use extract_results() to store the simulation's state.
        
        This private method just extracts relevant data from the regression
        model into a dict.
        """

        tosave = {
            'beta': 'params',
            't': 'tvalues',
            'f_pvalue': 'f_pvalue',
            'fvalue': 'fvalue',
            'p': 'pvalues',
            'r': 'rsquared',
            'r_adj': 'rsquared_adj',
            'ci': 'conf_int',
            'resid': 'resid',
            'wresid': 'wresid',
            'aic': 'aic',
            'bic': 'bic',
            'llf': 'llf',
            'bse': 'bse',
            'nobs': 'nobs',
            'df_model': 'df_model',
            'df_resid': 'df_resid',
            'ess': 'ess',
            'ssr': 'ssr',
            'mse_model': 'mse_model',
            'mse_resid': 'mse_resid',
            'mse_total': 'mse_total',
            'pretty_summary': 'summary'
        }

        # Try to get each attr (a value in the dict above)
        # first as function (without args) then as a regular
        # attribute.  If both fail, silently move on.
        model_results = {}
        for key, val in tosave.items():
            try:
                model_results[key] = deepcopy(getattr(self.glm, val)())
            except TypeError:
                model_results[key] = deepcopy(getattr(self.glm, val))
            except AttributeError:
                continue

        return model_results

    def _filter_array(self, arr):
        """ Filter and smooth the 1 or 2 d <arr>ay. """

        if len(arr.shape) > 2:
            raise ValueError("<arr> must be 1 or 2d.")

        # Then use nitime to
        # high pass filte using FIR
        # (~1/128 s, same cutoff as SPM8's default)

        # FIR did well in:
        #
        # Comparison of Filtering Methods for fMRI Datasets
        # F. Kruggela, D.Y. von Cramona, X. Descombesa
        # NeuroImage 10 (5), 1999, 530 - 543.
        filtered = np.ones_like(arr)
        try:
            # Try 2d first...
            for col in range(arr.shape[1]):
                tsi = nitime.TimeSeries(arr[:, col], 1, self.TR)
                fsi = nitime.analysis.FilterAnalyzer(tsi, ub=None, lb=0.008)
                filtered[..., col] = fsi.fir.data
        except IndexError:
            # Fall back to 1d
            tsi = nitime.TimeSeries(arr, 1, self.TR)
            fsi = nitime.analysis.FilterAnalyzer(tsi, ub=None, lb=0.008)
            filtered = fsi.fir.data

        return filtered

    def _normalize_array(self, arr, function_name):
        """ Normalize the <arr>ay using the <function_name> 
        of one of the functions in roi.norm """

        return getattr(roi.norm, function_name)(arr)

    def _orth_dm(self):
        """ Orthgonalize (by regression) each col in self.dm with respect to 
        its left neighbor. """

        dm = self.dm
        ## Rename for brevity

        # Make sure conds and ncol dm
        # are divisors
        conds = list(set(self.trials))
        nconds = len(conds) - 1
        ## Drop baseline
        ncols = dm.shape[1] - 1
        if ncols % nconds:
            raise ValueError(
                "The number of condtions and shape of the dm are incompatible."
            )

        # If these are the same size there is nothing to
        # orthgonalize.
        if ncols != nconds:
            orth_dm = np.zeros_like(dm)
            orth_dm[:, 0] = dm[:, 0]
            ## Move baseline data over

            # Use num_col_per_cond, along with nconds
            # to find the strides we need to take along
            # the DM to orthgonalize each set of col(s)
            # belonging to each cond.
            num_col_per_cond = ncols / nconds
            for cond in conds:
                # Skip baseline
                if cond == 0: continue
                left = cond
                right = cond + nconds

                # Rolling loop over the cols_per_cond
                # orthgonalizing as we go.
                for cnt in range(num_col_per_cond - 1):
                    # Orthgonalize left col to right....
                    glm = GLS(dm[:, right], dm[:, left]).fit()  ## GLS(y, x)
                    orth_dm[:, right] = glm.resid
                    orth_dm[:, left] = dm[:, left]

                    # Shift indices for next iteration.
                    left = deepcopy(right)
                    right = right + nconds

            self.dm = orth_dm
        else:
            print("Nothing to orthgonalize.")

    def _write_bold(self):
        """ Write the bold signal to a file in ./bold. """

        no_exten = re.split('\.', self.roi_name)[0]
        pathsplit = re.split('/', no_exten)
        drop_path = pathsplit[-1]
        name = drop_path + '_bold.txt'
        ## Grab the non-file extension, non-path,
        ## parts of roi_name and
        ## append a new extention.

        # Create a bold dir nested where roi_name is stored,
        # if needed that is.
        bold_path = '/'.join(pathsplit[0:-1] + [
            'bold',
        ])
        if not os.path.exists(bold_path):
            os.mkdir(bold_path)

        # Write!
        fid = open('/'.join([bold_path, name]), 'w')
        self.bold.tofile(fid, sep='\n')
        fid.close()

    def create_hrf(self, function_name, params=None):
        """ Creates an hemodynamic response model using the 
        function named <function_name> in roi.hrfs using
        <params> which should be a dictionary of parameters that 
        function_name will accept when called.
        
        If <params> is None the default parameters are used, if 
        possible. 
        
        hrf function_names are 'mean_fir' and 'double_gamma'. """

        if params == None:
            self.hrf = getattr(roi.hrfs, function_name)(self)
        else:
            self.hrf = getattr(roi.hrfs, function_name)(self, **params)

    def create_dm(self, drop=None, convolve=True):
        """ Create a unit (boxcar-only) DM with one columns for each 
        condition in self.trials.  
        
         If <convolve> the dm is convolved with the HRF (self.hrf). """

        cond_levels = sorted(list(set(self.trials)))
        ## Find and sort conditions in trials

        # Some useful counts...
        num_conds = len(cond_levels)
        num_tr = np.sum(self.durations)
        num_trials = len(self.trials)

        # Map each condition in trials to a
        # 2d binary 2d array.  Each row is a trial
        # and each column is a condition.
        dm_unit = np.zeros((num_tr, num_conds))
        for col, cond in enumerate(cond_levels):
            # Create boolean array use it to
            # populate the dm with ones...
            # which must be in tr time.
            mask_in_tr = roi.timing.dtime(self.trials == cond, self.durations,
                                          None, False)

            dm_unit[mask_in_tr, col] = 1

        self.dm = dm_unit

        if convolve:
            self.dm = self._convolve_hrf(self.dm)

    def create_dm_param(self,
                        names,
                        drop=None,
                        box=True,
                        orth=False,
                        convolve=True):
        """ Create a parametric design matrix based on <names> in self.data. 
        
        If <box> a univariate dm is created that fills the leftmost
        side of the dm.

        If <orth> each regressor is orthgonalized with respect to its
        left-hand neighbor (excluding the baseline).
        
        If <convolve> the dm is convolved with the HRF (self.hrf). """

        cond_levels = sorted(list(set(self.trials)))
        ## Find and sort conditions in trials

        # Some useful counts...
        num_trials = len(self.trials)
        num_names = len(names)
        num_tr = np.sum(self.durations)

        dm_param = None
        ## Will eventually hold the
        ## parametric DM.

        for cond in cond_levels:
            if cond == 0:
                continue
                ## We add the baseline
                ## in at the end

            # Create a temp dm to hold this
            # condition's data
            dm_temp = np.zeros((num_tr, num_names))

            mask_in_tr = roi.timing.dtime(self.trials == cond, self.durations,
                                          drop, False)

            # Get the named data, convert to tr time
            # then add to the temp dm using the mask
            for col, name in enumerate(names):
                data_in_tr = roi.timing.dtime(self.data[name], self.durations,
                                              drop, 0)

                dm_temp[mask_in_tr, col] = data_in_tr[mask_in_tr]

            # Store the temporary DM in
            # the final DM.
            if dm_param == None:
                dm_param = dm_temp  ## reinit
            else:
                dm_param = np.hstack((dm_param, dm_temp))  ## adding

        # Create the unit DM too, then combine them.
        # defining self.dm in the process
        self.create_dm(convolve=False)
        dm_unit = self.dm.copy()
        self.dm = None
        ## Copy and reset

        if box:
            self.dm = np.hstack((dm_unit, dm_param))
        else:
            baseline = dm_unit[:, 0]
            baseline = baseline.reshape(baseline.shape[0], 1)
            self.dm = np.hstack((baseline, dm_param))
            ## If not including the boxcar,
            ## we still need the baseline model.

        # Orthgonalize the regessors?
        if orth:
            self._orth_dm()

        # Convolve with self.hrf?
        if convolve:
            self.dm = self._convolve_hrf(self.dm)

    def create_bold(self, preprocess=True):
        """ Extract the fMRI data from roi_name and <preprocess> it,
        if True. """

        # TODO - test
        nifti = nibabel.nifti1.load(self.roi_name)

        # Isolate non-zero timecourses
        data = nifti.get_data()
        mask = data < 0.1
        ## Bold data will alway be greater than 0.1
        ## but we want an inverted mask...

        # Create a masked array...
        mdata = np.ma.MaskedArray(data=data, mask=mask)

        # Use masked array to average over x, y, z
        # only for non-zero fMRI data,
        # resulting in a 1d times series
        self.bold = np.asarray(mdata.mean(0).mean(0).mean(0).data)

        # Now archive the bold signal,
        # prior to any preprocessing
        self._write_bold()

        if preprocess:
            self.bold = self._filter_array(self.bold)

    def fit(self, norm='zscore'):
        """ Calculate the regression parameters and statistics. """

        bold = self.bold.copy()
        dm = self.dm.copy()

        # Normalize both the bold and dm
        if norm != None:
            bold = self._normalize_array(bold, norm)
            dm = self._normalize_array(dm, norm)

        # Add movement regressors... if present
        try:
            dm_movement = self.data['movement']
            dm = np.vstack((dm, dm_movement))
        except KeyError:
            pass

        # Append a dummy predictor and run the regression
        #
        # Dummy is added at the last minute so it does not
        # interact with normalization or smoothing routines.
        dm_dummy = np.ones((dm.shape[0], dm.shape[1] + 1))
        dm_dummy[0:dm.shape[0], 0:dm.shape[1]] = dm

        # Truncate bold or dm_dummy if needed, and Go!
        try:
            bold = bold[0:dm_dummy.shape[0]]
            dm_dummy = dm_dummy[0:len(bold), :]
        except IndexError:
            pass

        self.glm = GLS(bold, dm_dummy).fit()

    def contrast(self, contrast, name):
        """ Uses the current model to statistically compare predictors 
        (t-test), returning df, t and p values.
        
        <contrast> - a 1d list of [1,0,-1] the same length as the number
            of predictors in the model (sans the dummy, which is added
            silently). """

        if self.glm == None:
            raise ValueError("No glm present.  Try self.fit()?")

        contrast = self.glm.t_test(contrast)
        ## This a thin wrapper for
        ## statsmodels contrast() method

        return contrast.df_denom, contrast.tvalue, contrast.pvalue

    def model_00(self):
        """ The simplest model, a univariate analysis of all conditions in 
        trials. """

        self.data['meta']['bold'] = self.roi_name
        self.data['meta']['dm'] = [str(cond) for cond in set(self.trials)]

        self.create_dm(convolve=True)

        self.fit(norm='zscore')

    def print_model_summary(self):
        """ Prints all defined model names and their docstrings. """

        # find all self.model_N attritubes and run them.
        all_attr = dir(self)
        all_attr.sort()
        past_models = []
        model_count = 0
        for attr in all_attr:
            a_s = re.split('_', attr)

            # Match only model_N where N is an integer
            if len(a_s) == 2:
                if (a_s[0] == 'model') and (re.match('\A\d+\Z', a_s[1])):

                    model_count += 1
                    model = attr
                    ## Rename for clarity

                    # Model name must be unique.
                    if model in past_models:
                        raise AttributeError(
                            '{0} was not unique.'.format(model))
                    past_models.append(model)

                    # Now call the model and
                    # print its info out.
                    print("{0}. {1}:".format(model_count, model))
                    try:
                        func = getattr(self, model)
                        print(func.im_func.func_doc)
                    except KeyError:
                        print("Data not Found.  Moving on.")
                        continue

    def run(self, code):
        """
        Run all defined models in order, returning their tabulated results.
        
        <code> - the unique batch or run code for this experiment.
        
        Models are any method of the form 'model_N' where N is an
        integer (e.g. model_2, model_1012 or model_666).  
        Models take no arguments (besides self).
        """

        self.results['batch_code'] = code

        # find all self.model_N attritubes and run them.
        all_attr = dir(self)
        all_attr.sort()
        past_models = []
        for attr in all_attr:
            a_s = re.split('_', attr)

            # Match only model_N where N is an integer
            if len(a_s) == 2:
                if (a_s[0] == 'model') and (re.match('\A\d+\Z', a_s[1])):

                    model = attr
                    ## Rename for clarity

                    # Model name must be unique.
                    if model in past_models:
                        raise AttributeError(
                            '{0} was not unique.'.format(model))
                    past_models.append(model)

                    # Now call the model and
                    # save its results.
                    print('Fitting {0}.'.format(model))
                    try:
                        getattr(self, model)()
                    except KeyError:
                        print("Data not Found.  Moving on.")
                        continue

                    self.extract_results(name=model)

        return self.results

    def extract_results(self, name):
        """
        Saves most of the state of the current model to results, keyed
        on <name>.  Saves greedily, trading storage space for security 
        and redundancy.
        """

        tosave = {
            'roi_name': 'roi_name',
            'TR': 'TR',
            'trials': 'trials',
            'duration;fs': 'durations',
            'data': 'data',
            'dm': 'dm',
            'hrf': 'hrf',
            'bold': 'bold'
        }
        ## This list is only for attr hung directly off
        ## of self.

        # Add a name to results
        self.results[name] = {}

        # Try to get each attr (a value in the dict above)
        # first as function (without args) then as a regular
        # attribute.  If both fail, silently move on.
        for key, val in tosave.items():
            try:
                self.results[name][key] = deepcopy(getattr(self, val)())
            except TypeError:
                self.results[name][key] = deepcopy(getattr(self, val))
            except AttributeError:
                continue

        # Now add the reformatted data from the current model,
        # if any.
        self.results[name].update(self._reformat_model())

Example #8

Show file

File: expclass.py Project: parenthetical-e/simfMRI

class Exp():
    """
    A template class for running easily parallelizable event-related fMRI
    simulations.  
    
    Note: Exp() can"t be run as is.  For simple run-able experiments see 
        simfMRI.bin.examples.*
    """
    
    def __init__(self, TR=2, ISI=2, prng=None):
        # ----
        # These need to be set during subclassing
        # and before use.
        self.trials = None
        self.durations = None
        self.prng = None
            ## Hang a RandomState object off
            ## of self.
        
        self.data = {}
        self.data["meta"] = {}
            ## meta is for model metadata
        
        # ----
        # Other needed functions you
        # might want to override
        self.noise_f = white
        self.hrf_params = {"width":32,"TR":1,"a1":6.0,"a2":12.,
                "b1":0.9,"b2":0.9,"c":0.35}
        self.hrf = double_gamma(**self.hrf_params)
        # ----
        
        # ----
        # Setup globals,
        self.TR = TR
        self.ISI = ISI
        
        # --
        # and intialize the simulation"s (private) 
        # data structues
        self.dm = None      ## The design matrix (create_dm, create_dm_param).
        self.bold = None    ## The bold signal (create_bold).
        self.results = {}   ## Simulation results go here.
                            ## After a save_state() call.
        self.glm = None     ## Where the GLM object is stored after a fit call.
        # ----
        
        # ----
        # Config:
        # --
        # Move from ISI to ITI time, if needed.
        if (self.ISI % self.TR) > 0.0:
            raise ValueError("ISI must a even multiple of the TR.")
        elif self.ISI > self.TR:
            # Use multplier to transform data and trials into units
            # of TR from their native ISI.
            mult = self.ISI/self.TR
            
            trials_copy = deepcopy(self.trials)
                ## Needed to prevent circular updates
            
            trials_mult = []
            [trials_mult.extend([t,] + [0,]*(mult-1)) for t in trials_copy]
            
            data_copy = deepcopy(self.data)
            data_mult = defaultdict(list)
            for k,vals in data_copy.items():
                [data_mult[k].extend([v,] + [0,]*(mult-1)) for v in vals]
            
            # Finally replace results with the
            # expanded version
            self.trials = trials_mult
            self.data = data_mult
    
    
    def _normalize_array(self, arr, function_name):
        """ Normalize the <arr>ay using the <function_name> 
        of one of the functions in roi.norm """

        return getattr(norm, function_name)(arr)


    def _orth_dm(self):
        """ Orthgonalize (by regression) each col in self.dm with respect to 
        its left neighbor. """
        
        dm = self.dm  
            ## Rename for brevity
        
        # Make sure conds and ncol dm
        # are divisors
        conds = list(set(self.trials))
        nconds = len(conds) - 1     ## Drop baseline
        ncols = dm.shape[1] - 1
        if ncols % nconds:
            raise ValueError(
                "The number of condtions and shape of the dm are incompatible.")

        # If these are the same size there is nothing to
        # orthgonalize.
        if ncols != nconds:
            orth_dm = np.zeros_like(dm)
            orth_dm[:,0] = dm[:,0]
                ## Move baseline data over

            # Use num_col_per_cond, along with nconds 
            # to find the strides we need to take along
            # the DM to orthgonalize each set of col(s) 
            # belonging to each cond.
            num_col_per_cond = ncols / nconds
            for cond in conds:
                # Skip baseline
                if cond == 0: 
                    continue
                
                left = cond
                right = cond + nconds
                
                # Rolling loop over the cols_per_cond
                # orthgonalizing as we go.
                # Note: we never use cnt directly...
                for cnt in range(num_col_per_cond-1):
                    # Orthgonalize left col to right....
                    glm = GLS( dm[:,right], dm[:,left]).fit()  ## GLS(y, x)
                    orth_dm[:,right] = glm.resid
                    orth_dm[:,left] = dm[:,left]
                   
                    # Shift indices for next iteration.
                    left = deepcopy(right)
                    right = right + nconds

            self.dm = orth_dm
        else:
            print("Nothing to orthgonalize.")
    
    
    def _convolve_hrf(self, arr):
        """
        Convolves hrf basis with a 1 or 2d (column-oriented) array.
        """
        
        # self.hrf may or may not exist yet
        if self.hrf == None:
            raise ValueError("No hrf is defined. Try self.create_hrf()?")
        
        arr = np.asarray(arr)   ## Just in case 

        # Assume 2d (or really N > 1 d), 
        # fall back to 1d.
        arr_c = np.zeros_like(arr)
        try:
            for col in range(arr.shape[1]):
                arr_c[:,col] = np.convolve(
                        arr[:,col], self.hrf)[0:arr.shape[0]]
                    ## Convolve and truncate to length
                    ## of arr
        except IndexError:
            arr_c = np.convolve(arr[:], self.hrf)[0:arr.shape[0]]
        
        return arr_c
    
    
    def _reformat_model(self):
        """
        Use save_state() to store the simulation"s state. This private 
        method just extracts relevant data from the regression model 
        into a dict.
        """

        tosave = {
            "beta":"params",
            "t":"tvalues",
            "fvalue":"fvalue",
            "p":"pvalues",
            "r":"rsquared",
            "ci":"conf_int",
            "resid":"resid",
            "aic":"aic",
            "bic":"bic",
            "llf":"llf",
            "mse_model":"mse_model",
            "mse_resid":"mse_resid",
            "mse_total":"mse_total"
        }
        
        # Try to get each attr (a value in the dict above)
        # first as function (without args) then as a regular
        # attribute.  If both fail, silently move on.
        model_results = {}
        for k,v in tosave.items():
            try:
                model_results[k] = deepcopy(getattr(self.glm,v)())
            except TypeError:
                model_results[k] = deepcopy(getattr(self.glm,v))
            except AttributeError:
                continue
        
        return model_results
    
    
    def _generate_doc(self, name, bold, dm, dm_params):
        """ Generate a doc string for the current model. """
    
        if dm_params.get("box"):
            doc =  """ {0}. Bold: {1}. DM: {2}. """.format(
                    name, bold, ["baseline", "box"] + dm)
        else:
            doc =  """ {0}. Bold: {1}. DM: {2}. """.format(
                    name, bold, ["baseline", ] + dm)
                    ## baseline is added automigcally
                    ## during create_dm... so we add 
                    ## it here too
        return doc

    
    def _template_model(self, bold, dm, bold_params, dm_params, norm):
        """ A template model used by populate_models() to create all
        the regression models used during a run().
        
        Note: __doc__ for this function gets redone dynamically. """
        
        self.data["meta"]["bold"] = deepcopy(bold)

        # If were dealing with a parametric set
        # and a univariate box is bieng used
        if dm_params.get("box"):
            self.data["meta"]["dm"] = ["baseline", "box"] + dm
        else:
            # otherwise just account for the baseline
            self.data["meta"]["dm"] = ["baseline", ] + dm
        
        # Try to unpack dm_params into create_dm
        # first, but if there are too many args
        # ("TypeError") try create_dm_param
        if len(dm_params) == 2:
            # Setup the dm,
            self.create_dm(**dm_params)
            
            # and the univariate bold.
            boldcol = [dm.index(b)+1 for b in bold]  
                ## +1 for baseline
            
            self.create_bold(self.dm[:,boldcol], **bold_params)
        elif len(dm_params) == 4:

            # Setup the dm,
            self.create_dm_param(names=dm, **dm_params)
            
            # then the parametric bold 
            # from self.data[]
            boldarr = np.array(self.data[bold.pop()])  ## Init

            # Only goes if len(bold) > 1, see pop above.
            for b in bold:
                boldarr = np.vstack((boldarr, np.array(self.data[b])))
            
            self.create_bold(boldarr, **bold_params)
        else:
            raise ValueError(
                "dm_params has the wrong number of arguments.")

        self.fit(norm=norm)
    
    
    def create_dm(self, drop=None, convolve=True):
        """ Create a unit (boxcar-only) DM with one columns for each 
        condition in self.trials.  
        
         If <convolve> the dm is convolved with the HRF (self.hrf). """

        cond_levels = sorted(list(set(self.trials)))
            ## Find and sort conditions in trials
        
        # Some useful counts...
        num_conds = len(cond_levels)
        num_tr = np.sum(self.durations)

        # Map each condition in trials to a
        # 2d binary 2d array.  Each row is a trial
        # and each column is a condition.
        dm_unit = np.zeros((num_tr, num_conds))
        for col, cond in enumerate(cond_levels):
            # Create boolean array use it to 
            # populate the dm with ones... 
            # which must be in tr time.
            mask_in_tr = dtime(
                    self.trials == cond, self.durations, drop, False)

            dm_unit[mask_in_tr,col] = 1

        self.dm = dm_unit
        
        if convolve:
            self.dm = self._convolve_hrf(self.dm)


    def create_dm_param(self, names, drop=None, box=True, orth=False, convolve=True):
        """ Create a parametric design matrix based on <names> in self.data. 
        
        If <box> a univariate dm is created that fills the leftmost
        side of the dm.

        If <orth> each regressor is orthgonalized with respect to its
        left-hand neighbor (excluding the baseline).
        
        If <convolve> the dm is convolved with the HRF (self.hrf). """

        cond_levels = sorted(list(set(self.trials)))
            ## Find and sort conditions in trials
       
        # Some useful counts...
        num_names = len(names)
        num_tr = np.sum(self.durations)
        dm_param = None
            ## Will eventually hold the 
            ## parametric DM.

        for cond in cond_levels:
            if cond == 0:
                continue
                    ## We add the baseline 
                    ## in at the end

            # Create a temp dm to hold this
            # condition"s data
            dm_temp = np.zeros((num_tr, num_names))
            mask_in_tr = dtime(
                    self.trials == cond, self.durations, drop, False)

            # Get the named data, convert to tr time 
            # then add to the temp dm using the mask
            for col, name in enumerate(names):
                data_in_tr = dtime(
                            self.data[name], self.durations, drop, 0)

                dm_temp[mask_in_tr,col] = data_in_tr[mask_in_tr]
                    
            # Store the temporary DM in 
            # the final DM.
            if dm_param == None:
                dm_param = dm_temp  ## reinit
            else:
                dm_param = np.hstack((dm_param, dm_temp))  ## adding

        # Create the unit DM too, then combine them.
        # defining self.dm in the process
        self.create_dm(convolve=False)
        dm_unit = self.dm.copy()
        self.dm = None  
            ## Copy and reset

        if box:
            self.dm = np.hstack((dm_unit, dm_param))
        else:
            baseline = dm_unit[:,0]
            baseline = baseline.reshape(baseline.shape[0], 1)
            self.dm = np.hstack((baseline, dm_param))
                ## If not including the boxcar,
                ## we still need the baseline model.

        # Orthgonalize the regessors?
        if orth: 
            self._orth_dm()

        # Convolve with self.hrf?
        if convolve: 
            self.dm = self._convolve_hrf(self.dm)
    
    
    def create_bold(self, arr, convolve=False):
        """ 
        The provided <arr>ay becomes a noisy bold signal. 
        
        <convolve> - if True, the dm is convolved with the HRF defined by
        self.hrf().
        """
        
        arr = np.array(arr)
        try:
            self.bold = arr.sum(1)
                ## Sum cols, arr might 
                ## have been 2d, need 1d.
        except ValueError:
            self.bold = arr
        
        # HRF?
        if convolve:
            self.bold = self._convolve_hrf(self.bold)
        
        # And add noise.
        noise, self.prng = self.noise_f(N=self.bold.shape[0], prng=self.prng)
        self.bold += noise
    
    
    def save_state(self, name):
        """
        Saves most of the state of the current simulation to results, keyed
        on <name>.  Saves greedily, trading storage space for security and
        redundancy.
        """
        
        tosave = {
            "TR":"TR",
            "ISI":"ISI",
            "trials":"trials",
            "data":"data",
            "dm":"dm",
            "bold":"bold"
        }
            ## This list is only for attr hung directly off
            ## of self.
        
        # Add a name to results
        self.results[name] = {}
        
        # Try to get each attr (a value in the dict above)
        # first as function (without args) then as a regular
        # attribute.  If both fail, silently move on.
        for k,v in tosave.items():
            try:
                self.results[name][k] = deepcopy(getattr(self,v)())
            except TypeError:
                self.results[name][k] = deepcopy(getattr(self,v))
            except AttributeError:
                continue

        # Now add the reformatted data from the current model,
        # if any.
        self.results[name].update(self._reformat_model())

    
    def populate_models(self, model_config):
        """ Use <model_config> to populate the experiment with models. 
        
        Note:
        Models become methods that must follow the naming convention,
        model_XX.., where XX.. is more than two integers [0-9].  
        
        For example: model_01, model_010, and model_69 are valid, 
        while model_A1, model_1 are not. """
        
        # Read in the model_config and loop
        # over its
        conf = ConfigParser.ConfigParser()
        readresult = conf.read(model_config)
            ## If conf.read() can't find model_config
            ## it (annoyingly) returns an empty list
            ## in python empty lists are false, so...

        if readresult:
            print(conf.sections()) 
                ## Tell the user about the models.
            
            for sec in conf.sections():
                # Get the config data for sec
                dm_params = eval(conf.get(sec, "dm_params"))
                bold_params = eval(conf.get(sec, "bold_params"))
                bold = eval(conf.get(sec, "bold"))
                dm = eval(conf.get(sec, "dm"))
                norm = eval(conf.get(sec, "norm"))
                    ## Note: Using eval is slow and REALLY unsafe

                # Close on _template_model, update its
                # __doc__ and hang it on self as <sec>.
                parmodel = partial(self._template_model, bold, dm, 
                        bold_params, dm_params, norm)
                parmodel.__doc__ = self._generate_doc(sec, bold, dm, dm_params)
            
                print("Created:{0}" .format(parmodel.__doc__))
                setattr(self, sec, parmodel)
                    ## setattr magic to add 
                    ## <parmodel> to self as <sec>
        else:
            raise IOError("No such file: '{0}'".format(model_config))
    
    
    def fit(self, norm="zscore"):
        """ Calculate the regression parameters and statistics. """
        
        bold = self.bold.copy()
        dm = self.dm.copy()
        
        # Normalize both the bold and dm
        if norm != None:
            bold = self._normalize_array(bold, norm)
            dm = self._normalize_array(dm, norm)

        # Add movement regressors... if present
        try:
            dm_movement = self.data["movement"]
            dm = np.vstack((dm, dm_movement))
        except KeyError:
            pass
        
        # Append a dummy predictor and run the regression
        #
        # Dummy is added at the last minute so it does not
        # interact with normalization or smoothing routines.
        dm_dummy = np.ones((dm.shape[0], dm.shape[1] + 1))
        dm_dummy[0:dm.shape[0], 0:dm.shape[1]] = dm
        
        # Truncate bold or dm_dummy if needed, and Go!
        try:
            bold = bold[0:dm_dummy.shape[0]]
            dm_dummy = dm_dummy[0:len(bold),:]
        except IndexError:
            pass

        self.glm = GLS(bold, dm_dummy).fit()
    
    
    def contrast(self, contrast):
        """ Uses the current model to statistically compare predictors 
        (t-test), returning df, t and p values.
        
        <contrast> - a 1d list of [1,0,-1] the same length as the number
            of predictors in the model (sans the dummy, which is added
            silently). """
        
        if self.glm == None:
            raise ValueError("No glm present.  Try self.fit()?")
        
        contrast = self.glm.t_test(contrast)
            ## This a thin wrapper for 
            ## statsmodels contrast() method

        return contrast.df_denom, contrast.tvalue, contrast.pvalue


    def print_model_summary(self):
        """ Prints all defined model names and their docstrings. """

        # find all self.model_N attritubes and run them.
        all_attr = dir(self)
        all_attr.sort()
        past_models = []
        model_count = 0
        for attr in all_attr:
            a_s = re.split("_", attr)
            
            # Match only model_N where N is an integer
            if len(a_s) == 2:
                if (a_s[0] == "model") and (re.match("\A\d+\Z", a_s[1])):
                    
                    model_count += 1
                    model = attr
                        ## Rename for clarity

                    # Model name must be unique.
                    if model in past_models:
                        raise AttributeError(
                                "{0} was not unique.".format(model))
                    past_models.append(model)

                    # Now call the model and
                    # print its info out.
                    print("{0}. {1}:".format(model_count, model))
                    try:
                        func = getattr(self, model)
                        print(func.im_func.func_doc)                    
                    except KeyError:
                        print("Data not Found.  Moving on.")
                        continue


    def run(self,code):
        """
        Run all defined models in order, returning their tabulated results.
        
        <code> - the unique batch or run code for this experiment.
        
        Models are any method of the form "model_N" where N is an
        integer (e.g. model_2, model_1012 or model_666).  Models take no
        arguments.
        """
        
        self.results["batch_code"] = code
        
        # find all self.model_N attritubes and run them.
        all_attr = dir(self)
        all_attr.sort()
        past_models = []
        for attr in all_attr:
            a_s = re.split("_",attr)
            
            # Match only model_N where N is an integer
            if len(a_s) == 2:
                if (a_s[0] == "model") and (re.match("\A\d+\Z",a_s[1])):
                    
                    model = attr
                        ## Rename for clarity

                    # Model name must be unique.
                    if model in past_models:
                        raise AttributeError(
                                "{0} was not unique.".format(model))
                    past_models.append(model)

                    # Now call the model and
                    # save its results.
                    print("Fitting {0}.".format(model))
                    try:
                        getattr(self, model)()
                    except KeyError:
                        # Missing model should just be skipped.
                        print("Data not Found.  Moving on.")
                        continue
                    
                    self.save_state(name=model)
        
        return self.results