Ejemplos de copnorm_nd en Python, ejemplos de frites.core.copnorm_nd en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: gcmi_1d.py Proyecto: danieltomasz/frites

def gccmi_1d_ccc(x, y, z, biascorrect=True):
    """Gaussian-Copula CMI between three continuous variables.

    I = gccmi_1d_ccc(x,y,z) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third, z,
    estimated via a Gaussian copula.

    Parameters
    ----------
    x, y, z : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs).
    biascorrect : bool | True
        Forwarded as the ``biascorrect`` argument of ``cmi_1d_ggg``.

    Returns
    -------
    i : float
        Information shared by x and y conditioned by z (in bits)

    Raises
    ------
    ValueError
        If one of the inputs has more than two dimensions or if the inputs
        do not have the same size along the last (trial) axis.
    """
    x, y, z = np.atleast_2d(x), np.atleast_2d(y), np.atleast_2d(z)
    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")

    # every input must provide the same number of trials (second axis)
    ntrl = x.shape[1]
    if y.shape[1] != ntrl or z.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # copula normalization along the trial axis
    cx = copnorm_nd(x, axis=1)
    cy = copnorm_nd(y, axis=1)
    cz = copnorm_nd(z, axis=1)
    # parametric Gaussian CMI; honor the caller's bias-correction choice
    # instead of always forcing it to True
    return cmi_1d_ggg(cx, cy, cz, biascorrect=biascorrect, demeaned=True)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: gcmi_1d.py Proyecto: danieltomasz/frites

def gcmi_1d_cc(x, y):
    """Estimate the Gaussian-Copula MI between two continuous variables.

    Both inputs are promoted to 2d, copula-normalized along the trial axis
    and then passed to the parametric Gaussian MI estimator ``mi_1d_gg``.

    Parameters
    ----------
    x, y : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs)

    Returns
    -------
    i : float
        Information shared by x and y (in bits)

    Raises
    ------
    ValueError
        If an input has more than two dimensions or if x and y do not have
        the same size along the last (trial) axis.
    """
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    if max(x.ndim, y.ndim) > 2:
        raise ValueError("x and y must be at most 2d")
    if x.shape[1] != y.shape[1]:
        raise ValueError("number of trials do not match")

    # copula normalization of each variable along the trial axis
    cx = copnorm_nd(x, axis=1)
    cy = copnorm_nd(y, axis=1)
    # parametric Gaussian MI on the normalized data
    return mi_1d_gg(cx, cy, True, True)

Ejemplo n.º 3

0

Mostrar archivo

 def test_copnorm_nd(self):
     """Check that copnorm_nd is consistent across axes and columns."""
     # two identical columns of random integers, and the transposed layout
     base = np.random.randint(0, 10, (20,))
     vertical = np.c_[base, base]
     horizontal = vertical.T
     # normalize the same data along the matching axis of each layout
     out_v = copnorm_nd(vertical, axis=0)
     out_h = copnorm_nd(horizontal, axis=1)
     # duplicated columns must be normalized identically
     assert np.all(out_v[:, 0] == out_v[:, 1])
     # the result must not depend on the orientation of the array
     assert np.all(out_h == out_v.T)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: gcmi_1d.py Proyecto: meronvermaas/frites

def gccmi_1d_ccc(x, y, z, verbose=None):
    """Gaussian-Copula CMI between three continuous variables.

    I = gccmi_1d_ccc(x,y,z) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third, z,
    estimated via a Gaussian copula.

    Parameters
    ----------
    x, y, z : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs).
    verbose : optional
        Value passed to ``set_log_level`` before any processing.

    Returns
    -------
    i : float
        Information shared by x and y conditioned by z (in bits)

    Raises
    ------
    ValueError
        If one of the inputs has more than two dimensions or if the inputs
        do not have the same size along the last (trial) axis.
    """
    set_log_level(verbose)
    x, y, z = np.atleast_2d(x), np.atleast_2d(y), np.atleast_2d(z)
    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")

    ntrl = x.shape[1]
    if y.shape[1] != ntrl or z.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values: a variable is reported (at most once) as
    # soon as one of its rows has fewer than 90% unique values. Each message
    # names the variable actually inspected (z used to be reported as "y").
    for name, arr in (("x", x), ("y", y), ("z", z)):
        if any(np.unique(row).size / float(ntrl) < 0.9 for row in arr):
            logger.info(f"Input {name} has more than 10% repeated values")

    # copula normalization along the trial axis
    cx = copnorm_nd(x, axis=1)
    cy = copnorm_nd(y, axis=1)
    cz = copnorm_nd(z, axis=1)
    # parametric Gaussian CMI
    return cmi_1d_ggg(cx, cy, cz, True, True)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: gcmi_1d.py Proyecto: danieltomasz/frites

def gcmi_model_1d_cd(x, y):
    """Estimate the Gaussian-Copula MI between continuous x and discrete y.

    The continuous variable is copula-normalized along the trial axis and
    the estimation is delegated to ``mi_model_1d_gd`` (ANOVA style model
    comparison).

    Parameters
    ----------
    x : array_like
        Continuous array of shape (n_epochs,) or (n_dimensions, n_epochs).
    y : array_like
        Array of integers with one value per epoch.

    Returns
    -------
    i : float
        Information shared by x and y (in bits)

    Raises
    ------
    ValueError
        If x has more than two dimensions, if y is not a univariate integer
        array, or if x and y do not have the same number of trials.
    """
    x = np.atleast_2d(x)
    y = np.squeeze(y)

    # input validation
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")
    if y.size != x.shape[1]:
        raise ValueError("number of trials do not match")

    # copula normalization followed by the parametric Gaussian MI
    normalized = copnorm_nd(x, axis=1)
    return mi_model_1d_gd(normalized, y, True, True)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: gcmi_1d.py Proyecto: danieltomasz/frites

def gcmi_mixture_1d_cd(x, y):
    """Gaussian-Copula MI between a continuous and a discrete variable.

    This method evaluates MI from a Gaussian mixture.

    The Gaussian mixture is fit using robust measures of location (median) and
    scale (median absolute deviation) for each class.
    I = gcmi_mixture_cd(x,y) returns the MI between the (possibly
    multidimensional) continuous variable x and the discrete variable y.

    Parameters
    ----------
    x : array_like
        Continuous array of shape (n_epochs,) or (n_dimensions, n_epochs).
    y : array_like
        Array of integers with one value per epoch.

    Returns
    -------
    i : float
        Information shared by x and y (in bits)

    Raises
    ------
    ValueError
        If x has more than two dimensions, if y is not a univariate integer
        array, or if x and y do not have the same number of trials.
    """
    x, y = np.atleast_2d(x), np.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")

    ntrl = x.shape[1]
    ym = np.unique(y)

    if y.size != ntrl:
        raise ValueError("number of trials do not match")

    # copula normalise each class
    # shift and rescale to match loc and scale of raw data
    # this provides a robust way to fit the gaussian mixture
    classdat = []
    ydat = []
    for yi in ym:
        # class conditional data
        idx = y == yi
        xm = x[:, idx]
        cxm = copnorm_nd(xm, axis=1)

        xmmed = np.median(xm, axis=1)[:, np.newaxis]
        # robust measure of s.d. under Gaussian assumption from median
        # absolute deviation
        xmmad = np.median(np.abs(xm - xmmed), axis=1)[:, np.newaxis]
        cxmscaled = cxm * (1.482602218505602 * xmmad)
        # robust measure of loc from median
        cxmscaled = cxmscaled + xmmed
        classdat.append(cxmscaled)
        # builtin `int` replaces the `np.int` alias, which no longer exists
        # in recent NumPy releases (it was only ever an alias of `int`)
        ydat.append(yi * np.ones(xm.shape[1], dtype=int))

    # trials are regrouped class by class, hence the rebuilt label vector
    cx = np.concatenate(classdat, axis=1)
    newy = np.concatenate(ydat)
    return mi_mixture_1d_gd(cx, newy)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: ds_ephy.py Proyecto: meronvermaas/frites

    def copnorm(self, mi_type='cc', gcrn_per_suj=True):
        """Apply the Gaussian-Copula rank normalization to the stored data.

        ``self._x`` is always normalized; ``self._y`` is only normalized when
        it is continuous, i.e. for the 'cc' and 'ccd' types.

        Parameters
        ----------
        mi_type : {'cc', 'cd', 'ccd'}
            The copnorm depends on the mutual-information type that is going to
            be performed. Choose either 'cc' (continuous / continuous), 'cd'
            (continuous / discret) or 'ccd' (continuous / continuous / discret)
        gcrn_per_suj : bool | True
            Apply the Gaussian-rank normalization either per subject (True)
            or across subjects (False).

        Notes
        -----
        If the data were already normalized, a warning is logged and nothing
        is done. Data grouped by "subject" raise a NotImplementedError.
        """
        assert mi_type in ['cc', 'cd', 'ccd']
        # a string stored in `_copnormed` means copnorm already ran once
        if isinstance(self._copnormed, str):
            logger.warning("Data already copnormed. Copnorm ignored")
            return None
        logger.info(f"    Apply copnorm (per subject={gcrn_per_suj}; "
                    f"mi_type={mi_type})")

        # the way copnorm is applied depends on how the data are organized
        if self._groupedby == "subject":
            raise NotImplementedError("FUTURE WORK")
        if self._groupedby == "roi":
            # y is only normalized when it is a continuous variable
            y_is_continuous = mi_type in ['cc', 'ccd']
            if gcrn_per_suj:
                # normalization within each subject
                logger.debug("copnorm applied per subjects")
                self._x = [
                    copnorm_cat_nd(data, suj, axis=-1)
                    for data, suj in zip(self._x, self.suj_roi)
                ]
                if y_is_continuous:
                    self._y = [
                        copnorm_cat_nd(data, suj, axis=0)
                        for data, suj in zip(self._y, self.suj_roi)
                    ]
            else:
                # normalization across subjects
                logger.debug("copnorm applied across subjects")
                self._x = [copnorm_nd(data, axis=-1) for data in self._x]
                if y_is_continuous:
                    self._y = [copnorm_nd(data, axis=0) for data in self._y]

        # remember the settings used so that copnorm is not applied twice
        self._copnormed = f"{int(gcrn_per_suj)}-{mi_type}"

Ejemplo n.º 8

0

Mostrar archivo

def _conn_dfc(x_w, x_s, x_t, roi_idx, gcrn):
    """Parallel function for computing DFC."""
    dfc = np.zeros((x_w.shape[0], len(x_s)))
    # copnorm data only once
    if gcrn:
        x_w = copnorm_nd(x_w, axis=2)
    # compute dfc
    for n_p, (s, t) in enumerate(zip(x_s, x_t)):
        # select sources and targets time-series
        _x_s = x_w[:, roi_idx[s], :]
        _x_t = x_w[:, roi_idx[t], :]
        # compute mi between time-series
        dfc[:, n_p] = mi_nd_gg(_x_s,
                               _x_t,
                               traxis=-1,
                               mvaxis=-2,
                               shape_checking=False)
    return dfc

Ejemplo n.º 9

0

Mostrar archivo

Archivo: gcmi_1d.py Proyecto: meronvermaas/frites

def gcmi_model_1d_cd(x, y, verbose=None):
    """Estimate the Gaussian-Copula MI between continuous x and discrete y.

    The continuous variable is copula-normalized along the trial axis and
    the estimation is delegated to ``mi_model_1d_gd`` (ANOVA style model
    comparison).

    Parameters
    ----------
    x : array_like
        Continuous array of shape (n_epochs,) or (n_dimensions, n_epochs).
    y : array_like
        Array of integers with one value per epoch.
    verbose : optional
        Value passed to ``set_log_level`` before any processing.

    Returns
    -------
    i : float
        Information shared by x and y (in bits)

    Raises
    ------
    ValueError
        If x has more than two dimensions, if y is not a univariate integer
        array, or if x and y do not have the same number of trials.
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.squeeze(y)

    # input validation
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y should be an integer array")
    ntrl = x.shape[1]
    if y.size != ntrl:
        raise ValueError("number of trials do not match")

    # log (once) if any row of x has fewer than 90% unique values
    if any(np.unique(row).size / float(ntrl) < 0.9 for row in x):
        logger.info("Input x has more than 10% repeated values")

    # copula normalization followed by the parametric Gaussian MI
    normalized = copnorm_nd(x, axis=1)
    return mi_model_1d_gd(normalized, y, True, True)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: test_ds_ephy.py Proyecto: danieltomasz/frites

 def test_copnorm(self):
     """Check the copnorm options of DatasetEphy.get_roi_data."""
     # build the dataset from the 3d data of each subject
     subjects = self._get_data(3)
     dataset = DatasetEphy(subjects, y='y', z='z', **kw)

     # without copnorm, the roi data must be the raw concatenated trials
     raw = dataset.get_roi_data("roi_2", copnorm=False)
     first = subjects[0].sel(roi='roi_2')
     second = subjects[1].sel(roi='roi_2')
     stacked = xr.concat((first, second), 'trials')
     expected = stacked.T.expand_dims('mv', axis=-2)
     assert 9. < raw.data.ravel().mean() < 11.
     np.testing.assert_array_equal(expected.data, raw.data)

     # with copnorm, the mean of the data must fall between -1 and 1
     normed = dataset.get_roi_data("roi_2", copnorm=True)
     assert -1. < normed.data.ravel().mean() < 1.

     # values when the normalization is applied across subjects
     across = dataset.get_roi_data(
         "roi_2", copnorm=True, gcrn_per_suj=False)
     np.testing.assert_array_equal(copnorm_nd(expected.data), across.data)

     # values when the normalization is applied per subject
     within = dataset.get_roi_data(
         "roi_2", copnorm=True, gcrn_per_suj=True)
     reference = copnorm_cat_nd(expected.data, within['subject'].data)
     np.testing.assert_array_equal(reference, within.data)

Ejemplo n.º 11

0

Mostrar archivo

Archivo: conn_transfer_entropy.py Proyecto: meronvermaas/frites

def conn_transfer_entropy(x, max_delay=30, pairs=None, gcrn=True):
    """Across-trials transfer entropy.

    The transfer entropy represents the amount of information that is sent
    from a source to a target. It is defined as :

    .. math::

        TE = I(source_{past}; target_{present} | target_{past})

    Where :math:`past` is defined using the `max_delay` input parameter. Note
    that the transfer entropy only informs about the amount of information
    that is sent, not about its content.

    Parameters
    ----------
    x : array_like
        Array of data of shape (n_roi, n_times, n_epochs). Must be a gaussian
        variable
    max_delay : int | 30
        Number of time points defining where to stop looking at in the past.
        Increasing this maximum delay input can lead to slower computations
    pairs : array_like
        Array of pairs to consider for computing the transfer entropy. It
        should be an array of shape (n_pairs, 2) where the first column refers
        to sources and the second to targets. If None, all pairs will be
        computed
    gcrn : bool | True
        Apply a Gaussian Copula rank normalization

    Returns
    -------
    te : array_like
        The transfer entropy array of shape (n_pairs, n_times - max_delay)
    pairs : array_like
        Pairs vector use for computations of shape (n_pairs, 2)

    Notes
    -----
    Only the first ``n_times - max_delay - 1`` columns of `te` are filled by
    the computation loop; the last column keeps its initial value of zero.
    """
    # -------------------------------------------------------------------------
    # check input data first: unpacking the shape of a non-3d array would
    # otherwise fail before this message could be shown
    assert (x.ndim == 3), ("input data `x` should be a 3d array of shape "
                           "(n_roi, n_times, n_epochs)")
    n_roi, n_times, _ = x.shape
    # check pairs (anything that is not an array means "all directed pairs")
    if not isinstance(pairs, np.ndarray):
        pairs = np.c_[np.where(~np.eye(n_roi, dtype=bool))]
    assert isinstance(pairs, np.ndarray) and (pairs.ndim == 2) and (
        pairs.shape[1] == 2), ("`pairs` should be a 2d array of shape "
                               "(n_pairs, 2) where the first column refers to "
                               "sources and the second to targets")
    x_all_s, x_all_t = pairs[:, 0], pairs[:, 1]
    n_pairs = len(x_all_s)
    # check max_delay; `np.integer` covers every NumPy integer scalar and
    # replaces the `np.int` alias that no longer exists in recent NumPy
    assert isinstance(max_delay, (int, np.integer)), (
        "`max_delay` should be an integer")
    # insert a singleton axis just before the trial axis
    x = x[..., np.newaxis, :]

    # -------------------------------------------------------------------------
    # apply copnorm
    if gcrn:
        x = copnorm_nd(x, axis=-1)

    # -------------------------------------------------------------------------
    # compute the transfer entropy
    te = np.zeros((n_pairs, n_times - max_delay), dtype=float)
    # each distinct source is processed once: all of its targets are handled
    # together, so looping over repeated sources would only redo the same
    # computation and overwrite the same rows with the same values
    for x_s in np.unique(x_all_s):
        # select targets
        is_source = x_all_s == x_s
        x_t = x_all_t[is_source]
        targets = x[x_t, ...]
        # tile source
        source = np.tile(x[[x_s], ...], (targets.shape[0], 1, 1, 1))
        # loop over remaining time points
        for n_d, d in enumerate(range(max_delay + 1, n_times)):
            t_pres = np.tile(targets[:, [d], :], (1, max_delay, 1, 1))
            # NOTE(review): the past window stops at d - 2 (sample d - 1 is
            # excluded) - confirm that this is intended
            past = slice(d - max_delay - 1, d - 1)
            s_past = source[:, past, ...]
            t_past = targets[:, past, ...]
            # compute the transfer entropy
            _te = cmi_nd_ggg(s_past, t_pres, t_past, **CONFIG["KW_GCMI"])
            # take the mean over delays
            te[is_source, n_d] = _te.mean(1)

    return te, pairs

Ejemplo n.º 12

0

Mostrar archivo

def conn_dfc(data,
             win_sample,
             times=None,
             roi=None,
             n_jobs=1,
             gcrn=True,
             verbose=None):
    """Single trial Dynamic Functional Connectivity.

    This function computes the Dynamic Functional Connectivity (DFC) using the
    Gaussian Copula Mutual Information (GCMI). The DFC is computed across time
    points for each trial. Note that the DFC can either be computed on windows
    manually defined or on sliding windows.

    Parameters
    ----------
    data : array_like
        Electrophysiological data array of a single subject organized as
        (n_epochs, n_roi, n_times)
    win_sample : array_like
        Array of shape (n_windows, 2) describing where each window starts and
        finishes. You can use the function :func:`frites.conn.define_windows`
        to define either manual or sliding windows. It must be a 2d NumPy
        array with an integer dtype listed in ``CONFIG['INT_DTYPE']``.
    times : array_like | None
        Time vector array of shape (n_times,)
    roi : array_like | None
        ROI names of a single subject
    n_jobs : int | 1
        Number of jobs to use for parallel computing (use -1 to use all
        jobs). The parallel loop is set at the pair level.
    gcrn : bool | True
        Specify if the Gaussian Copula Rank Normalization should be applied.
        If the data are normalized (e.g z-score) this parameter can be set to
        False because the data can be considered as gaussian over time.
    verbose : optional
        Value passed to ``set_log_level``, ``conn_io`` and ``parallel_func``.

    Returns
    -------
    dfc : xr.DataArray
        The DFC array of shape (n_epochs, n_pairs, n_windows), with
        dimensions named ('trials', 'roi', 'times'). The windows used are
        stored in the attributes.

    See also
    --------
    define_windows, conn_covgc
    """
    set_log_level(verbose)
    # -------------------------------------------------------------------------
    # inputs conversion
    data, trials, roi, times, attrs = conn_io(data,
                                              roi=roi,
                                              times=times,
                                              verbose=verbose)

    # -------------------------------------------------------------------------
    # data checking
    n_epochs, n_roi, n_pts = data.shape
    assert (len(roi) == n_roi) and (len(times) == n_pts)
    assert isinstance(win_sample, np.ndarray) and (win_sample.ndim == 2)
    assert win_sample.dtype in CONFIG['INT_DTYPE']
    n_win = win_sample.shape[0]
    # get the non-directed pairs (strict upper triangle, so no self-pairs)
    x_s, x_t = np.triu_indices(n_roi, k=1)
    n_pairs = len(x_s)
    pairs = np.c_[x_s, x_t]
    # build roi pairs names
    roi_p = [f"{roi[s]}-{roi[t]}" for s, t in zip(x_s, x_t)]

    # -------------------------------------------------------------------------
    # compute dfc
    logger.info(f'Computing DFC between {n_pairs} pairs (gcrn={gcrn})')
    # get the parallel function
    parallel, p_fun = parallel_func(mi_nd_gg,
                                    n_jobs=n_jobs,
                                    verbose=verbose,
                                    prefer='threads')
    pbar = ProgressBar(range(n_win), mesg='Estimating DFC')

    dfc = np.zeros((n_epochs, n_pairs, n_win), dtype=np.float32)
    with parallel as para:
        for n_w, w in enumerate(win_sample):
            # select the data in the window (samples w[0] to w[1] - 1)
            data_w = data[..., w[0]:w[1]]
            # apply gcrn over time
            if gcrn:
                data_w = copnorm_nd(data_w, axis=2)
            # compute mi between pairs; indexing with [s] / [t] keeps the
            # roi axis as a singleton dimension
            _dfc = para(
                p_fun(data_w[:, [s], :], data_w[:,
                                                [t], :], **CONFIG["KW_GCMI"])
                for s, t in zip(x_s, x_t))
            # one result per pair, stacked along the pair axis
            dfc[..., n_w] = np.stack(_dfc, axis=1)
            pbar.update_with_increment_value(1)

    # -------------------------------------------------------------------------
    # dataarray conversion
    # time values at the bounds of each window; their mean along axis 1 is
    # used as the time coordinate of the window
    win_times = times[win_sample]
    dfc = xr.DataArray(dfc,
                       dims=('trials', 'roi', 'times'),
                       name='dfc',
                       coords=(trials, roi_p, win_times.mean(1)))
    # add the windows used in the attributes
    cfg = dict(win_sample=np.r_[tuple(win_sample)],
               win_times=np.r_[tuple(win_times)],
               type='dfc')
    dfc.attrs = {**cfg, **attrs}

    return dfc

Ejemplo n.º 13

0

Mostrar archivo

    def get_roi_data(self,
                     roi,
                     groupby='subjects',
                     mi_type='cc',
                     copnorm=True,
                     gcrn_per_suj=True):
        """Get the data of a single brain region.

        Parameters
        ----------
        roi : string
            ROI name to get
        groupby : {'subjects'}
            Specify if the data across subjects have to be concatenated
        mi_type : {'cc', 'cd', 'ccd'}
            The type of mutual-information that is then going to be used. This
            is going to have an influence on how the data are organized and
            how the copnorm is going to be applied
        copnorm : bool | True
            Apply the gaussian copula rank normalization
        gcrn_per_suj : bool | True
            Specify whether the gaussian copula rank normalization have to be
            applied per subject (True - RFX) or across subjects (False - FFX)

        Returns
        -------
        da : xr.DataArray
            The data of the single brain region

        Raises
        ------
        ValueError
            If channels are not aggregated, a 'y' coordinate is present and
            `mi_type` is neither 'cd' nor 'ccd'.

        Notes
        -----
        Only ``groupby='subjects'`` is handled. For any other value, no
        branch is executed and the method implicitly returns None.
        """
        # list of subjects present in the desired roi
        suj_list = self._df_rs.loc[roi, 'subjects']

        # group data across subjects
        if groupby == 'subjects':
            x_r_ms = []
            for s in suj_list:
                # roi (possibly multi-sites) selection
                x_roi = self._x[s].sel(roi=self._x[s]['roi'].data == roi)
                # stack roi and trials into a single 'rtr' dimension
                x_roi = x_roi.stack(rtr=('roi', 'trials'))
                x_r_ms.append(x_roi)
            # concatenate the subjects along the stacked dimension
            x_r_ms = xr.concat(x_r_ms, 'rtr')
            # 4d or multivariate
            if self._multivariate:
                x_r_ms = x_r_ms.transpose('times', 'mv', 'rtr')
            else:
                # univariate data: insert an 'mv' dimension in second to
                # last position
                x_r_ms = x_r_ms.expand_dims('mv', axis=-2)
            # coordinate names, taken before any coordinate is added below
            x_coords = list(x_r_ms.coords)

            # channels aggregation
            if not self._agg_ch and ('y' in x_coords):
                # shortcuts
                ch_id = x_r_ms['agg_ch'].data
                y = x_r_ms['y'].data
                # transformation depends on mi_type
                # NOTE(review): multi_to_uni_conditions presumably merges the
                # columns into a single discrete variable - confirm against
                # the helper
                if mi_type == 'cd':
                    # I(C; D) where the D=[y, ch_id]
                    ysub = np.c_[y, ch_id]
                    x_r_ms['y'].data = multi_to_uni_conditions([ysub],
                                                               False)[0]
                elif mi_type == 'ccd' and ('z' not in x_coords):
                    # I(C; C; D) where D=ch_id. In that case z=D
                    x_r_ms = x_r_ms.assign_coords(z=('rtr', ch_id))
                elif mi_type == 'ccd' and ('z' in x_coords):
                    # I(C; C; D) where D=[z, ch_id]
                    zsub = np.c_[x_r_ms['z'].data, ch_id]
                    x_r_ms['z'].data = multi_to_uni_conditions([zsub],
                                                               False)[0]
                else:
                    # remaining case (mi_type 'cc'): no discrete variable is
                    # available to carry the channel identity
                    raise ValueError("Can't avoid aggregating channels")

            # gaussian copula rank normalization
            # the data are normalized along the last axis; y is only
            # normalized when it is continuous ('cc' and 'ccd')
            if copnorm:
                if gcrn_per_suj:  # gcrn per subject
                    logger.debug("copnorm applied per subjects")
                    suj = x_r_ms['subject'].data
                    x_r_ms.data = copnorm_cat_nd(x_r_ms.data, suj, axis=-1)
                    if (mi_type in ['cc', 'ccd']) and ('y' in x_coords):
                        x_r_ms['y'].data = copnorm_cat_nd(x_r_ms['y'].data,
                                                          suj,
                                                          axis=0)
                else:  # gcrn across subjects
                    logger.debug("copnorm applied across subjects")
                    x_r_ms.data = copnorm_nd(x_r_ms.data, axis=-1)
                    if (mi_type in ['cc', 'ccd']) and ('y' in x_coords):
                        x_r_ms['y'].data = copnorm_nd(x_r_ms['y'].data, axis=0)

            return x_r_ms

Ejemplo n.º 14

0

Mostrar archivo

Archivo: gcmi_1d.py Proyecto: meronvermaas/frites

def gccmi_1d_ccd(x, y, z, verbose=None):
    """GCCMI between 2 continuous variables conditioned on a discrete variable.

    I = gccmi_ccd(x,y,z) returns the CMI between two (possibly
    multidimensional) continuous variables, x and y, conditioned on a third
    discrete variable z, estimated via a Gaussian copula.

    Parameters
    ----------
    x, y : array_like
        Continuous arrays of shape (n_epochs,) or (n_dimensions, n_epochs).
    z : array_like
        Discret array of shape (n_epochs,). The labels can be any integers,
        they do not have to be consecutive or to start at zero.
    verbose : optional
        Value passed to ``set_log_level`` before any processing.

    Returns
    -------
    cmi : float
        Conditional Mutual Information shared by x and y conditioned by z
        (in bits)
    i : float
        Value returned by ``mi_1d_gg`` when applied to the data copnormed
        within each class of z and concatenated across classes.

    Raises
    ------
    ValueError
        If x or y has more than two dimensions, if z is not a univariate
        integer array, or if the inputs do not share the same number of
        trials.
    """
    set_log_level(verbose)
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)
    # accept any array_like (e.g. a list) for z, as for x and y
    z = np.asarray(z)
    if x.ndim > 2 or y.ndim > 2:
        raise ValueError("x and y must be at most 2d")
    if z.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not np.issubdtype(z.dtype, np.integer):
        raise ValueError("z should be an integer array")

    nvarx, ntrl = x.shape
    nvary = y.shape[0]
    zm = np.unique(z)

    if y.shape[1] != ntrl or z.size != ntrl:
        raise ValueError("number of trials do not match")

    # check for repeated values
    for xi in range(nvarx):
        if (np.unique(x[xi, :]).size / float(ntrl)) < 0.9:
            logger.info("Input x has more than 10% repeated values")
            break
    for yi in range(nvary):
        if (np.unique(y[yi, :]).size / float(ntrl)) < 0.9:
            logger.info("Input y has more than 10% repeated values")
            break

    # calculate gcmi for each z value
    icond = np.zeros(len(zm))
    pz = np.zeros(len(zm))
    cx = []
    cy = []
    # results are stored at the position of the class in `zm`, not at the
    # class label itself: labels such as {1, 2} or {-1, 5} are therefore
    # handled like {0, 1}
    for n_z, zi in enumerate(zm):
        idx = z == zi
        thsx = copnorm_nd(x[:, idx], axis=1)
        thsy = copnorm_nd(y[:, idx], axis=1)
        # number of trials in the class (turned into a probability below)
        pz[n_z] = idx.sum()
        cx.append(thsx)
        cy.append(thsy)
        icond[n_z] = mi_1d_gg(thsx, thsy, True, True)

    pz = pz / float(ntrl)

    # conditional mutual information: class MI weighted by class probability
    cmi = np.sum(pz * icond)
    # MI of the class-wise copnormed data, concatenated across classes
    i = mi_1d_gg(np.hstack(cx), np.hstack(cy), True, False)
    return (cmi, i)