Exemple #1
0
def make_node_dict_from_parcellation(parcellation, dir_path, vox_size='2mm'):
    """
    Regenerate and save node metadata (coordinates, labels, indices) for a
    parcellation.

    Parameters
    ----------
    parcellation :
        Parcellation passed straight to
        `nodemaker.get_names_and_coords_of_parcels`.
    dir_path :
        Output location forwarded to `save_coords_and_labels_to_json`.
    vox_size : str
        Voxel size forwarded to `parcel_naming`. Default is '2mm'.

    Returns
    -------
    node_file
        Whatever `save_coords_and_labels_to_json` returns (presumably the
        path of the written JSON file -- confirm against that helper).
    """
    from pynets.core.nodemaker import (
        get_names_and_coords_of_parcels,
        parcel_naming,
    )
    from pynets.core.utils import save_coords_and_labels_to_json

    # Only the first and last elements of the helper's 4-tuple are needed.
    parcel_info = get_names_and_coords_of_parcels(parcellation)
    centroids, _, _, intensities = parcel_info

    node_names = parcel_naming(centroids, vox_size)

    return save_coords_and_labels_to_json(
        centroids,
        node_names,
        dir_path,
        network='regen',
        indices=intensities,
    )
Exemple #2
0
def test_save_coords_and_labels_to_json(connectivity_data):
    """
    Test save_coords_and_labels_to_json functionality.

    Saves the fixture's coordinates and labels, together with one 1-based
    index per coordinate, into a temporary directory and checks that the
    returned path points to an existing file.
    """
    coords = connectivity_data['coords']
    labels = connectivity_data['labels']
    subnet = 'Default'
    # One index per coordinate, counting from 1.
    indices = np.arange(1, len(coords) + 1).tolist()

    # The context manager removes the directory even when the assertion
    # below fails, which a trailing tmp.cleanup() call would not.
    with tempfile.TemporaryDirectory() as dir_path:
        nodes_path = utils.save_coords_and_labels_to_json(
            coords, labels, dir_path, subnet, indices)

        assert os.path.isfile(nodes_path) is True
Exemple #3
0
def test_save_coords_and_labels_to_json():
    """
    Test save_coords_and_labels_to_json functionality.

    Loads pickled coordinates and label names from the `examples`
    directory next to this file, saves them with one 1-based index per
    coordinate into a temporary directory, and checks that the returned
    path points to an existing file.
    """
    import tempfile

    misc_dir = Path(__file__).parent / "examples" / "miscellaneous"

    # Context managers close the fixture files deterministically instead of
    # leaving the handles to the garbage collector.
    with open(misc_dir / "Default_func_coords_wb.pkl", 'rb') as coord_file:
        coords = pickle.load(coord_file)
    with open(misc_dir / "Default_func_labelnames_wb.pkl",
              'rb') as labels_file:
        labels = pickle.load(labels_file)

    network = 'Default'
    indices = np.arange(1, len(coords) + 1).tolist()

    # Keep the TemporaryDirectory object alive for the duration of the test:
    # taking only `.name` from a discarded instance lets its finalizer delete
    # the directory, and a manually re-created one is never cleaned up.
    with tempfile.TemporaryDirectory() as dir_path:
        nodes_path = utils.save_coords_and_labels_to_json(coords, labels,
                                                          dir_path, network,
                                                          indices)

        assert os.path.isfile(nodes_path) is True
Exemple #4
0
def streams2graph(atlas_for_streams, streams, dir_path, track_type, conn_model,
                  subnet, node_radius, dens_thresh, ID, roi, min_span_tree,
                  disp_filt, parc, prune, atlas, parcellation, labels, coords,
                  norm, binary, traversal, warped_fa, min_length,
                  error_margin):
    """
    Use tracked streamlines as a basis for estimating a structural connectome.

    Parameters
    ----------
    atlas_for_streams : str
        File path to atlas parcellation Nifti1Image in T1w-conformed space.
    streams : str
        File path to streamline array sequence in .trk format. If None, an
        all-zero connectivity matrix is produced instead.
    dir_path : str
        Path to directory containing subject derivative data for a given
        pynets run.
    track_type : str
        Tracking algorithm used (e.g. 'local' or 'particle').
    conn_model : str
        Connectivity reconstruction method (e.g. 'csa', 'tensor', 'csd').
    subnet : str
        Resting-state subnet based on Yeo-7 and Yeo-17 naming (e.g. 'Default')
        used to filter nodes in the study of brain subgraphs.
    node_radius : int
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's for tracking.
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis for
        thresholding.
    ID : str
        A subject id or other unique identifier.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone subnet' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    parcellation : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to graph nodes. Entries may be
        deleted in place when labels are found missing from the parcellation.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node. Entries
        may be deleted in place alongside `labels`.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    traversal : str
        The statistical approach to tracking. Options are:
        det (deterministic), closest (clos), boot (bootstrapped),
        and prob (probabilistic).
    warped_fa : str
        File path to MNI-space warped FA Nifti1Image.
    min_length : int
        Minimum fiber length threshold in mm to restrict tracking.
    error_margin : int
        Euclidean margin of error for classifying a streamline as a connection
         to an ROI. Default is 2 voxels.

    Returns
    -------
    atlas_for_streams : str
        File path to atlas parcellation Nifti1Image in T1w-conformed space.
    streams : str
        File path to streamline array sequence in .trk format.
    conn_matrix : array
        Adjacency matrix stored as an m x n array of nodes and edges.
    track_type : str
        Tracking algorithm used (e.g. 'local' or 'particle').
    dir_path : str
        Path to directory containing subject derivative data for given run.
    conn_model : str
        Connectivity reconstruction method (e.g. 'csa', 'tensor', 'csd').
    subnet : str
        Resting-state subnet based on Yeo-7 and Yeo-17 naming (e.g. 'Default')
        used to filter nodes in the study of brain subgraphs.
    node_radius : int
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's for tracking. Replaced by the string
        "parc" when `parc` is True.
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis for
        thresholding.
    ID : str
        A subject id or other unique identifier.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone subnet' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    parcellation : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : array
        Labels corresponding to graph nodes, converted to a numpy array.
    coords : array
        Node coordinates, converted to a numpy array.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    traversal : str
        The statistical approach to tracking. Options are: det (deterministic),
        closest (clos), boot (bootstrapped), and prob (probabilistic).
    min_length : int
        Minimum fiber length threshold in mm to restrict tracking.
    error_margin : int
        Euclidean margin of error for classifying a streamline as a connection
         to an ROI. Default is 2 voxels.

    Raises
    ------
    ValueError
        If the configured `roi_neighborhood_tol` is less than or equal to
        `error_margin`.
    AssertionError
        If `coords`, `labels` and the connectivity matrix disagree in length.

    References
    ----------
    .. [1] Sporns, O., Tononi, G., & Kötter, R. (2005). The human connectome:
      A structural description of the human brain. PLoS Computational Biology.
      https://doi.org/10.1371/journal.pcbi.0010042
    .. [2] Sotiropoulos, S. N., & Zalesky, A. (2019). Building connectomes
      using diffusion MRI: why, how and but. NMR in Biomedicine.
      https://doi.org/10.1002/nbm.3752
    .. [3] Chung, M. K., Hanson, J. L., Adluru, N., Alexander, A. L., Davidson,
      R. J., & Pollak, S. D. (2017). Integrative Structural Brain subnet
      Analysis in Diffusion Tensor Imaging. Brain Connectivity.
      https://doi.org/10.1089/brain.2016.0481
    """
    import gc
    import time
    from dipy.tracking.streamline import Streamlines, values_from_volume
    from dipy.tracking._utils import _mapping_to_voxel, _to_voxel_coordinates
    import networkx as nx
    from itertools import combinations
    from collections import defaultdict
    from pynets.core import utils, nodemaker
    from pynets.dmri.utils import generate_sl
    from dipy.io.streamline import load_tractogram
    from dipy.io.stateful_tractogram import Space, Origin
    from pynets.core.utils import load_runconfig

    hardcoded_params = load_runconfig()
    fa_wei = hardcoded_params["StructuralNetworkWeighting"]["fa_weighting"][0]
    fiber_density = hardcoded_params["StructuralNetworkWeighting"][
        "fiber_density"][0]
    overlap_thr = hardcoded_params["StructuralNetworkWeighting"][
        "overlap_thr"][0]
    roi_neighborhood_tol = \
        hardcoded_params['tracking']["roi_neighborhood_tol"][0]

    start = time.time()

    # NOTE(review): the message says "cannot be less than", but this check
    # also rejects equal values -- confirm which of the two is intended.
    if float(roi_neighborhood_tol) <= float(error_margin):
        raise ValueError('roi_neighborhood_tol preset cannot be less than '
                         'the value of the structural connectome error'
                         '_margin parameter.')
    else:
        print(f"Using fiber-roi intersection tolerance: {error_margin}...")

    # Load FA
    fa_img = nib.load(warped_fa)

    # Load parcellation
    roi_img = nib.load(atlas_for_streams)
    atlas_data = np.around(np.asarray(roi_img.dataobj))
    roi_zooms = roi_img.header.get_zooms()
    roi_shape = roi_img.shape

    # Read Streamlines
    if streams is not None:
        streamlines = [
            i.astype(np.float32) for i in Streamlines(
                load_tractogram(streams,
                                fa_img,
                                to_origin=Origin.NIFTI,
                                to_space=Space.VOXMM).streamlines)
        ]

        # Remove streamlines with negative voxel indices
        lin_T, offset = _mapping_to_voxel(np.eye(4))
        streams_filtered = []
        neg_vox = False
        for sl in streamlines:
            inds = np.dot(sl, lin_T)
            inds += offset
            if not inds.min().round(decimals=6) < 0:
                streams_filtered.append(sl)
            else:
                neg_vox = True

        if neg_vox is True:
            print(UserWarning("Negative voxel indices detected! " "Check FOV"))

        streamlines = streams_filtered
        del streams_filtered

        if fa_wei is True:
            fa_weights = values_from_volume(
                np.asarray(fa_img.dataobj, dtype=np.float32), streamlines,
                np.eye(4))
            global_fa_weights = list(utils.flatten(fa_weights))
            min_global_fa_wei = min([i for i in global_fa_weights if i > 0])
            max_global_fa_wei = max(global_fa_weights)
            fa_weights_norm = []
            # Here we normalize by global FA
            for val_list in fa_weights:
                fa_weights_norm.append(
                    np.nanmean((val_list - min_global_fa_wei) /
                               (max_global_fa_wei - min_global_fa_wei)))

        # Make streamlines into generators to keep memory at a minimum
        total_streamlines = len(streamlines)
        sl = [generate_sl(i) for i in streamlines]
        del streamlines
        gc.collect()

        # Instantiate empty networkX graph object & dictionary and create
        # voxel-affine mapping
        lin_T, offset = _mapping_to_voxel(np.eye(4))
        mx = len(np.unique(atlas_data.astype("uint16"))) - 1
        g = nx.Graph(ecount=0, vcount=mx)
        edge_dict = defaultdict(int)
        # Map each non-background atlas intensity to a 1-based node index.
        node_dict = dict(
            zip(np.unique(atlas_data.astype("uint16"))[1:],
                np.arange(mx) + 1))

        # Add empty vertices with label volume attributes
        for node in range(1, mx + 1):
            g.add_node(node,
                       roi_volume=np.sum(atlas_data.astype("uint16") == node))

        # Build graph
        pc = 0
        bad_idxs = []
        fiberlengths = {}
        fa_weights_dict = {}
        print(f"Quantifying fiber-ROI intersection for {atlas}:")
        for ix, s in enumerate(sl):
            # Percent counter
            pcN = int(round(100 * float(ix / total_streamlines)))
            if pcN % 10 == 0 and ix > 0 and pcN > pc:
                pc = pcN
                print(f"{pcN}%")

            # Map the streamlines coordinates to voxel coordinates and get
            # labels for label_volume
            s = Streamlines(s)
            if s.data.shape[0] == 0:
                continue
            vox_coords = _to_voxel_coordinates(s, lin_T, offset)

            [i, j, k] = np.vstack(
                np.array([
                    nodemaker.get_sphere(coord, error_margin, roi_zooms,
                                         roi_shape) for coord in vox_coords
                ])).T

            # get labels for label_volume
            lab_arr = atlas_data[i, j, k]
            endlabels = []
            for jx, lab in enumerate(np.unique(lab_arr).astype("uint32")):
                if (lab > 0) and (np.sum(lab_arr == lab) >= overlap_thr):
                    try:
                        endlabels.append(node_dict[lab])
                    except KeyError:
                        # Only a missing key is expected here; anything
                        # broader would also swallow KeyboardInterrupt.
                        bad_idxs.append(jx)
                        print(f"Label {lab} missing from parcellation. Check "
                              f"registration and ensure valid input "
                              f"parcellation file.")

            for edge in combinations(endlabels, 2):
                # Get fiber lengths along edge
                if fiber_density is True:
                    fiberlengths.setdefault(edge, []).append(len(vox_coords))

                # Get FA values along edge
                if fa_wei is True:
                    fa_weights_dict.setdefault(edge, []).append(
                        fa_weights_norm[ix])

                edge_dict[tuple(sorted(tuple([int(node)
                                              for node in edge])))] += 1

            del lab_arr, endlabels
            gc.collect()

        # Edge counts are only final once every streamline has been visited,
        # so the edges are written to the graph once rather than re-added in
        # full on every iteration.
        g.add_weighted_edges_from([(k[0], k[1], count)
                                   for k, count in edge_dict.items()])

        del sl
        gc.collect()

        # Add fiber density attributes for each edge
        # Adapted from the normalized fiber-density estimation routines of
        # Sebastian Tourbier.
        if fiber_density is True:
            print("Redefining edges on the basis of fiber density...")
            # Summarize total fibers and total label volumes
            total_fibers = 0
            total_volume = 0
            u_start = -1
            for u, v, d in g.edges(data=True):
                # NOTE(review): len(d) is the number of attributes on the
                # edge, not its streamline count -- confirm whether
                # d['weight'] was intended here.
                total_fibers += len(d)
                if u != u_start:
                    total_volume += g.nodes[int(u)]['roi_volume']
                u_start = u

            for u, v, d in g.edges(data=True):
                # The per-edge value gets its own name: rebinding
                # `fiber_density` would clobber the boolean config flag that
                # the weight-summary branches below still test.
                if d['weight'] > 0:
                    edge_fiber_density = (float(
                        ((float(d['weight']) / float(total_fibers)) /
                         float(np.nanmean(fiberlengths[(u, v)]))) *
                        ((2.0 * float(total_volume)) /
                         (g.nodes[int(u)]['roi_volume'] +
                          g.nodes[int(v)]['roi_volume'])))) * 1000
                else:
                    edge_fiber_density = 0
                g.edges[u, v].update({"fiber_density": edge_fiber_density})

        if fa_wei is True:
            print("Re-weighting edges by mean FA along each edge's associated "
                  "bundles...")
            # Add FA attributes for each edge
            for u, v, d in g.edges(data=True):
                if d['weight'] > 0:
                    edge_average_fa = np.nanmean(fa_weights_dict[(u, v)])
                else:
                    edge_average_fa = np.nan
                g.edges[u, v].update({"fa_weight": edge_average_fa})

        # Summarize weights
        if fa_wei is True and fiber_density is True:
            for u, v, d in g.edges(data=True):
                g.edges[u, v].update(
                    {"final_weight": (d['fa_weight']) * d['fiber_density']})
        elif fiber_density is True and fa_wei is False:
            for u, v, d in g.edges(data=True):
                g.edges[u, v].update({"final_weight": d['fiber_density']})
        elif fa_wei is True and fiber_density is False:
            for u, v, d in g.edges(data=True):
                g.edges[u, v].update(
                    {"final_weight": d['fa_weight'] * d['weight']})
        else:
            for u, v, d in g.edges(data=True):
                g.edges[u, v].update({"final_weight": d['weight']})

        # Convert weighted graph to numpy matrix
        conn_matrix_raw = nx.to_numpy_array(g, weight='final_weight')

        # Enforce symmetry
        conn_matrix = np.maximum(conn_matrix_raw, conn_matrix_raw.T)

        print("Structural graph completed:\n", str(time.time() - start))

        if len(bad_idxs) > 0:
            # Delete from the highest index down so earlier deletions do not
            # shift the positions still to be removed.
            bad_idxs = sorted(list(set(bad_idxs)), reverse=True)
            for j in bad_idxs:
                del labels[j], coords[j]
    else:
        print(
            UserWarning('No valid streamlines detected. '
                        'Proceeding with an empty graph...'))
        mx = len(np.unique(atlas_data.astype("uint16"))) - 1
        conn_matrix = np.zeros((mx, mx))

    assert len(coords) == len(labels) == conn_matrix.shape[0]

    if subnet is not None:
        atlas_name = f"{atlas}_{subnet}_stage-rawgraph"
    else:
        atlas_name = f"{atlas}_stage-rawgraph"

    utils.save_coords_and_labels_to_json(coords,
                                         labels,
                                         dir_path,
                                         atlas_name,
                                         indices=None)

    coords = np.array(coords)
    labels = np.array(labels)

    if parc is True:
        node_radius = "parc"

    # Save unthresholded
    utils.save_mat(
        conn_matrix,
        utils.create_raw_path_diff(ID, subnet, conn_model, roi, dir_path,
                                   node_radius, track_type, parc, traversal,
                                   min_length, error_margin),
    )

    return (atlas_for_streams, streams, conn_matrix, track_type, dir_path,
            conn_model, subnet, node_radius, dens_thresh, ID, roi,
            min_span_tree, disp_filt, parc, prune, atlas, parcellation, labels,
            coords, norm, binary, traversal, min_length, error_margin)
Exemple #5
0
def thresh_func(
    dens_thresh,
    thr,
    conn_matrix,
    conn_model,
    network,
    ID,
    dir_path,
    roi,
    node_size,
    min_span_tree,
    smooth,
    disp_filt,
    parc,
    prune,
    atlas,
    uatlas,
    labels,
    coords,
    norm,
    binary,
    hpass,
    extract_strategy,
    check_consistency=True,
):
    """
    Threshold a functional connectivity matrix using any of a variety of
    methods.

    The thresholded matrix itself is not returned; it is saved to `est_path`,
    and the node coordinates/labels are saved alongside it as JSON.

    Parameters
    ----------
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis
        for thresholding.
    thr : float
        A value, between 0 and 1, to threshold the graph using any variety of
        methods triggered through other options.
    conn_matrix : array
        Adjacency matrix stored as an m x n array of nodes and edges.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for
       covariance, sps for precision covariance, partcorr for partial
       correlation). sps type is used by default.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming
        (e.g. 'Default') used to filter nodes in the study of brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    node_size : int
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    smooth : int
        Smoothing width (mm fwhm) to apply to time-series when extracting
        signal from ROI's.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone network' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    uatlas : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to ROI nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    hpass : float
        High-pass filter values (Hz) to apply to node-extracted time-series.
    extract_strategy : str
        The name of a valid function used to reduce the time-series region
        extraction.
    check_consistency : bool
        If True (default), assert that `coords`, `labels` and the thresholded
        matrix all have the same number of nodes.

    Returns
    -------
    edge_threshold : str
        The string percentage representation of thr.
    est_path : str
        File path to the thresholded graph, conn_matrix_thr, saved as a numpy
        array in .npy format.
    thr : float
        The value, between 0 and 1, used to threshold the graph using any
        variety of methods triggered through other options.
    node_size : int
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming
        (e.g. 'Default') used to filter nodes in the study of brain subgraphs.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for
       covariance, sps for precision covariance, partcorr for partial
       correlation). sps type is used by default.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    smooth : int
        Smoothing width (mm fwhm) to apply to time-series when extracting
        signal from ROI's.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    ID : str
        A subject id or other unique identifier.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    atlas : str
        Name of atlas parcellation used.
    uatlas : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to ROI nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    hpass : float
        High-pass filter values (Hz) to apply to node-extracted time-series.
    extract_strategy : str
        The name of a valid function used to reduce the time-series region
        extraction.

    Raises
    ------
    AssertionError
        If `check_consistency` is True and `coords`, `labels` and the
        thresholded matrix disagree in length.

    References
    ----------
    .. [1] van Wijk, B. C. M., Stam, C. J., & Daffertshofer, A. (2010).
      Comparing brain networks of different size and connectivity
      density using graph theory. PLoS ONE.
      https://doi.org/10.1371/journal.pone.0013701

    """
    import gc
    from pynets.core import utils, thresholding

    if np.count_nonzero(conn_matrix) == 0:
        print(UserWarning("Raw connectivity matrix contains only zeros."))

    [thr_type, edge_threshold, conn_matrix_thr] = \
        thresholding.perform_thresholding(
        conn_matrix, thr, min_span_tree, dens_thresh, disp_filt)

    # nx.from_numpy_matrix was removed in networkx 3.0; prefer its
    # replacement and only fall back on releases that predate it.
    graph_from_array = getattr(nx, "from_numpy_array", None) or \
        nx.from_numpy_matrix
    if not nx.is_connected(graph_from_array(conn_matrix_thr)):
        print("Warning: Fragmented graph")

    # Save thresholded mat
    est_path = utils.create_est_path_func(
        ID,
        network,
        conn_model,
        thr,
        roi,
        dir_path,
        node_size,
        smooth,
        thr_type,
        hpass,
        parc,
        extract_strategy,
    )

    utils.save_mat(conn_matrix_thr, est_path)
    gc.collect()

    if check_consistency is True:
        assert len(coords) == len(labels) == conn_matrix_thr.shape[0]

    if network is not None:
        atlas_name = f"{atlas}_{network}_stage-post_thr"
    else:
        atlas_name = f"{atlas}_stage-post_thr"

    utils.save_coords_and_labels_to_json(coords,
                                         labels,
                                         dir_path,
                                         atlas_name,
                                         indices=None)

    return (
        edge_threshold,
        est_path,
        thr,
        node_size,
        network,
        conn_model,
        roi,
        smooth,
        prune,
        ID,
        dir_path,
        atlas,
        uatlas,
        labels,
        coords,
        norm,
        binary,
        hpass,
        extract_strategy,
    )
Exemple #6
0
def get_conn_matrix(
    time_series,
    conn_model,
    dir_path,
    node_size,
    smooth,
    dens_thresh,
    network,
    ID,
    roi,
    min_span_tree,
    disp_filt,
    parc,
    prune,
    atlas,
    uatlas,
    labels,
    coords,
    norm,
    binary,
    hpass,
    extract_strategy,
):
    """
    Computes a functional connectivity matrix based on a node-extracted
    time-series array. Includes a library of routines across Nilearn,
    scikit-learn, and skggm packages, among others.

    The raw (unthresholded) matrix is symmetrized, saved to disk, and the
    node coordinates/labels are written to a JSON file alongside it.

    Parameters
    ----------
    time_series : array
        2D m x n array consisting of the time-series signal for each ROI node
        where m = number of scans and n = number of ROI's.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for
       covariance, sps for precision covariance, partcorr for partial
       correlation), or one of the skggm estimators
       (QuicGraphicalLasso, QuicGraphicalLassoCV, QuicGraphicalLassoEBIC,
       AdaptiveQuicGraphicalLasso).
       NOTE(review): earlier docs said "sps type is used by default", but
       this function has no default for `conn_model` -- confirm at caller.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    node_size : int
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's.
    smooth : int
        Smoothing width (mm fwhm) to apply to time-series when extracting
        signal from ROI's.
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis for
        thresholding.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming
        (e.g. 'Default') used to filter nodes in the study of brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone network' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    uatlas : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to ROI nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    hpass : bool
        High-pass filter values (Hz) to apply to node-extracted time-series.
    extract_strategy : str
        The name of a valid function used to reduce the time-series region
        extraction.

    Returns
    -------
    conn_matrix : array
        Adjacency matrix stored as an m x n array of nodes and edges.
    conn_model : str
       Connectivity estimation model, returned unchanged.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    node_size : int or str
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's. Set to the string "parc" when `parc`
        is True.
    smooth : int
        Smoothing width (mm fwhm) to apply to time-series when extracting
        signal from ROI's.
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis for
        thresholding.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming
        (e.g. 'Default') used to filter nodes in the study of brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone network' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    uatlas : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : array
        Labels corresponding to graph nodes, converted to a numpy array.
    coords : array
        (x, y, z) coordinates of the nodes, converted to a numpy array.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    hpass : bool
        High-pass filter values (Hz) to apply to node-extracted time-series.
    extract_strategy : str
        The name of a valid function used to reduce the time-series region
        extraction.

    Raises
    ------
    ValueError
        If `conn_model` is not a recognized estimator, or if every fallback
        covariance estimator fails to converge.
    ImportError
        If a skggm estimator is requested but skggm (`inverse_covariance`)
        is not installed.
    RuntimeError
        If the estimated matrix is empty or smaller than 2 x 2.

    References
    ----------
    .. [1] Varoquaux, G., & Craddock, R. C. (2013). Learning and comparing
      functional connectomes across subjects. NeuroImage.
      https://doi.org/10.1016/j.neuroimage.2013.04.007
    .. [2] Jason Laska, Manjari Narayan, 2017. skggm 0.2.7:
      A scikit-learn compatible package for Gaussian and related Graphical
      Models. doi:10.5281/zenodo.830033

    """
    from pynets.core import utils
    from pynets.fmri.estimation import get_optimal_cov_estimator
    from nilearn.connectome import ConnectivityMeasure

    # Every accepted alias mapped onto the `kind` string nilearn expects.
    nilearn_kinds = {
        "corr": "correlation",
        "cor": "correlation",
        "correlation": "correlation",
        "partcorr": "partial correlation",
        "parcorr": "partial correlation",
        "partialcorrelation": "partial correlation",
        "sps": "precision",
        "sparse": "precision",
        "precision": "precision",
        "cov": "covariance",
        "covariance": "covariance",
        "covar": "covariance",
    }

    skggm_models = (
        "QuicGraphicalLasso",
        "QuicGraphicalLassoCV",
        "QuicGraphicalLassoEBIC",
        "AdaptiveQuicGraphicalLasso",
    )

    # Use the public exception name; `np.linalg.linalg` is a private module
    # path that is not guaranteed to exist across NumPy versions.
    estimation_errors = (np.linalg.LinAlgError, FloatingPointError)

    def _fallback_covariance(time_series, kind):
        """Estimate the matrix with shrinkage estimators after removing
        gross outliers from the time-series."""
        from sklearn.ensemble import IsolationForest
        from sklearn import covariance

        print('Matrix estimation failed with Lasso and shrinkage due to '
              'ill conditions. Removing potential anomalies from the '
              'time-series using IsolationForest...')

        # Remove gross outliers (predict() flags them with -1)
        model = IsolationForest(contamination=0.02)
        model.fit(time_series)
        inlier_mask = model.predict(time_series) != -1
        time_series = time_series[inlier_mask]

        # Fall back to LedoitWolf
        try:
            print("Attempting with Ledoit-Wolf...")
            conn_measure = ConnectivityMeasure(
                cov_estimator=covariance.LedoitWolf(store_precision=True,
                                                    assume_centered=True),
                kind=kind)
            return conn_measure.fit_transform([time_series])[0]
        except estimation_errors:
            print("Attempting Oracle Approximating Shrinkage Estimator...")
            conn_measure = ConnectivityMeasure(
                cov_estimator=covariance.OAS(assume_centered=True), kind=kind)
            try:
                return conn_measure.fit_transform([time_series])[0]
            except estimation_errors as e:
                raise ValueError('All covariance estimators failed to '
                                 'converge...') from e

    if conn_model in nilearn_kinds:
        kind = nilearn_kinds[conn_model]
        print(f"\nComputing {kind} matrix...\n")

        # Try with the best-fitting Lasso estimator. It is only needed for
        # the nilearn kinds, so it is not computed for skggm models.
        estimator = get_optimal_cov_estimator(time_series)
        if estimator:
            conn_measure = ConnectivityMeasure(cov_estimator=estimator,
                                               kind=kind)
            try:
                conn_matrix = conn_measure.fit_transform([time_series])[0]
            except estimation_errors:
                conn_matrix = _fallback_covariance(time_series, kind)
        else:
            conn_matrix = _fallback_covariance(time_series, kind)
    elif conn_model in skggm_models:
        # Fail loudly if skggm is missing: merely printing the error would
        # let execution continue into a NameError on the estimator class.
        try:
            from inverse_covariance import (
                AdaptiveQuicGraphicalLasso,
                QuicGraphicalLasso,
                QuicGraphicalLassoCV,
                QuicGraphicalLassoEBIC,
            )
        except ImportError as e:
            raise ImportError(
                f"Cannot run {conn_model}. Skggm not installed!") from e

        if conn_model == "QuicGraphicalLasso":
            # Compute the sparse inverse covariance via QuicGraphLasso
            # credit: skggm
            model = QuicGraphicalLasso(init_method="cov",
                                       lam=0.5,
                                       mode="default",
                                       verbose=1)
            print("\nCalculating QuicGraphLasso precision matrix using "
                  "skggm...\n")
            model.fit(time_series)
            conn_matrix = model.precision_
        elif conn_model == "QuicGraphicalLassoCV":
            # Compute the sparse inverse covariance via QuicGraphLassoCV
            # credit: skggm
            model = QuicGraphicalLassoCV(init_method="cov", verbose=1)
            print("\nCalculating QuicGraphLassoCV precision matrix using"
                  " skggm...\n")
            model.fit(time_series)
            conn_matrix = model.precision_
        elif conn_model == "QuicGraphicalLassoEBIC":
            # Compute the sparse inverse covariance via QuicGraphLassoEBIC
            # credit: skggm
            model = QuicGraphicalLassoEBIC(init_method="cov", verbose=1)
            print("\nCalculating QuicGraphLassoEBIC precision matrix using"
                  " skggm...\n")
            model.fit(time_series)
            conn_matrix = model.precision_
        else:
            # Compute the sparse inverse covariance via
            # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
            # credit: skggm
            model = AdaptiveQuicGraphicalLasso(
                estimator=QuicGraphicalLassoEBIC(init_method="cov", ),
                method="binary",
            )
            print("\nCalculating AdaptiveQuicGraphLasso precision matrix using"
                  " skggm...\n")
            model.fit(time_series)
            conn_matrix = model.estimator_.precision_
    else:
        raise ValueError("\nNo connectivity model specified at runtime. "
                         "Select a valid estimator using the -mod flag.")

    # Enforce symmetry
    conn_matrix = np.nan_to_num(np.maximum(conn_matrix, conn_matrix.T))

    if parc is True:
        node_size = "parc"

    # Save unthresholded
    utils.save_mat(
        conn_matrix,
        utils.create_raw_path_func(
            ID,
            network,
            conn_model,
            roi,
            dir_path,
            node_size,
            smooth,
            hpass,
            parc,
            extract_strategy,
        ),
    )

    # Explicit dimension check rather than a lexicographic tuple comparison
    # against (2, 2).
    if conn_matrix.ndim < 2 or min(conn_matrix.shape) < 2:
        raise RuntimeError(
            "\nMatrix estimation selection yielded an empty or"
            " 1-dimensional graph. "
            "Check time-series for errors or try using a different atlas")

    if network is not None:
        atlas_name = f"{atlas}_{network}_stage-rawgraph"
    else:
        atlas_name = f"{atlas}_stage-rawgraph"

    utils.save_coords_and_labels_to_json(coords,
                                         labels,
                                         dir_path,
                                         atlas_name,
                                         indices=None)

    coords = np.array(coords)
    labels = np.array(labels)

    return (
        conn_matrix,
        conn_model,
        dir_path,
        node_size,
        smooth,
        dens_thresh,
        network,
        ID,
        roi,
        min_span_tree,
        disp_filt,
        parc,
        prune,
        atlas,
        uatlas,
        labels,
        coords,
        norm,
        binary,
        hpass,
        extract_strategy,
    )
Exemple #7
0
def thresh_struct(
    dens_thresh,
    thr,
    conn_matrix,
    conn_model,
    subnet,
    ID,
    dir_path,
    roi,
    node_radius,
    min_span_tree,
    disp_filt,
    parc,
    prune,
    atlas,
    parcellation,
    labels,
    coords,
    norm,
    binary,
    track_type,
    atlas_for_streams,
    streams,
    traversal,
    min_length,
    error_margin,
    check_consistency=True,
):
    """
    Threshold a structural connectivity matrix using any of a variety of
    methods.

    The thresholded matrix is saved to `est_path` (it is not returned), and
    the node coordinates/labels are written to a JSON file in `dir_path`.

    Parameters
    ----------
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis for
        thresholding.
    thr : float
        A value, between 0 and 1, to threshold the graph using any variety of
        methods triggered through other options.
    conn_matrix : array
        Adjacency matrix stored as an m x n array of nodes and edges.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for
       covariance, sps for precision covariance, partcorr for partial
       correlation). sps type is used by default.
    subnet : str
        Resting-state subnet based on Yeo-7 and Yeo-17 naming
        (e.g. 'Default') used to filter nodes in the study of brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    node_radius : int
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone subnet' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    parcellation : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to ROI nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    track_type : str
        Tracking algorithm used (e.g. 'local' or 'particle').
    atlas_for_streams : str
        File path to atlas parcellation Nifti1Image in the morphological
        space of the streamlines.
    streams : str
        File path to save streamline array sequence in .trk format.
    traversal : str
        The statistical approach to tracking. Options are:
        det (deterministic), closest (clos), boot (bootstrapped),
        and prob (probabilistic).
    min_length : int
        Minimum fiber length threshold in mm to restrict tracking.
    error_margin : int
        Forwarded to `utils.create_est_path_diff` when building the output
        path and returned unchanged. Presumably the streamline-to-node
        distance tolerance used upstream -- TODO confirm against caller.
    check_consistency : bool
        If True (default), assert that `coords`, `labels` and the
        thresholded matrix all describe the same number of nodes.

    Returns
    -------
    edge_threshold : str
        The string percentage representation of thr.
    est_path : str
        File path to the thresholded graph, conn_matrix_thr, saved as a numpy
        array in .npy format.
    thr : float
        The value, between 0 and 1, used to threshold the graph using any
        variety of methods triggered through other options.
    node_radius : int or str
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's. Set to the string "parc" when `parc`
        is True.
    subnet : str
        Resting-state subnet based on Yeo-7 and Yeo-17 naming
        (e.g. 'Default') used to filter nodes in the study of brain subgraphs.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for
       covariance, sps for precision covariance, partcorr for partial
       correlation). sps type is used by default.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    ID : str
        A subject id or other unique identifier.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    atlas : str
        Name of atlas parcellation used.
    parcellation : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to ROI nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    track_type : str
        Tracking algorithm used (e.g. 'local' or 'particle').
    atlas_for_streams : str
        File path to atlas parcellation Nifti1Image in the morphological
        space of the streamlines.
    streams : str
        File path to save streamline array sequence in .trk format.
    traversal : str
        The statistical approach to tracking. Options are:
        det (deterministic), closest (clos), boot (bootstrapped),
        and prob (probabilistic).
    min_length : int
        Minimum fiber length threshold in mm to restrict tracking.
    error_margin : int
        The `error_margin` argument, returned unchanged.

    Raises
    ------
    AssertionError
        If `check_consistency` is True and the lengths of `coords`, `labels`
        and the thresholded matrix disagree.

    References
    ----------
    .. [1] van Wijk, B. C. M., Stam, C. J., & Daffertshofer, A. (2010).
      Comparing brain networks of different size and connectivity
      density using graph theory. PLoS ONE.
      https://doi.org/10.1371/journal.pone.0013701

    """
    import gc
    from pynets.core import utils, thresholding

    # Parcel-based nodes have no radius; the path builder gets a tag instead.
    if parc is True:
        node_radius = "parc"

    if np.count_nonzero(conn_matrix) == 0:
        print(UserWarning("Raw connectivity matrix contains only"
                          " zeros."))

    thr_type, edge_threshold, conn_matrix_thr = \
        thresholding.perform_thresholding(
            conn_matrix, thr, min_span_tree, dens_thresh, disp_filt)

    # `nx.from_numpy_matrix` was removed in networkx 3.0;
    # `from_numpy_array` is the supported equivalent for ndarray input.
    graph = nx.from_numpy_array(np.asarray(conn_matrix_thr))
    if not nx.is_connected(graph):
        print("Warning: Fragmented graph")

    # Save thresholded mat
    est_path = utils.create_est_path_diff(
        ID,
        subnet,
        conn_model,
        thr,
        roi,
        dir_path,
        node_radius,
        track_type,
        thr_type,
        parc,
        traversal,
        min_length,
        error_margin
    )

    utils.save_mat(conn_matrix_thr, est_path)
    gc.collect()

    if check_consistency is True:
        assert len(coords) == len(labels) == conn_matrix_thr.shape[0], (
            f"Node count mismatch: {len(coords)} coords, {len(labels)} "
            f"labels, {conn_matrix_thr.shape[0]} matrix rows")

    if subnet is not None:
        atlas_name = f"{atlas}_{subnet}_stage-post_thr"
    else:
        atlas_name = f"{atlas}_stage-post_thr"

    utils.save_coords_and_labels_to_json(coords, labels, dir_path,
                                         atlas_name, indices=None)

    return (
        edge_threshold,
        est_path,
        thr,
        node_radius,
        subnet,
        conn_model,
        roi,
        prune,
        ID,
        dir_path,
        atlas,
        parcellation,
        labels,
        coords,
        norm,
        binary,
        track_type,
        atlas_for_streams,
        streams,
        traversal,
        min_length,
        error_margin
    )