Example no. 1
def test_gray_mask(self):
    temp_imgdata = self.imgdata.copy()
    gray_matter_mask = np.zeros(self.imgdata.shape[0:3])
    gray_matter_mask[0, 1, 0] = 0.5
    gray_matter_mask[1, 2, 1] = 0.9
    corrs_and_mask_calculations.gray_mask(temp_imgdata, gray_matter_mask)
    unmasked_voxels = set(
        corrs_and_mask_calculations.find_unmasked_voxels(temp_imgdata))
    true_unmasked_voxels = {(0, 1, 0), (1, 2, 1)}
    self.assertEqual(unmasked_voxels, true_unmasked_voxels)
    self.assertTrue(all(temp_imgdata[0, 1, 0] == self.timeseries2))
    self.assertTrue(all(temp_imgdata[1, 2, 1] == self.timeseries5))
    # every voxel outside the mask should have been zeroed out
    for coord in set(itertools.product([0, 1], [0, 1, 2],
                                       [0, 1])) - true_unmasked_voxels:
        self.assertTrue(
            all(temp_imgdata[coord] == np.zeros(self.imgdata.shape[3])))
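
# For context: gray_mask zeroes out, in place, the time series of voxels that
# fall outside the gray matter mask. A minimal numpy sketch of that behavior,
# assuming (as the test above suggests) that voxels with a nonzero mask value
# are kept; the exact thresholding lives in corrs_and_mask_calculations.
import numpy as np

def gray_mask_sketch(imgdata, mask):
    """Zero the time series of voxels whose mask value is zero (in place)."""
    imgdata[mask == 0] = 0  # 3D boolean index selects voxels across all time points

imgdata = np.random.rand(2, 3, 2, 10)  # toy 4D image: 2x3x2 voxels, 10 time points
mask = np.zeros((2, 3, 2))
mask[0, 1, 0] = 0.5  # the two voxels kept in the test above
mask[1, 2, 1] = 0.9
gray_mask_sketch(imgdata, mask)
print(np.count_nonzero(imgdata.any(axis=3)))  # -> 2 unmasked voxels remain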
Example no. 2

import nibabel as nib
import numpy as np

import corrs_and_mask_calculations  # project-local module


def shuffle_nii(nii_path, mask_path, n_shuffles, out_path_stem):
    """
    Reads a NIFTI file, shuffles the voxel time series while retaining voxel locations, and saves the shuffled time series in 
    a new NIFTI file. The shuffling does not affect the affine of the NIFTI.
    
    Parameters:
    -----------
    nii_path: str, path to the input NIFTI file
    mask_path: str, path to a mask defining the voxel time series to shuffle (can be e.g. a gray matter or ROI mask)
    n_shuffles: int, number of NIFTI files with shuffled time series to be produced
    out_path_stem: str, stem of the output path; '_shuffle_<i>.nii' is appended for each shuffled file
    
    Returns:
    --------
    no direct output, saves the shuffled time series as NIFTI
    """
    # reading data:
    img = nib.load(nii_path)
    affine = img.affine
    imgdata = img.get_fdata()
    n_x, n_y, n_z, n_t = imgdata.shape
    # reading mask, masking data and finding voxel coordinates
    mask_img = nib.load(mask_path)
    mask_data = mask_img.get_fdata()
    corrs_and_mask_calculations.gray_mask(imgdata, mask_data)
    voxel_coordinates = list(zip(*np.where(np.any(imgdata != 0, axis=3))))
    # reading voxel time series
    n_voxels = len(voxel_coordinates)
    all_voxel_ts = np.zeros((n_voxels, n_t))
    for i, voxel in enumerate(voxel_coordinates):
        all_voxel_ts[i, :] = imgdata[voxel[0], voxel[1], voxel[2], :]
    # shuffling the voxel time series, creating shuffled nii, and saving
    for i in range(n_shuffles):
        shuffled_ts = all_voxel_ts.copy()
        np.random.shuffle(shuffled_ts)
        shuffled_imgdata = np.zeros((n_x, n_y, n_z, n_t))
        for voxel, ts in zip(voxel_coordinates, shuffled_ts):
            shuffled_imgdata[voxel[0], voxel[1], voxel[2], :] = ts
        out_path = out_path_stem + '_shuffle_' + str(i) + '.nii'
        shuffled_img = nib.Nifti1Image(shuffled_imgdata, affine)
        nib.save(shuffled_img, out_path)
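
# A minimal usage sketch with hypothetical paths: this writes
# 'shuffled/subj1_shuffle_0.nii' ... 'shuffled/subj1_shuffle_9.nii'.
shuffle_nii('/data/subj1_bold.nii', '/data/gray_matter_mask.nii',
            n_shuffles=10, out_path_stem='shuffled/subj1')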
Example no. 3
import collections

import nibabel as nib

# project-local modules (clustering_method_parser is assumed to be defined
# in the same module as this function)
import corrs_and_mask_calculations
import network_construction
import network_io
import subgraph_classification


def isomorphism_classes_from_file(filename, data_mask_filename,
                                  timewindow, overlap, density_params,
                                  clustering_method_params,
                                  nlayers, nnodes,
                                  isomorphism_allowed_aspects=[0],
                                  isomorphism_class_savenames=None,
                                  isomorphism_class_examples_savenames=None,
                                  layersetwise_networks_savefolder=None,
                                  log_savename=None):
    """Create isomorphism class dictionary/ies from brain imaging data.
    
    Arguments:
    filename : str, name of nifti file
    data_mask_filename : str, name of file containing the gray matter mask for the data
        (None = no masking)
    timewindow : int, length of timewindow in data points
    overlap : int, length of overlap between consecutive timewindows in data points
    density_params : dict, with keys
        intralayer_density : float, density of intralayer networks
        interlayer_density : float, density of interlayer networks
        OR
        intra_avg_degree : float, average intralayer degree of nodes
        inter_avg_degree : float, average in- and out-degree of nodes
    clustering_method_params : dict, with keys
        method : str, name of the clustering method
        AND
        key-value pairs giving parameters for chosen method (see section Clustering)
    nlayers : int, the number of layers in graphlets of interest
    nnodes : int or list of ints, the number of nodes in graphlets of interest
        if list, each entry will be combined with nlayers and enumerated
    isomorphism_allowed_aspects : list, define allowed aspects for isomorphism
    isomorphism_class_savenames : str or list of strs, the same length as nnodes
        filenames for saving found isomorphism class dicts (None = no saving)
    isomorphism_class_examples_savenames : str or list of strs, the same length as nnodes
        filenames for saving example networks for each isomorphism class (None = no saving)
    layersetwise_networks_savefolder : str, folder for saving the generated networks
        (None = no saving)
    log_savename : str, appends successful completion info to this file (None = no logging)
    
    Clustering:
    TODO explanation
    
    Returns:
    dict of dicts of dicts, first level of keys is (nnodes,nlayers) pairs as tuples,
    second level of keys is isomorphism classes as tuples (complete invariants),
    and third level of keys is (ordered) layersets as tuples. The number of (nnodes,nlayers)
    pairs depends on how many different nnodes are given.
    return_dict[(nnodes,nlayers)][compinvariant][(layerset)] = frequency
    (dicts are collections.defaultdict objects)
    """
    # convert int nnodes and single str savenames to length-1 lists
    nnodes = [nnodes] if isinstance(nnodes, int) else nnodes
    isomorphism_class_savenames = (
        [isomorphism_class_savenames]
        if isinstance(isomorphism_class_savenames, str)
        else isomorphism_class_savenames)
    isomorphism_class_examples_savenames = (
        [isomorphism_class_examples_savenames]
        if isinstance(isomorphism_class_examples_savenames, str)
        else isomorphism_class_examples_savenames)
    # create container data structures for isomorphism classes
    aggregated_isomclass_dict = collections.defaultdict(lambda: collections.defaultdict(dict))
    aggregated_example_dict = collections.defaultdict(dict)
    # load data
    data = nib.load(filename)
    image_array = data.get_fdata()
    # mask data
    if data_mask_filename:
        maskdata = nib.load(data_mask_filename)
        mask_array = maskdata.get_fdata()
        corrs_and_mask_calculations.gray_mask(image_array,mask_array)
    # get layersetwise network generator
    layersetwise_generator = clustering_method_parser(
        image_array, timewindow, overlap, nlayers, clustering_method_params)
    for M in layersetwise_generator:
        if layersetwise_networks_savefolder:
            # write full network with all the weights
            network_io.write_layersetwise_network(M,layersetwise_networks_savefolder)
        M = network_construction.threshold_network(M,density_params)
        for i in range(len(nnodes)):
            subgraph_classification.find_isomorphism_classes(
                M, nnodes[i], nlayers, None,
                allowed_aspects=isomorphism_allowed_aspects,
                aggregated_dict=aggregated_isomclass_dict[(nnodes[i], nlayers)],
                examples_dict=aggregated_example_dict[(nnodes[i], nlayers)])
    for i in range(len(nnodes)):
        if isomorphism_class_savenames:
            network_io.write_pickle_file(
                dict(aggregated_isomclass_dict[(nnodes[i], nlayers)]),
                isomorphism_class_savenames[i])
        if isomorphism_class_examples_savenames:
            network_io.write_pickle_file(
                aggregated_example_dict[(nnodes[i], nlayers)],
                isomorphism_class_examples_savenames[i])
    if log_savename:
        with open(log_savename, 'a+') as f:
            f.write(str(nlayers) + ' layers ' +
                    ','.join(str(n) for n in nnodes) + ' nodes done\n')
    return aggregated_isomclass_dict
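
# A minimal usage sketch with hypothetical paths and parameter values; the
# exact clustering_method_params keys depend on the chosen method (see the
# Clustering section of the docstring).
density_params = {'intralayer_density': 0.05, 'interlayer_density': 0.05}
clustering_method_params = {'method': 'template'}  # plus method-specific keys
isom_classes = isomorphism_classes_from_file(
    '/data/subj1_bold.nii', '/data/gray_matter_mask.nii',
    timewindow=100, overlap=0,
    density_params=density_params,
    clustering_method_params=clustering_method_params,
    nlayers=2, nnodes=[2, 3],
    isomorphism_class_savenames=['isom_2_2.pickle', 'isom_3_2.pickle'])
# nested structure: dict[(nnodes, nlayers)][complete_invariant][layerset] = frequency
for compinvariant, layersets in isom_classes[(2, 2)].items():
    for layerset, freq in layersets.items():
        print(compinvariant, layerset, freq)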
Example no. 4
import collections
import datetime
import os
import pickle

import nibabel as nib

# project-local modules
import corrs_and_mask_calculations
import network_construction
import network_io
import subgraph_classification


def isomorphism_classes_from_nifti(nii_data_filename,
                                   subj_id,
                                   run_number,
                                   timewindow,
                                   overlap,
                                   intralayer_density,
                                   interlayer_density,
                                   subgraph_size_dict,
                                   allowed_aspects=[0],
                                   use_aggregated_dict=True,
                                   create_examples_dict=True,
                                   clustering_method=None,
                                   mask_or_template_filename=None,
                                   mask_or_template_name=None,
                                   number_of_clusters=100,
                                   data_folder=None,
                                   preprocess_level_folder=None,
                                   template_folder=None,
                                   relative_nii_path=False,
                                   relative_template_path=False,
                                   event_time_stamps=None):
    '''
    Usage:
    nii_data_filename : string, filename for nifti file which contains the 4D data matrix (three spatial and one temporal)
    subj_id : string, id for subject for saving (e.g. 'a5n')
    run_number : int, run number for saving (e.g. 2)
    timewindow : int, timewindow size in data points (e.g. 100)
    overlap : int, number of overlapping data points between time windows (e.g. 0 for no overlap)
    intralayer_density : float, edge density (0<density<1) for thresholding correlation networks within layers
    interlayer_density : float, edge density (0<density<1) for thresholding networks between layers, only used if net is
                        _not_ a multiplex network (e.g. template networks and voxel-level networks are multiplex)
    subgraph_size_dict : dict, with number of layers as key and a tuple of numbers of nodes as value,
                        e.g. {2:(2,3), 3:(2,)} finds all subgraphs with sizes:
                        (2 layers, 2 nodes)
                        (2 layers, 3 nodes)
                        (3 layers, 2 nodes)
    allowed_aspects : list, which aspects are allowed to be permuted when calculating isomorphism classes
                        [0] = vertex-isomorphic classes
                        [0,1] = vertex-layer-isomorphic classes
    use_aggregated_dict : bool, whether to save results as pickle dict (True) or one-line-per-subgraph text file (False)
                        USE TRUE!
    create_examples_dict : bool, whether to save an example network from each isomorphism class or not
    clustering_method : string or None, 'template' or 'sklearn' or None
                        'template' = use preconstructed template
                        'sklearn' = use sklearn HAC for each layer individually
                        None = voxel-level analysis
    mask_or_template_filename : string, if clustering_method == 'template', then this will be used as template, otherwise it will
                        be used as a mask (to remove e.g. non-gray matter)
    mask_or_template_name : string, for saving (e.g. 'HarvardOxford')
    number_of_clusters : int, only used if clustering_method == 'sklearn'
    data_folder : string, location of data folder if desired, to be used with relative_nii_path=True
    preprocess_level_folder : string, save location for results (file structure for results will be created under this), IMPORTANT
    template_folder : string, location of template folder if desired, to be used with relative_template_path=True
    relative_nii_path : bool, True if nii_data_filename should be added to data_folder to reach the nifti file (allows nii_data_filename
                        to be given as a relative path starting from data_folder)
    relative_template_path : bool, same as relative_nii_path but for template
    event_time_stamps : list, can contain time stamps where event change happens in the data, to create layers according to them in sklearn
                        clustering (available in sklearn clustering so far, not in template clustering)
    
    Recommendations:
    Do not use data_folder or template_folder; set relative_nii_path=relative_template_path=False.
    There is nothing wrong with using them, but it is simpler to give nii_data_filename and mask_or_template_filename as complete paths,
    e.g. nii_data_filename='/a/b/c/data_file.nii'.
    preprocess_level_folder is the location where everything is saved; give it as a complete path.
    The results will be saved as:
    preprocess_level_folder
        - subj_id
            - run_number
                - clustering_type
                    - mask_or_template_name
                        (- number_of_clusters if using sklearn, else this level does not exist)
                            - net_X
                            - subnets_X
    where X is an identifier containing timewindow, overlap, creation date, and for subnets also densities.
    net_X will be a folder containing the layersetwise unthresholded networks; subnets_X will be a folder containing subnets files
    named after nnodes_nlayers. The exact form depends on use_aggregated_dict: if it is True, these will be pickle files, otherwise text files.
    subnets_X will also contain example networks in dicts in pickle files, if create_examples_dict==True.
    
    For examples see sections below.
    '''
    assert (0 < intralayer_density < 1 and 0 < interlayer_density < 1)
    assert (isinstance(timewindow, int))
    assert (isinstance(overlap, int))
    # masking required for every file, to remove voxels outside of the brain at the very least
    assert (mask_or_template_filename is not None
            and mask_or_template_name is not None)

    if relative_nii_path:
        nii_data_filename = data_folder + nii_data_filename
    if relative_template_path and mask_or_template_filename is not None:
        mask_or_template_filename = template_folder + mask_or_template_filename

    if clustering_method is None:
        voxel_level_folder = preprocess_level_folder + subj_id + '/' + str(
            run_number) + '/voxel_level/' + mask_or_template_name + '/'
        if not os.path.exists(voxel_level_folder):
            os.makedirs(voxel_level_folder)
    elif clustering_method == 'template':
        assert (mask_or_template_filename is not None
                and mask_or_template_name is not None)
        cluster_level_folder = preprocess_level_folder + subj_id + '/' + str(
            run_number) + '/template_clustering/' + mask_or_template_name + '/'
        if not os.path.exists(cluster_level_folder):
            os.makedirs(cluster_level_folder)
    elif clustering_method == 'sklearn':
        cluster_level_folder = preprocess_level_folder + subj_id + '/' + str(
            run_number) + '/sklearn_hac/' + mask_or_template_name + '/' + str(
                number_of_clusters) + '/'
        if not os.path.exists(cluster_level_folder):
            os.makedirs(cluster_level_folder)
    else:
        raise NotImplementedError('Not implemented')

    current_time = datetime.datetime.now().replace(microsecond=0).isoformat()
    network_identifier = str(timewindow) + '_' + str(
        overlap) + '_' + current_time

    # encode density without the decimal point, e.g. 0.05 -> '005', 0.1 -> '010'
    intralayer_density_as_string = str(intralayer_density).replace('.', '')
    if len(intralayer_density_as_string) < 3:
        intralayer_density_as_string = '{:.2f}'.format(
            intralayer_density).replace('.', '')

    if clustering_method == 'sklearn':
        interlayer_density_as_string = str(interlayer_density).replace('.', '')
        if len(interlayer_density_as_string) < 3:
            interlayer_density_as_string = '{:.2f}'.format(
                interlayer_density).replace('.', '')

    if clustering_method is None:
        subnets_folder = voxel_level_folder + 'subnets_' + network_identifier + '_' + intralayer_density_as_string + '/'
    elif clustering_method == 'template':
        subnets_folder = cluster_level_folder + 'subnets_' + network_identifier + '_' + intralayer_density_as_string + '/'
    elif clustering_method == 'sklearn':
        subnets_folder = cluster_level_folder + 'subnets_' + network_identifier + '_' + intralayer_density_as_string + '_' + interlayer_density_as_string + '/'
    else:
        raise NotImplementedError('Not implemented')
    os.makedirs(subnets_folder)

    # load data
    img = nib.load(nii_data_filename)
    imgdata = img.get_fdata()
    # load template if template clustering is used
    if clustering_method == 'template':
        templateimg = nib.load(mask_or_template_filename)
        template = templateimg.get_fdata()
    # apply mask to sklearn clustering, if mask is given
    elif clustering_method == 'sklearn':
        maskimg = nib.load(mask_or_template_filename)
        mask = maskimg.get_fdata()
        corrs_and_mask_calculations.gray_mask(imgdata, mask)
    # apply mask to voxel-level, if mask is given
    elif clustering_method is None:
        maskimg = nib.load(mask_or_template_filename)
        mask = maskimg.get_fdata()
        corrs_and_mask_calculations.gray_mask(imgdata, mask)

    # create aggregated dicts and example dicts
    # saved in dicts with key (nnodes,nlayers) - element is correct dict if relevant parameter is True, None otherwise
    aggregated_dicts_dict = dict()
    examples_dicts_dict = dict()
    for n_layers in subgraph_size_dict:
        for n_nodes in subgraph_size_dict[n_layers]:
            if use_aggregated_dict:
                aggregated_dicts_dict[(
                    n_nodes, n_layers)] = collections.defaultdict(dict)
            else:
                aggregated_dicts_dict[(n_nodes, n_layers)] = None
            if create_examples_dict:
                examples_dicts_dict[(n_nodes, n_layers)] = dict()
            else:
                examples_dicts_dict[(n_nodes, n_layers)] = None

    for n_layers in subgraph_size_dict:
        if clustering_method is None:
            layersetwise_save_location = voxel_level_folder + 'net_' + network_identifier + '/' + str(
                n_layers) + '_layers/'
        elif clustering_method == 'template' or clustering_method == 'sklearn':
            layersetwise_save_location = cluster_level_folder + 'net_' + network_identifier + '/' + str(
                n_layers) + '_layers/'
        else:
            raise NotImplementedError('Not implemented')
        os.makedirs(layersetwise_save_location)

        # Generators for getting layersetwise networks
        if clustering_method is None:
            nanlogfile = voxel_level_folder + 'net_' + network_identifier + '/' + str(
                n_layers) + '_layers_nanlog.txt'
        elif clustering_method == 'template' or clustering_method == 'sklearn':
            nanlogfile = cluster_level_folder + 'net_' + network_identifier + '/' + str(
                n_layers) + '_layers_nanlog.txt'
        else:
            raise NotImplementedError('Not implemented')

        if clustering_method is None:
            layersetwise_generator = network_construction.yield_multiplex_network_in_layersets(
                imgdata, n_layers, timewindow, overlap, nanlogfile=nanlogfile)
        elif clustering_method == 'template':
            layersetwise_generator = network_construction.yield_clustered_multilayer_network_in_layersets(
                imgdata,
                n_layers,
                timewindow,
                overlap,
                n_clusters=-1,
                method='template',
                template=template,
                nanlogfile=nanlogfile)
        elif clustering_method == 'sklearn':
            layersetwise_generator = network_construction.yield_clustered_multilayer_network_in_layersets(
                imgdata,
                n_layers,
                timewindow,
                overlap,
                number_of_clusters,
                method='sklearn',
                template=None,
                nanlogfile=nanlogfile,
                event_time_stamps=event_time_stamps)
        else:
            raise NotImplementedError('Not implemented')

        for M in layersetwise_generator:
            layerset_net_filename = '_'.join(
                [str(l) for l in sorted(M.iter_layers())])
            metadata = 'Origin: ' + nii_data_filename + ' Layers: ' + layerset_net_filename + ' Timewindow: ' + str(
                timewindow) + ' Overlap: ' + str(
                    overlap) + ' Created_on: ' + current_time
            network_io.write_weighted_network(
                M, layersetwise_save_location + layerset_net_filename,
                metadata)

            if clustering_method is None:
                M = network_construction.threshold_multiplex_network(
                    M, intralayer_density)
            elif clustering_method == 'template':
                M = network_construction.threshold_multiplex_network(
                    M, intralayer_density)
            elif clustering_method == 'sklearn':
                M = network_construction.threshold_multilayer_network(
                    M, intralayer_density, interlayer_density)

            for n_nodes in subgraph_size_dict[n_layers]:
                if use_aggregated_dict:
                    # results go into the aggregated dict; no subnets file is written
                    subnets_filename = 'this_file_should_not_exist'
                else:
                    subnets_filename = subnets_folder + str(
                        n_nodes) + '_' + str(n_layers)
                subgraph_classification.find_isomorphism_classes(
                    M,
                    n_nodes,
                    n_layers,
                    subnets_filename,
                    allowed_aspects=allowed_aspects,
                    aggregated_dict=aggregated_dicts_dict[(n_nodes, n_layers)],
                    examples_dict=examples_dicts_dict[(n_nodes, n_layers)])
            del M

        if use_aggregated_dict:
            for n_nodes in subgraph_size_dict[n_layers]:
                aggregated_dict_filename = subnets_folder + str(
                    n_nodes) + '_' + str(n_layers) + '_agg.pickle'
                # pickle requires binary mode; 'w' would fail in Python 3
                with open(aggregated_dict_filename, 'wb') as f:
                    pickle.dump(aggregated_dicts_dict[(n_nodes, n_layers)], f)
                del aggregated_dicts_dict[(n_nodes, n_layers)]

        if create_examples_dict:
            for n_nodes in subgraph_size_dict[n_layers]:
                examples_dict_filename = subnets_folder + 'examples_' + str(
                    n_nodes) + '_' + str(n_layers) + '.pickle'
                with open(examples_dict_filename, 'wb') as f:
                    pickle.dump(examples_dicts_dict[(n_nodes, n_layers)], f)
                del examples_dicts_dict[(n_nodes, n_layers)]

        end_time_for_n_layers = datetime.datetime.now().replace(
            microsecond=0).isoformat()

        if clustering_method is None:
            log_file_name = voxel_level_folder + 'net_' + network_identifier + '/' + 'log.txt'
        elif clustering_method == 'template' or clustering_method == 'sklearn':
            log_file_name = cluster_level_folder + 'net_' + network_identifier + '/' + 'log.txt'
        else:
            raise NotImplementedError('Not implemented')
        with open(log_file_name, 'a+') as f:
            f.write(
                str(n_layers) + '_layers...Done at ' + end_time_for_n_layers +
                '\n')
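
# A minimal usage sketch with hypothetical paths and parameter values:
# voxel-level analysis (clustering_method=None) with a gray matter mask,
# finding all 2-node and 3-node subgraphs on 2 layers.
isomorphism_classes_from_nifti(
    '/data/a5n/run2_bold.nii',
    subj_id='a5n',
    run_number=2,
    timewindow=100,
    overlap=0,
    intralayer_density=0.05,
    interlayer_density=0.05,
    subgraph_size_dict={2: (2, 3)},
    clustering_method=None,
    mask_or_template_filename='/data/gray_matter_mask.nii',
    mask_or_template_name='gray_matter',
    preprocess_level_folder='/results/preprocessed/')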