def temporal_pca(train_data,
                 test_data,
                 train_half=1,
                 test_half=2,
                 stories=None,
                 subjects=None,
                 hemisphere=None,
                 save_prefix=None,
                 **kwargs):

    # Transpose time-series training data
    train_data_t = {}

    # By default grab all stories
    stories = check_keys(train_data, keys=stories)
    for story in stories:
        train_data_t[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(train_data[story],
                                  keys=subjects,
                                  subkey=story)
        for subject in subject_list:
            train_data_t[story][subject] = {}

            # For simplicity we just assume same hemis across stories/subjects
            hemis = check_keys(train_data[story][subject], keys=hemisphere)
            for hemi in hemis:
                train_data_t[story][subject][hemi] = train_data[story][
                    subject][hemi].T

    # Fit PCA on transposed time series and get transformation matrices
    transforms = pca_fit(train_data_t,
                         stories=stories,
                         hemisphere=hemisphere,
                         **kwargs)

    # Apply transformations to training data
    train_transformed = pca_transform(train_data,
                                      transforms,
                                      half=train_half,
                                      stories=stories,
                                      subjects=subjects,
                                      hemisphere=hemisphere,
                                      save_prefix=(save_prefix + '-train'
                                                   if save_prefix else None))

    print("Finished applying tPCA transformations to training data")

    # Apply transformations to test data
    test_transformed = pca_transform(test_data,
                                     transforms,
                                     half=test_half,
                                     stories=stories,
                                     subjects=subjects,
                                     hemisphere=hemisphere,
                                     save_prefix=(save_prefix + '-test'
                                                  if save_prefix else None))

    print("Finished applying tPCA transformations to test data")

    return train_transformed, test_transformed
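
# A minimal, self-contained sketch of the core tPCA operation above, with
# hypothetical shapes (the real pipeline nests data as
# {story: {subject: {hemi: (TRs x vertices) array}}} and relies on check_keys,
# pca_fit, and pca_transform from this module): pool every subject's
# (TRs x vertices) matrix row-wise, fit one PCA over vertex features, then
# transform each subject into (TRs x k) component time series.
import numpy as np
from scipy.stats import zscore
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
subjects = {f'sub-{i:02d}': rng.standard_normal((300, 1000)) for i in range(3)}

pca = PCA(n_components=10)
pca.fit(np.vstack(list(subjects.values())))

transformed = {sub: zscore(pca.transform(ts), axis=0)
               for sub, ts in subjects.items()}
assert transformed['sub-00'].shape == (300, 10)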
Example 2
def target_wsfc(data,
                targets,
                stories=None,
                subjects=None,
                hemisphere=None,
                zscore_fcs=True):

    # By default grab all stories
    stories = check_keys(data, keys=stories)

    target_wsfcs = {}
    for story in stories:

        target_wsfcs[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(data[story], keys=subjects, subkey=story)

        for subject in subject_list:
            target_wsfcs[story][subject] = {}

        # Stack targets in third dimension
        target_stack = np.dstack(
            ([targets[story][subject] for subject in subject_list]))

        # By default grab both hemispheres
        hemis = check_keys(data[story][subject_list[0]], keys=hemisphere)

        # Get for specified hemisphere(s)
        for hemi in hemis:

            # Grab ROI data and targets
            data_stack = np.dstack(
                ([data[story][subject][hemi] for subject in subject_list]))

            # Compute WSFCs between ROI and targets
            wsfcs = []
            for s in np.arange(data_stack.shape[2]):
                d = data_stack[..., s].shape[1]
                wsfc = np.corrcoef(data_stack[..., s].T,
                                   target_stack[..., s].T)[:d, d:]
                wsfcs.append(np.expand_dims(wsfc, 0))
            wsfcs = np.vstack(wsfcs)

            # Optionally z-score across targets
            if zscore_fcs:
                wsfcs = zscore(np.nan_to_num(wsfcs), axis=2)

            for s, subject in enumerate(subject_list):
                target_wsfcs[story][subject][hemi] = wsfcs[s]

        print(f"Finished computing target WSFCs for story '{story}'")

    return target_wsfcs
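
# Aside: the np.corrcoef block trick used in target_wsfc, in isolation
# (hypothetical shapes). Stacking a (T x d) data matrix and a (T x t)
# target matrix gives a (d + t) x (d + t) correlation matrix; the [:d, d:]
# block is exactly the d x t cross-correlation between the two sets.
import numpy as np

T, d, t = 100, 5, 3
rng = np.random.default_rng(0)
data, targets = rng.standard_normal((T, d)), rng.standard_normal((T, t))
cross_corr = np.corrcoef(data.T, targets.T)[:d, d:]
assert cross_corr.shape == (d, t)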
def srm_project(train_data, test_data, targets, srms,
                target_fc=target_isfc,
                train_half=1, test_half=2, stories=None,
                subjects=None, hemisphere=None,
                zscore_projected=True, save_prefix=None):

    # Compute FCs for projecting via connectivity
    target_fcs = target_fc(train_data, targets, stories=stories,
                           subjects=subjects, hemisphere=hemisphere)
    
    # By default grab all stories
    stories = check_keys(test_data, keys=stories)

    data_projected = {}
    for story in stories:

        data_projected[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(test_data[story], keys=subjects,
                                  subkey=story)

        for subject in subject_list:

            data_projected[story][subject] = {}
            hemis = check_keys(test_data[story][subject],
                               keys=hemisphere)

            for hemi in hemis:

                # Find new projection into shared space based on FCs
                projection = srms[hemi].transform_subject(
                    target_fcs[story][subject][hemi])
                
                # Project left-out test data into shared space
                projected = test_data[story][subject][hemi].dot(projection)
                
                # Optionally z-score projected output data
                if zscore_projected:
                    projected = zscore(projected, axis=0)

                data_projected[story][subject][hemi] = projected

                if save_prefix:
                    save_fn = (f'data/{subject}_task-{story}_'
                               f'half-{test_half}_{save_prefix}-test_{hemi}.npy')
                    np.save(save_fn, projected)

    return data_projected
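
# Aside: the projection step of srm_project in isolation, with hypothetical
# shapes; `projection` here stands in for the (vertices x k) output of
# srm.transform_subject(), which estimates a new subject's weights from
# their connectivity matrix without refitting the shared space.
import numpy as np
from scipy.stats import zscore

rng = np.random.default_rng(0)
test_ts = rng.standard_normal((300, 1000))    # TRs x vertices
projection = rng.standard_normal((1000, 50))  # stand-in for transform_subject output
projected = zscore(test_ts.dot(projection), axis=0)  # TRs x k, z-scored per dim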
Example 4
def target_isfc(data,
                targets,
                stories=None,
                subjects=None,
                hemisphere=None,
                zscore_fcs=True):

    # By default grab all stories
    stories = check_keys(data, keys=stories)

    target_isfcs = {}
    for story in stories:

        target_isfcs[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(data[story], keys=subjects, subkey=story)

        for subject in subject_list:
            target_isfcs[story][subject] = {}

        # Stack targets in third dimension
        target_stack = np.dstack(
            ([targets[story][subject] for subject in subject_list]))

        # By default grab both hemispheres
        hemis = check_keys(data[story][subject_list[0]], keys=hemisphere)

        # Get for specified hemisphere(s)
        for hemi in hemis:

            # Grab ROI data and targets
            data_stack = np.dstack(
                ([data[story][subject][hemi] for subject in subject_list]))

            # Compute ISFCs between ROI and targets
            isfcs = isfc(data_stack, targets=target_stack)

            # Optionally z-score across targets
            if zscore_fcs:
                isfcs = zscore(np.nan_to_num(isfcs), axis=2)

            for s, subject in enumerate(subject_list):
                target_isfcs[story][subject][hemi] = isfcs[s]

        print(f"Finished computing target ISFCs for story '{story}'")

    return target_isfcs
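
# Aside: a rough numpy sketch of what we understand isfc(..., targets=...)
# to compute (leave-one-out ISFC, hypothetical shapes): each subject's
# vertex time series are correlated with the mean of the *other* subjects'
# target time series, yielding one (vertices x targets) matrix per subject.
import numpy as np

T, v, t, n = 100, 20, 5, 4
rng = np.random.default_rng(0)
data_stack = rng.standard_normal((T, v, n))
target_stack = rng.standard_normal((T, t, n))

loo_isfcs = []
for s in range(n):
    others = np.mean(np.delete(target_stack, s, axis=2), axis=2)  # T x t
    loo_isfcs.append(np.corrcoef(data_stack[..., s].T, others.T)[:v, v:])
loo_isfcs = np.stack(loo_isfcs)  # subjects x vertices x targets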
Example 5
def parcel_means(data, atlas, parcel_labels=None, stories=None, subjects=None):

    # By default grab all stories
    stories = check_keys(data, keys=stories)

    parcels = {}
    for story in stories:

        parcels[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(data[story], keys=subjects, subkey=story)

        # Stack left and right hemispheres for each subject
        for subject in subject_list:

            # Loop through both hemispheres
            hemi_stack = []
            for hemi in ['lh', 'rh']:

                # Grab mean parcel time series for this hemisphere
                parcel_tss = []
                for parcel_label in parcel_labels[hemi]:

                    # Get mean for this parcel
                    parcel_ts = np.mean(data[story][subject][hemi]
                                        [:, atlas[hemi] == parcel_label],
                                        axis=1)

                    # Expand dimension for easier stacking
                    parcel_tss.append(np.expand_dims(parcel_ts, 1))

                # Stack parcel means
                parcel_tss = np.hstack(parcel_tss)
                hemi_stack.append(parcel_tss)

            # Stack hemispheres
            hemi_stack = np.hstack(hemi_stack)
            assert hemi_stack.shape[1] == (len(parcel_labels['lh']) +
                                           len(parcel_labels['rh']))

            parcels[story][subject] = hemi_stack

        print(f"Finished computing parcel means for '{story}'")

    return parcels
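
# Aside: the per-parcel averaging in isolation (hypothetical shapes). The
# atlas is a per-vertex label vector; boolean-masking the vertex axis and
# averaging over it yields one mean time series per parcel.
import numpy as np

rng = np.random.default_rng(0)
ts = rng.standard_normal((300, 1000))      # TRs x vertices
atlas_lh = rng.integers(1, 11, size=1000)  # hypothetical 10-parcel labels
parcel_ts = np.mean(ts[:, atlas_lh == 3], axis=1)  # mean series for parcel 3
assert parcel_ts.shape == (300,)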
def pca_transform(data,
                  transforms,
                  half=1,
                  stories=None,
                  subjects=None,
                  hemisphere=None,
                  zscore_transformed=True,
                  save_prefix=None):

    # By default grab all stories
    stories = check_keys(data, keys=stories)

    data_transformed = {}
    for story in stories:

        data_transformed[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(data[story], keys=subjects, subkey=story)

        for subject in subject_list:

            data_transformed[story][subject] = {}
            hemis = check_keys(data[story][subject], keys=hemisphere)

            for hemi in hemis:

                transformed = transforms[hemi].transform(
                    data[story][subject][hemi])

                # Optionally z-score transformed output data
                if zscore_transformed:
                    transformed = zscore(transformed, axis=0)

                data_transformed[story][subject][hemi] = transformed

                if save_prefix:
                    save_fn = (f'data/{subject}_task-{story}_'
                               f'half-{half}_{save_prefix}_{hemi}.npy')
                    np.save(save_fn, transformed)

    return data_transformed
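
# Aside: zscore(..., axis=0) standardizes each column, i.e., each
# component's time course, independently; a quick check with hypothetical
# shapes:
import numpy as np
from scipy.stats import zscore

x = np.random.default_rng(0).standard_normal((300, 10))
z = zscore(x, axis=0)
assert np.allclose(z.mean(axis=0), 0) and np.allclose(z.std(axis=0), 1)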
def stack_subjects(data, subjects=None, hemisphere='lh'):

    # By default just grab all subjects
    subject_list = check_keys(data, keys=subjects)

    subject_stack = np.dstack(
        [data[subject][hemisphere] for subject in subject_list])

    assert subject_stack.shape[2] == len(subject_list)

    return subject_stack
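
# Aside: expected input/output layout for stack_subjects, with hypothetical
# shapes: data maps subject -> hemisphere -> (TRs x vertices) array, and
# the result is a (TRs x vertices x subjects) stack.
import numpy as np

rng = np.random.default_rng(0)
data = {f'sub-{i:02d}': {'lh': rng.standard_normal((300, 1000))}
        for i in range(3)}
stack = np.dstack([d['lh'] for d in data.values()])
assert stack.shape == (300, 1000, 3)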
Example 8
def parcel_srm(data,
               atlas,
               k=3,
               parcel_labels=None,
               stories=None,
               subjects=None):

    # By default grab all stories
    stories = check_keys(data, keys=stories)

    # First compute mean time series for all target parcels
    targets = parcel_means(data,
                           atlas,
                           parcel_labels=parcel_labels,
                           stories=stories,
                           subjects=subjects)

    # Compute ISFCs with targets for all vertices
    target_fcs = target_isfc(data, targets, stories=stories, subjects=subjects)

    parcels = {}
    for story in stories:

        parcels[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(data[story], keys=subjects, subkey=story)

        # Loop through both hemispheres
        hemi_stack = []
        for hemi in ['lh', 'rh']:

            # Loop through parcels
            parcel_tss = []
            for parcel_label in parcel_labels[hemi]:

                # Resort parcel FCs into list of subject parcels
                fc_stack = []
                ts_stack = []
                for subject in subject_list:

                    # Grab the connectivities for this parcel
                    parcel_fcs = target_fcs[story][subject][hemi][
                        atlas[hemi] == parcel_label, :]
                    fc_stack.append(parcel_fcs)

                    ts_stack.append(data[story][subject][hemi]
                                    [:, atlas[hemi] == parcel_label])

                # Set up fresh SRM
                srm = SRM(features=k)

                # Train SRM on parcel connectivities
                srm.fit([np.nan_to_num(fc) for fc in fc_stack])

                # Apply transformations to time series
                transformed_stack = [
                    ts.dot(w) for ts, w in zip(ts_stack, srm.w_)
                ]
                transformed_stack = np.dstack(transformed_stack)
                parcel_tss.append(transformed_stack)
                print(f"Finished SRM for {hemi} parcel "
                      f"{parcel_label} in '{story}'")

            # Stack parcel means
            parcel_tss = np.hstack(parcel_tss)
            hemi_stack.append(parcel_tss)

        # Stack hemispheres
        hemi_stack = np.hstack(hemi_stack)
        assert hemi_stack.shape[1] == (len(parcel_labels['lh']) +
                                       len(parcel_labels['rh'])) * k
        assert hemi_stack.shape[2] == len(subject_list)

        # Unstack subjects
        hemi_stack = np.dsplit(hemi_stack, hemi_stack.shape[2])
        for subject, ts in zip(subject_list, hemi_stack):
            parcels[story][subject] = np.squeeze(ts)

        print(f"Finished applying cSRM to parcels for '{story}'")

    return parcels
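
# Aside: one iteration of the per-parcel cSRM loop in isolation
# (hypothetical shapes; assumes brainiak is installed). SRM is fit on each
# subject's (parcel vertices x targets) connectivity matrix, and the
# learned (vertices x k) weights then project that parcel's
# (TRs x vertices) time series into a shared k-dimensional space.
import numpy as np
from brainiak.funcalign.srm import SRM

rng = np.random.default_rng(0)
n_subjects, v, t, T, k = 4, 50, 30, 300, 3
fc_stack = [rng.standard_normal((v, t)) for _ in range(n_subjects)]
ts_stack = [rng.standard_normal((T, v)) for _ in range(n_subjects)]

srm = SRM(features=k)
srm.fit([np.nan_to_num(fc) for fc in fc_stack])
transformed = [ts.dot(w) for ts, w in zip(ts_stack, srm.w_)]  # each TRs x k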
Example 9
def vertex_isc(data,
               threshold=.2,
               stories=None,
               subjects=None,
               half=1,
               save_iscs=False):

    # By default grab all stories
    stories = check_keys(data, keys=stories)

    vertex_iscs = {}
    for story in stories:

        # By default just grab all subjects
        subject_list = check_keys(data[story], keys=subjects, subkey=story)

        # Get for specified hemisphere(s)
        hemi_stack = []
        for hemi in ['lh', 'rh']:

            # Grab ROI data and targets
            data_stack = np.dstack(
                ([data[story][subject][hemi] for subject in subject_list]))

            # Compute mean ISCs for this story and hemisphere
            iscs = isc(data_stack, summary_statistic='mean')

            # Optionally save ISCs
            if save_iscs:
                save_fn = (f'data/{story}_half-{half}_vertex-iscs_'
                           f'thresh-{threshold}_{hemi}.npy')
                np.save(save_fn, iscs)

            hemi_stack.append(iscs)

        # Stack left and right hemispheres
        vertex_iscs[story] = np.hstack(hemi_stack)

        print(f"Finished computing vertex-wise ISCs for '{story}'")

    # Find the average ISCs across all stories (with Fisher Z)
    mean_iscs = np.tanh(
        np.mean([np.arctanh(vertex_iscs[story]) for story in stories], axis=0))

    # Optionally save ISCs
    if save_iscs:
        save_fn = (f'data/mean_half-{half}_vertex-iscs_'
                   f'thresh-{threshold}.npy')
        np.save(save_fn, mean_iscs)

    # Get vertices with mean ISC exceeding threshold
    isc_mask = mean_iscs >= threshold
    n_mask = np.sum(isc_mask)
    mask_lh = isc_mask[:len(isc_mask) // 2]
    mask_rh = isc_mask[len(isc_mask) // 2:]

    # Grab vertex time-series in ISC mask
    masked_data = {}
    for story in stories:

        masked_data[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(data[story], keys=subjects, subkey=story)

        for subject in subject_list:

            # Mask and recombine hemispheres
            masked = np.hstack((data[story][subject]['lh'][:, mask_lh],
                                data[story][subject]['rh'][:, mask_rh]))
            assert masked.shape[1] == n_mask

            masked_data[story][subject] = masked

    print("Finished computing ISC-based targets "
          f"({n_mask} vertices at threshold r = {threshold})")

    return masked_data
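
# Aside: averaging correlations via the Fisher z-transform, as in
# vertex_isc above (hypothetical values): arctanh to z, average, tanh back
# to r. This avoids the bias of averaging r values directly.
import numpy as np

story_iscs = [np.array([.2, .5]), np.array([.4, .7])]
mean_r = np.tanh(np.mean([np.arctanh(r) for r in story_iscs], axis=0))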
def pca_fit(target_fcs,
            stories=None,
            subjects=None,
            hemisphere=None,
            k=360,
            n_iter=10,
            half=1,
            save_prefix=None):

    # By default grab all stories
    stories = check_keys(target_fcs, keys=stories)

    # Recompile FCs accounting for repeat subjects
    subject_fcs = {}
    for story in stories:

        # By default just grab all subjects
        subject_list = check_keys(target_fcs[story],
                                  keys=subjects,
                                  subkey=story)

        for subject in subject_list:

            # For simplicity we just assume same hemis across stories/subjects
            hemis = check_keys(target_fcs[story][subject], keys=hemisphere)

            for hemi in hemis:

                # If subject is not already there, make new dict for them
                if subject not in subject_fcs:
                    subject_fcs[subject] = {}

                # If hemispheres aren't in there, add them
                if hemi not in subject_fcs[subject]:
                    subject_fcs[subject][hemi] = []

                # Finally, make list of connectivity matrices per subject
                subject_fcs[subject][hemi].append(
                    target_fcs[story][subject][hemi])

    # Stack FCs in connectivity space (for all subjects across stories!)
    all_subjects = list(subject_fcs.keys())
    for subject in all_subjects:
        for hemi in hemis:

            # If more than one connectivity per subject, take average
            if len(subject_fcs[subject][hemi]) > 1:
                subject_fcs[subject][hemi] = np.mean(
                    subject_fcs[subject][hemi], axis=0)
            else:
                subject_fcs[subject][hemi] = subject_fcs[subject][hemi][0]

    # Convert FCs to list for PCA (grab the shared space too)
    transforms = {}
    for hemi in hemis:

        # Declare PCA for this hemi
        pca = PCA(n_components=k)

        subject_stack = []
        for subject in all_subjects:
            subject_stack.append(subject_fcs[subject][hemi].T)
        subject_stack = np.vstack(subject_stack)

        # Fit PCA
        start = time()
        pca.fit(subject_stack)
        print(f"Finished fitting PCA after {time() - start:.1f} seconds")

        transforms[hemi] = pca

    if save_prefix:
        np.save(f'data/half-{half}_{save_prefix}_pca.npy', transforms)

    return transforms
def srm_fit(target_fcs, stories=None, subjects=None,
            hemisphere=None, k=360, n_iter=10,
            half=1, save_prefix=None):

    # By default grab all stories
    stories = check_keys(target_fcs, keys=stories)

    # Recompile FCs accounting for repeat subjects
    subject_fcs = {}
    for story in stories:

        # By default just grab all subjects
        subject_list = check_keys(target_fcs[story], keys=subjects,
                                  subkey=story)

        for subject in subject_list:

            # For simplicity we just assume same hemis across stories/subjects
            hemis = check_keys(target_fcs[story][subject],
                               keys=hemisphere)

            for hemi in hemis:
                
                # If subject is not already there, make new dict for them
                if subject not in subject_fcs:
                    subject_fcs[subject] = {}
                    
                # If hemispheres aren't in there, add them
                if hemi not in subject_fcs[subject]:
                    subject_fcs[subject][hemi] = []
                    
                # Finally, make list of connectivity matrices per subject
                subject_fcs[subject][hemi].append(
                        target_fcs[story][subject][hemi])

    # Stack FCs in connectivity space (for all subjects across stories!)
    all_subjects = list(subject_fcs.keys())
    for subject in all_subjects:
        for hemi in hemis:

            # If more than one connectivity per subject, take average
            if len(subject_fcs[subject][hemi]) > 1:
                subject_fcs[subject][hemi] = np.mean(subject_fcs[subject][hemi],
                                                     axis=0)
            else:
                subject_fcs[subject][hemi] = subject_fcs[subject][hemi][0]

    # Convert FCs to list for SRM (grab the shared space too)
    transforms, shared_space, srms = {}, {}, {}
    for hemi in hemis:

        # Declare SRM for this hemi
        srm = SRM(n_iter=n_iter, features=k)

        subject_ids, subject_stack = [], []
        for subject in all_subjects:
            subject_ids.append(subject)
            subject_stack.append(subject_fcs[subject][hemi])
            if subject not in transforms:
                transforms[subject] = {}

        # Train SRM and apply
        start = time()
        srm.fit(subject_stack)
        print(f"Finished fitting SRM after {time() - start:.1f} seconds")

        for subject_id, transform in zip(subject_ids, srm.w_):
            transforms[subject_id][hemi] = transform
            
        shared_space[hemi] = srm.s_
        srms[hemi] = srm
        
    if save_prefix:
        np.save(f'data/half-{half}_{save_prefix}_w.npy', transforms)
        np.save(f'data/half-{half}_{save_prefix}_s.npy', shared_space)

    return transforms, srms
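
# Aside: np.save pickles these dictionaries (of weight matrices and SRM
# shared spaces), so loading them back requires allow_pickle; e.g., with a
# hypothetical prefix:
#   transforms = np.load('data/half-1_cSRM_w.npy', allow_pickle=True).item()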
Example 12
def srm_fit(target_fcs, stories=None, subjects=None,
            hemisphere=None, k=360, n_iter=10):

    # By default grab all stories
    stories = check_keys(target_fcs, keys=stories)

    # Recompile FCs accounting for repeat subjects
    subject_fcs = {}
    for story in stories:

        # By default just grab all subjects
        subject_list = check_keys(target_fcs[story], keys=subjects,
                                  subkey=story)

        for subject in subject_list:

            # For simplicity we just assume same hemis across stories/subjects
            hemis = check_keys(target_fcs[story][subject],
                               keys=hemisphere)

            for hemi in hemis: 
                if subject not in subject_fcs:
                    subject_fcs[subject] = {}
                if hemi not in subject_fcs[subject]:
                    subject_fcs[subject][hemi] = []
                subject_fcs[subject][hemi].append(
                        target_fcs[story][subject][hemi])

    # Stack FCs across stories in connectivity space
    all_subjects = list(subject_fcs.keys())
    for subject in all_subjects:
        for hemi in hemis:

            if len(subject_fcs[subject][hemi]) > 1:
                subject_fcs[subject][hemi] = np.mean(subject_fcs[subject][hemi],
                                                     axis=0)
            else:
                subject_fcs[subject][hemi] = subject_fcs[subject][hemi][0]

    # Convert FCs to list for SRM
    transforms = {}
    for hemi in hemis:

        # Declare SRM for this hemi
        srm = SRM(n_iter=n_iter, features=k)

        subject_ids, subject_stack = [], []
        for subject in all_subjects:
            subject_ids.append(subject)
            subject_stack.append(subject_fcs[subject][hemi])
            if subject not in transforms:
                transforms[subject] = {}

        # Train SRM and apply
        start = time()
        srm.fit(subject_stack)
        print(f"Finished fitting SRM after {time() - start:.1f} seconds")

        for subject_id, transform in zip(subject_ids, srm.w_):
            transforms[subject_id][hemi] = transform

    return transforms
Example 13
    # Loop through keys without replacing existing ones
    for story in stories:
        if story not in results:
            results[story] = {}

        if story_train == 'within':
            train_stories, test_stories = story, story
        elif story_train == 'across':
            test_stories = story
            train_stories = [st for st in stories if st != story]
        elif story_train == 'all':
            test_stories = story
            train_stories = stories

        # By default just grab all subjects
        subject_list = check_keys(metadata[story]['data'])

        # Split models and load in data splits
        train_model_dict = split_models(metadata,
                                        stories=stories,
                                        subjects=None,
                                        half=1,
                                        delays=delays)
        test_model_dict = split_models(metadata,
                                       stories=stories,
                                       subjects=None,
                                       half=2,
                                       delays=delays)

        for roi in rois:
            if roi not in results[story]: