def test_simulated_correlations():
    """Compare calculate_correlation with scipy's pearsonr on simulated images.

    Two vectors of random normal values are written into the nonzero voxels
    of the mask file MNI152_T1_2mm_brain_mask.nii.gz (found in the directory
    returned by get_data_directory()). The score obtained through
    make_binary_deletion_mask + calculate_correlation must match
    pearsonr(data1, data2)[0] to 5 decimal places. The check is repeated
    10 times after seeding numpy's global random state with a fixed value.
    """
    # Get standard brain mask
    mr_directory = get_data_directory()
    standard = nibabel.load("%s/MNI152_T1_2mm_brain_mask.nii.gz" % (mr_directory))

    # Mask, affine and header are identical for every iteration: compute once
    in_brain = standard.get_data() != 0
    number_values = len(numpy.where(in_brain)[0])
    affine = standard.get_affine()
    header = standard.get_header()

    # Generate random data inside brain mask, run 10 iterations
    numpy.random.seed(9191986)
    for _ in range(10):
        data1 = norm.rvs(size=number_values)
        data2 = norm.rvs(size=number_values)
        corr = pearsonr(data1, data2)[0]

        # Put into faux nifti images
        images = []
        for values in (data1, data2):
            volume = numpy.zeros(standard.shape)
            volume[in_brain] = values
            images.append(nibabel.nifti1.Nifti1Image(volume,
                                                     affine=affine,
                                                     header=header))
        mr1, mr2 = images

        pdmask = make_binary_deletion_mask([mr1, mr2])
        pdmask = nibabel.Nifti1Image(pdmask,
                                     header=mr1.get_header(),
                                     affine=mr1.get_affine())
        score = calculate_correlation(images=[mr1, mr2], mask=pdmask)
        assert_almost_equal(corr, score, decimal=5)
def test_simulated_correlations():
    """Verify calculate_correlation reproduces pearsonr on random in-mask data.

    For each of 10 iterations (numpy's global random state is seeded once with
    a fixed value beforehand), two random normal vectors are placed into the
    nonzero voxels of MNI152_T1_2mm_brain_mask.nii.gz, wrapped as Nifti1Image
    objects, and correlated via make_binary_deletion_mask and
    calculate_correlation. The result is asserted equal to
    pearsonr(data1, data2)[0] to 5 decimal places.
    """
    # Get standard brain mask
    mr_directory = get_data_directory()
    mask_path = "%s/MNI152_T1_2mm_brain_mask.nii.gz" % (mr_directory)
    standard = nibabel.load(mask_path)

    # Loop-invariant pieces of the reference image, read a single time
    nonzero_voxels = standard.get_data() != 0
    number_values = len(numpy.where(nonzero_voxels)[0])
    reference_affine = standard.get_affine()
    reference_header = standard.get_header()

    def _faux_nifti(values):
        # Put the vector into a faux nifti image shaped like the mask
        volume = numpy.zeros(standard.shape)
        volume[nonzero_voxels] = values
        return nibabel.nifti1.Nifti1Image(volume,
                                          affine=reference_affine,
                                          header=reference_header)

    # Generate random data inside brain mask, run 10 iterations
    numpy.random.seed(9191986)
    for _ in range(10):
        data1 = norm.rvs(size=number_values)
        data2 = norm.rvs(size=number_values)
        corr = pearsonr(data1, data2)[0]

        mr1 = _faux_nifti(data1)
        mr2 = _faux_nifti(data2)

        pdmask = make_binary_deletion_mask([mr1, mr2])
        pdmask = nibabel.Nifti1Image(pdmask,
                                     header=mr1.get_header(),
                                     affine=mr1.get_affine())
        score = calculate_correlation(images=[mr1, mr2], mask=pdmask)
        assert_almost_equal(corr, score, decimal=5)
def save_voxelwise_pearson_similarity_resample(pk1, pk2, resample_dim=[4, 4, 4]):
    """Compute the voxelwise Pearson score of two images and store it.

    Both images are resampled against MNI152_T1_2mm_brain_mask.nii.gz, the
    correlation is computed on the mask produced by make_binary_deletion_mask,
    and a Comparison row is created or updated when the score is not NaN.

    Args:
        pk1, pk2: identifiers handed to get_images_by_ordered_id; they must
            differ.
        resample_dim: passed through unchanged to resample_images_ref.

    Returns:
        (image1.pk, image2.pk, pearson_score) when a score was saved.
        None when the score is NaN, or when get_images_by_ordered_id raises
        Http404 (the images were removed in the meantime).

    Raises:
        Exception: if pk1 == pk2, or if a resampled image is not 3D after
            squeezing.
    """
    from django.http import Http404
    from neurovault.apps.statmaps.models import Similarity, Comparison

    if pk1 == pk2:
        raise Exception("You are trying to compare an image with itself!")

    # We will always calculate Comparison 1 vs 2, never 2 vs 1
    try:
        sorted_images = get_images_by_ordered_id(pk1, pk2)
    except Http404:
        # files have been deleted in the meantime
        return
    image1 = sorted_images[0]
    image2 = sorted_images[1]

    pearson_metric = Similarity.objects.get(
        similarity_metric="pearson product-moment correlation coefficient",
        transformation="voxelwise")

    # Get standard space brain
    mr_directory = get_data_directory()
    reference = "%s/MNI152_T1_2mm_brain_mask.nii.gz" % (mr_directory)
    image_paths = [image.file.path for image in [image1, image2]]
    images_resamp, _ = resample_images_ref(images=image_paths,
                                           reference=reference,
                                           interpolation="continuous",
                                           resample_dim=resample_dim)

    # resample_images_ref will "squeeze" images, but we should keep error here for now
    for image_nii, image_obj in zip(images_resamp, [image1, image2]):
        if len(numpy.squeeze(image_nii.get_data()).shape) != 3:
            raise Exception(
                "Image %s (id=%d) has incorrect number of dimensions %s"
                % (image_obj.name, image_obj.id, str(image_nii.get_data().shape)))

    # Calculate correlation only on voxels that are in both maps (not zero, and not nan)
    image1_res = images_resamp[0]
    image2_res = images_resamp[1]
    binary_mask = make_binary_deletion_mask(images_resamp)
    binary_mask = nib.Nifti1Image(binary_mask,
                                  header=image1_res.get_header(),
                                  affine=image1_res.get_affine())

    # Will return nan if comparison is not possible
    pearson_score = calculate_correlation([image1_res, image2_res],
                                          mask=binary_mask,
                                          corr_type="pearson")

    # Only save comparison if is not nan
    if not numpy.isnan(pearson_score):
        # The score goes in `defaults`, not in the lookup: otherwise a
        # recomputed score never matches the stored row and a second row is
        # attempted instead of an update.
        Comparison.objects.update_or_create(
            image1=image1,
            image2=image2,
            similarity_metric=pearson_metric,
            defaults={"similarity_score": pearson_score})
        return image1.pk, image2.pk, pearson_score
def save_voxelwise_pearson_similarity_resample(pk1, pk2,resample_dim=[4,4,4]):
    """Store the voxelwise Pearson similarity between two images.

    The two images are looked up with get_images_by_ordered_id, resampled
    against MNI152_T1_2mm_brain_mask.nii.gz, and correlated on the mask
    returned by make_binary_deletion_mask. A non-NaN score is written to a
    Comparison row (created or updated).

    Args:
        pk1, pk2: identifiers handed to get_images_by_ordered_id; they must
            differ.
        resample_dim: forwarded as-is to resample_images_ref.

    Returns:
        (image1.pk, image2.pk, pearson_score) when a score was saved.
        None when the lookup raises Http404 or when the score is NaN.

    Raises:
        Exception: if pk1 == pk2, or if a resampled image does not have
            exactly three dimensions after squeezing.
    """
    from neurovault.apps.statmaps.models import Similarity, Comparison

    if pk1 == pk2:
        raise Exception("You are trying to compare an image with itself!")

    # We will always calculate Comparison 1 vs 2, never 2 vs 1
    try:
        sorted_images = get_images_by_ordered_id(pk1, pk2)
    except Http404:
        # files have been deleted in the meantime
        return

    image1 = sorted_images[0]
    image2 = sorted_images[1]
    pearson_metric = Similarity.objects.get(
        similarity_metric="pearson product-moment correlation coefficient",
        transformation="voxelwise")

    # Get standard space brain
    mr_directory = get_data_directory()
    reference = "%s/MNI152_T1_2mm_brain_mask.nii.gz" % (mr_directory)
    image_paths = [image.file.path for image in [image1, image2]]
    images_resamp, _ = resample_images_ref(images=image_paths,
                                           reference=reference,
                                           interpolation="continuous",
                                           resample_dim=resample_dim)

    # resample_images_ref will "squeeze" images, but we should keep error here for now
    for image_nii, image_obj in zip(images_resamp, [image1, image2]):
        if len(numpy.squeeze(image_nii.get_data()).shape) != 3:
            raise Exception("Image %s (id=%d) has incorrect number of dimensions %s"%(image_obj.name,
                            image_obj.id,
                            str(image_nii.get_data().shape)))

    # Calculate correlation only on voxels that are in both maps (not zero, and not nan)
    image1_res = images_resamp[0]
    image2_res = images_resamp[1]
    binary_mask = make_binary_deletion_mask(images_resamp)
    binary_mask = nib.Nifti1Image(binary_mask,
                                  header=image1_res.get_header(),
                                  affine=image1_res.get_affine())

    # Will return nan if comparison is not possible
    pearson_score = calculate_correlation([image1_res, image2_res],
                                          mask=binary_mask,
                                          corr_type="pearson")

    # Only save comparison if is not nan
    if numpy.isnan(pearson_score):
        return

    # Keep the score out of the lookup kwargs: with it there, a changed score
    # fails to match the existing row and a new row is attempted instead of
    # updating the stored one.
    Comparison.objects.update_or_create(
        image1=image1,
        image2=image2,
        similarity_metric=pearson_metric,
        defaults={"similarity_score": pearson_score})
    return image1.pk, image2.pk, pearson_score
def assess_similarity(image1_holdout, image2_holdout, predicted_nii1,
                      predicted_nii2, actual_nii1, actual_nii2):
    """Score a two-image holdout prediction by pairwise correlation.

    Each actual image is correlated (calculate_correlation with the
    module-level standard_mask) against each predicted image. A prediction
    counts as correct when an actual image scores strictly higher against its
    own predicted image than against the other one.

    Args:
        image1_holdout, image2_holdout: labels used to tag the first and
            second image in the returned table and prediction list.
        predicted_nii1, predicted_nii2: predicted images for label 1 / 2.
        actual_nii1, actual_nii2: actual images for label 1 / 2.
            All four images are used as dictionary keys, so they must be
            hashable.

    Returns:
        (comparison_df, predictions, correct): a DataFrame with columns
        "actual", "predicted", "cca_score" (one row per actual/predicted
        pair), a list of two [actual, predicted] label pairs, and the number
        of correct predictions (0 to 2).
    """
    # Make a dictionary to lookup labels based on nifti.
    # The predicted images come from the parameters; the earlier body read
    # the undefined names nii1 / nii2 here instead.
    lookup = dict()
    lookup[actual_nii1] = image1_holdout
    lookup[actual_nii2] = image2_holdout
    lookup[predicted_nii1] = image1_holdout
    lookup[predicted_nii2] = image2_holdout

    comparison_df = pandas.DataFrame(
        columns=["actual", "predicted", "cca_score"])
    comparisons = [[actual_nii1, predicted_nii1],
                   [actual_nii1, predicted_nii2],
                   [actual_nii2, predicted_nii1],
                   [actual_nii2, predicted_nii2]]
    for count, comp in enumerate(comparisons):
        name1 = lookup[comp[0]]
        name2 = lookup[comp[1]]
        corr = calculate_correlation(comp, mask=standard_mask)
        comparison_df.loc[count] = [name1, name2, corr]

    # Example of the resulting table:
    #   actual  predicted  cca_score
    # 0   3186       3186   0.908997
    # 1   3186        420   0.485644
    # 2    420       3186   0.044668
    # 3    420        420   0.657109

    # Save list of [actual,predicted] to add to confusion
    predictions = []
    correct = 0

    # Calculate accuracy
    acc1 = comparison_df[comparison_df.actual == image1_holdout]
    acc2 = comparison_df[comparison_df.actual == image2_holdout]

    # Did we predict image1 to be image1?
    score_1_as_1 = acc1.loc[acc1.predicted == image1_holdout, "cca_score"].tolist()[0]
    score_1_as_2 = acc1.loc[acc1.predicted == image2_holdout, "cca_score"].tolist()[0]
    if score_1_as_1 > score_1_as_2:
        correct += 1
        predictions.append([image1_holdout, image1_holdout])  # image 1 predicted to be image 1
    else:
        predictions.append([image1_holdout, image2_holdout])  # image 1 predicted to be image 2

    # Did we predict image2 to be image2?
    score_2_as_2 = acc2.loc[acc2.predicted == image2_holdout, "cca_score"].tolist()[0]
    score_2_as_1 = acc2.loc[acc2.predicted == image1_holdout, "cca_score"].tolist()[0]
    if score_2_as_2 > score_2_as_1:
        correct += 1
        predictions.append([image2_holdout, image2_holdout])  # image 2 predicted to be image 2
    else:
        predictions.append([image2_holdout, image1_holdout])  # image 2 predicted to be image 1

    return comparison_df, predictions, correct
def assess_similarity(pmid1,pmid2,predicted_nii1,predicted_nii2,actual_nii1,actual_nii2):
    """Tabulate actual-vs-predicted correlations and count correct matches.

    Every actual image is correlated with every predicted image through
    calculate_correlation (masked by the module-level standard_mask). For
    each actual image, the prediction is "correct" when the score against its
    own predicted image is strictly greater than the score against the other.

    Returns a tuple (comparison_df, predictions, correct):
      comparison_df -- DataFrame with columns "actual", "predicted",
                       "cca_score", one row per actual/predicted pair
      predictions   -- list of two [actual, predicted] label pairs
      correct       -- number of correct predictions (0, 1 or 2)
    """
    # Map each image object to the label it belongs to
    label_of = {
        actual_nii1: pmid1,
        actual_nii2: pmid2,
        predicted_nii1: pmid1,
        predicted_nii2: pmid2,
    }

    # All actual/predicted pairs, actual images varying slowest
    pairs = [[actual, predicted]
             for actual in (actual_nii1, actual_nii2)
             for predicted in (predicted_nii1, predicted_nii2)]

    comparison_df = pandas.DataFrame(columns=["actual","predicted","cca_score"])
    for row, pair in enumerate(pairs):
        actual_label = label_of[pair[0]]
        predicted_label = label_of[pair[1]]
        score = calculate_correlation(pair,mask=standard_mask)
        comparison_df.loc[row] = [actual_label,predicted_label,score]

    # Resulting table looks like:
    #   actual  predicted  cca_score
    # 0   3186       3186   0.908997
    # 1   3186        420   0.485644
    # 2    420       3186   0.044668
    # 3    420        420   0.657109

    def _first_score(rows, predicted_label):
        # First cca_score among `rows` whose predicted label matches
        return rows.loc[rows.predicted==predicted_label,"cca_score"].tolist()[0]

    # Calculate accuracy, collecting [actual,predicted] pairs for the confusion
    correct = 0
    predictions = []
    for own, other in ((pmid1, pmid2), (pmid2, pmid1)):
        rows = comparison_df[comparison_df.actual==own]
        # Did we predict this image to be itself?
        if _first_score(rows, own) > _first_score(rows, other):
            correct += 1
            predictions.append([own, own])
        else:
            predictions.append([own, other])

    return comparison_df,predictions,correct
shutil.copyfile(zmap, zmap_new) if len(glob("%s/*.nii.gz" % (outfolder_z))) != images.shape[0]: raise ValueError("ERROR: not all images were found in final folder %s" % (outfolder_z)) # NEEDED for future analyses, # moved to https://github.com/vsoch/semantic-image-comparison/tree/master/analysis/wang/data images.to_csv("%s/contrast_defined_images_filtered.tsv" % results, encoding="utf-8", sep="\t") ## STEP 2: IMAGE SIMILARITY ###################################################### from pybraincompare.compare.mrutils import make_binary_deletion_mask from pybraincompare.compare.maths import calculate_correlation """ Usage: calculate_correlation(images,mask=None,atlas=None,summary=False,corr_type="pearson"): make_binary_deletion_mask(images) """ standard = nibabel.load(standard) # Function to pad ID with appropriate number of zeros def pad_zeros(the_id, total_length=6): return "%s%s" % ((total_length - len(str(the_id))) * "0", the_id) # Calculate image similarity with pearson correlation # Feasible to run in serial for small number of images print "Calculating spatial image similarity with pearson score, complete case analysis (set of overlapping voxels) for pairwise images..." image_ids = images.image_id.tolist() simmatrix = pandas.DataFrame(columns=image_ids, index=image_ids)
zeros[standard_mask.get_data() != 0] = vector return nibabel.Nifti1Image(zeros, affine=standard_mask.get_affine()) count = 0 for neurosynth_map in neurosynth_maps: concept_name = os.path.basename(neurosynth_map).replace( "_regparams.nii.gz", "") concept = get_concept(name=concept_name).json[0] neurovault_map = "%s/results/classification_final/%s_regparam_z.nii.gz" % ( base, concept["id"]) if neurovault_map in neurovault_maps: print "Found match for %s" % (concept_name) nsmap = nibabel.load(neurosynth_map) nvmap = nibabel.load(neurovault_map) score = calculate_correlation([nsmap, nvmap], mask=standard_mask) # Let's also calculate just for overlapping voxels cca_mask = make_binary_deletion_mask([nsmap, nvmap]) nvoxels = len(cca_mask[cca_mask != 0]) cca_mask = nibabel.Nifti1Image(cca_mask, affine=standard_mask.get_affine()) cca_score = calculate_correlation([nsmap, nvmap], mask=cca_mask) # And finally, since we see consistent size of cca mask (meaning not a lot of zeros) let's # try thresholding at +/- 1. The nsmap needs to be converted to z score image_df = get_images_df([nsmap, nvmap], mask=standard_mask) image_df.loc[0] = (image_df.loc[0] - image_df.loc[0].mean()) / image_df.loc[0].std() nsmap_thresh = make_brainmap(image_df.loc[0], standard_mask) nsmap_thresh = apply_threshold(nsmap_thresh, 1.0) nvmap_thresh = make_brainmap(image_df.loc[1], standard_mask) nvmap_thresh = apply_threshold(nvmap_thresh, 1.0)
reference=reference, interpolation="continuous", resample_dim=[8,8,8]) # SCATTERPLOT COMPARE ---- (with vector input) --------------------------------------------------- # We are also required to provide atlas labels, colors, and correlations, so we calculate those in advance # pybraincompare comes with mni atlas at 2 resolutions, 2mm and 8mm # 8mm is best resolution for rendering data in browser, 2mm is ideal for svg renderings atlases = get_mni_atlas(voxdims=["8","2"]) atlas = atlases["8"] atlas_rendering = atlases["2"] # This function will return a data frame with image vectors, colors, labels # The images must already be registered / in same space as reference corrs_df = calculate_correlation(images=images_resamp,mask=ref_resamp,atlas=atlas,corr_type="pearson") # Option 1: Canvas based, no mouseover of points, will render 100,000's of points output_directory = "/home/vanessa/Desktop/test" scatterplot.scatterplot_canvas(image_vector1=corrs_df.INPUT_DATA_ONE, image_vector2=corrs_df.INPUT_DATA_TWO, image_names=image_names, atlas_vector = corrs_df.ATLAS_DATA, atlas_labels=corrs_df.ATLAS_LABELS, atlas_colors=corrs_df.ATLAS_COLORS, output_directory=output_directory) # Option 2: D3 based, with mouseover of points, limited sampling of images html_snippet,data_table = scatterplot.scatterplot_compare_vector(image_vector1=corrs_df.INPUT_DATA_ONE, image_vector2=corrs_df.INPUT_DATA_TWO,
actual_nii2[standard_mask.get_data()!=0] = actual2.tolist() actual_nii1 = nibabel.Nifti1Image(actual_nii1,affine=standard_mask.get_affine()) actual_nii2 = nibabel.Nifti1Image(actual_nii2,affine=standard_mask.get_affine()) # Make a dictionary to lookup images based on nifti lookup = dict() lookup[actual_nii1] = image1_holdout lookup[actual_nii2] = image2_holdout lookup[nii1] = image1_holdout lookup[nii2] = image2_holdout comparison_df = pandas.DataFrame(columns=["actual","predicted","cca_score"]) comparisons = [[actual_nii1,nii1],[actual_nii1,nii2],[actual_nii2,nii1],[actual_nii2,nii2]] count=0 for comp in comparisons: name1 = lookup[comp[0]] name2 = lookup[comp[1]] corr = calculate_correlation(comp,mask=standard_mask) comparison_df.loc[count] = [name1,name2,corr] count+=1 # actual predicted cca_score #0 3186 3186 0.908997 #1 3186 420 0.485644 #2 420 3186 0.044668 #3 420 420 0.657109 comparison_dfs = comparison_dfs.append(comparison_df) # Calculate accuracy acc1 = comparison_df[comparison_df.actual==image1_holdout] acc2 = comparison_df[comparison_df.actual==image2_holdout] # Did we predict image1 to be image1?
affine=standard_mask.get_affine()) # Make a dictionary to lookup images based on nifti lookup = dict() lookup[actual_nii1] = image1_holdout lookup[actual_nii2] = image2_holdout lookup[nii1] = image1_holdout lookup[nii2] = image2_holdout comparison_df = pandas.DataFrame( columns=["actual", "predicted", "cca_score"]) comparisons = [[actual_nii1, nii1], [actual_nii1, nii2], [actual_nii2, nii1], [actual_nii2, nii2]] count = 0 for comp in comparisons: name1 = lookup[comp[0]] name2 = lookup[comp[1]] corr = calculate_correlation(comp, mask=standard_mask) comparison_df.loc[count] = [name1, name2, corr] count += 1 # actual predicted cca_score #0 3186 3186 0.908997 #1 3186 420 0.485644 #2 420 3186 0.044668 #3 420 420 0.657109 comparison_dfs = comparison_dfs.append(comparison_df) # Calculate accuracy acc1 = comparison_df[comparison_df.actual == image1_holdout] acc2 = comparison_df[comparison_df.actual == image2_holdout] # Did we predict image1 to be image1?
interpolation="continuous", resample_dim=[8, 8, 8]) # SCATTERPLOT COMPARE ---- (with vector input) --------------------------------------------------- # We are also required to provide atlas labels, colors, and correlations, so we calculate those in advance # pybraincompare comes with mni atlas at 2 resolutions, 2mm and 8mm # 8mm is best resolution for rendering data in browser, 2mm is ideal for svg renderings atlases = get_mni_atlas(voxdims=["8", "2"]) atlas = atlases["8"] atlas_rendering = atlases["2"] # This function will return a data frame with image vectors, colors, labels # The images must already be registered / in same space as reference corrs_df = calculate_correlation(images=images_resamp, mask=ref_resamp, atlas=atlas, corr_type="pearson") # Option 1: Canvas based, no mouseover of points, will render 100,000's of points output_directory = "/home/vanessa/Desktop/test" scatterplot.scatterplot_canvas(image_vector1=corrs_df.INPUT_DATA_ONE, image_vector2=corrs_df.INPUT_DATA_TWO, image_names=image_names, atlas_vector=corrs_df.ATLAS_DATA, atlas_labels=corrs_df.ATLAS_LABELS, atlas_colors=corrs_df.ATLAS_COLORS, output_directory=output_directory) # Option 2: D3 based, with mouseover of points, limited sampling of images html_snippet, data_table = scatterplot.scatterplot_compare_vector( image_vector1=corrs_df.INPUT_DATA_ONE,
zmap_new = "%s/%s" %(outfolder_z,os.path.split(zmap)[-1]) shutil.copyfile(zmap,zmap_new) if len(glob("%s/*.nii.gz" %(outfolder_z))) != images.shape[0]: raise ValueError("ERROR: not all images were found in final folder %s" %(outfolder_z)) # NEEDED for future analyses, # moved to https://github.com/vsoch/semantic-image-comparison/tree/master/analysis/wang/data images.to_csv("%s/contrast_defined_images_filtered.tsv" %results,encoding="utf-8",sep="\t") ## STEP 2: IMAGE SIMILARITY ###################################################### from pybraincompare.compare.mrutils import make_binary_deletion_mask from pybraincompare.compare.maths import calculate_correlation """ Usage: calculate_correlation(images,mask=None,atlas=None,summary=False,corr_type="pearson"): make_binary_deletion_mask(images) """ standard = nibabel.load(standard) # Function to pad ID with appropriate number of zeros def pad_zeros(the_id,total_length=6): return "%s%s" %((total_length - len(str(the_id))) * "0",the_id) # Calculate image similarity with pearson correlation # Feasible to run in serial for small number of images print "Calculating spatial image similarity with pearson score, complete case analysis (set of overlapping voxels) for pairwise images..." image_ids = images.image_id.tolist() simmatrix = pandas.DataFrame(columns=image_ids,index=image_ids) for id1 in image_ids:
def make_brainmap(vector,standard_mask): zeros = numpy.zeros(standard_mask.shape) zeros[standard_mask.get_data()!=0] = vector return nibabel.Nifti1Image(zeros,affine=standard_mask.get_affine()) count=0 for neurosynth_map in neurosynth_maps: concept_name = os.path.basename(neurosynth_map).replace("_regparams.nii.gz","") concept = get_concept(name=concept_name).json[0] neurovault_map = "%s/results/classification_final/%s_regparam_z.nii.gz" %(base,concept["id"]) if neurovault_map in neurovault_maps: print "Found match for %s" %(concept_name) nsmap = nibabel.load(neurosynth_map) nvmap = nibabel.load(neurovault_map) score = calculate_correlation([nsmap,nvmap],mask=standard_mask) # Let's also calculate just for overlapping voxels cca_mask = make_binary_deletion_mask([nsmap,nvmap]) nvoxels = len(cca_mask[cca_mask!=0]) cca_mask = nibabel.Nifti1Image(cca_mask,affine=standard_mask.get_affine()) cca_score = calculate_correlation([nsmap,nvmap],mask=cca_mask) # And finally, since we see consistent size of cca mask (meaning not a lot of zeros) let's # try thresholding at +/- 1. The nsmap needs to be converted to z score image_df = get_images_df([nsmap,nvmap],mask=standard_mask) image_df.loc[0] = (image_df.loc[0] - image_df.loc[0].mean()) / image_df.loc[0].std() nsmap_thresh = make_brainmap(image_df.loc[0],standard_mask) nsmap_thresh = apply_threshold(nsmap_thresh,1.0) nvmap_thresh = make_brainmap(image_df.loc[1],standard_mask) nvmap_thresh = apply_threshold(nvmap_thresh,1.0) cca_mask_thresh = make_binary_deletion_mask([nsmap_thresh,nvmap_thresh]) nvoxels_thresh = len(cca_mask_thresh[cca_mask_thresh!=0])