Ejemplo n.º 1
0
def calculate_reverse_inference_distance(query_image, in_images, out_images,
                                         standard_mask, equal_priors=True):
    '''calculate_reverse_inference_distance

    Return a reverse inference score for a query image, using its squared
    correlation with the mean image of each group as the likelihood.

    ..note::

        Reverse Inference Calculation
        P(mental process|activation) =
            P(activation|mental process) * P(mental process)
        divided by
            P(activation|mental process) * P(mental process)
            + P(activation|~mental process) * P(~mental process)

        P(activation|mental process) is the squared result of
        calculate_pairwise_correlation between the mean "in" image and the
        query; P(activation|~mental process) is the same quantity computed
        against the mean "out" image.

    :param query_image: nifti image path
        image that we want to calculate the reverse inference score for

    :param in_images: list of nifti files
        brain maps that are defined for the concept

    :param out_images: list of nifti files
        the rest; must not share any entry with in_images

    :param standard_mask: mask passed to get_images_df for every image read

    :param equal_priors: boolean
        use 0.5 as a prior for each group [default True]. If set to False, the
        frequency of each group in the total set is used instead.
        "True" is recommended for small sets.

    :return: weighted "in" likelihood divided by the sum of the weighted
        "in" and "out" likelihoods

    :raises ValueError: if in_images and out_images share an entry
    '''
    shared = numpy.intersect1d(in_images, out_images)
    if len(shared) > 0:
        raise ValueError("ERROR: in_images and out_images should not share images!")

    # Load every group image once; rows are labelled by path so each group
    # can be selected back out of the combined frame
    all_images = in_images + out_images
    mr = get_images_df(file_paths=all_images, mask=standard_mask)
    mr.index = all_images
    in_subset = mr.loc[in_images]
    out_subset = mr.loc[out_images]

    # Priors: either flat, or the share of images that falls in each group
    if equal_priors:
        p_process_in = p_process_out = 0.5
    else:
        in_count, out_count = len(in_images), len(out_images)
        total = in_count + out_count
        p_process_in = float(in_count) / total
        p_process_out = float(out_count) / total

    query = get_images_df(file_paths=query_image, mask=standard_mask)

    def squared_similarity(subset):
        # NOTE(review): query[0] selects label 0 of the frame returned for
        # the query image -- confirm this is the full voxel vector
        mean_image = pandas.DataFrame(subset.mean())
        return numpy.power(
            calculate_pairwise_correlation(mean_image[0], query[0]), 2)

    p_in = squared_similarity(in_subset)
    p_out = squared_similarity(out_subset)

    # Bayes rule with the squared correlations standing in for likelihoods
    weighted_in = p_in * p_process_in
    weighted_out = p_out * p_process_out
    return weighted_in / (weighted_in + weighted_out)
Ejemplo n.º 2
0
    empty_nii = numpy.zeros(dataset.masker.volume.shape)
    empty_nii[dataset.masker.volume.get_data() != 0] = pmid_mr
    empty_nii = nibabel.Nifti1Image(empty_nii,
                                    affine=dataset.masker.volume.get_affine())
    tmpnii = "%s/tmp.nii.gz" % (neurosynth_feature_maps)
    nibabel.save(empty_nii, tmpnii)
    # ***Interpolation must be nearest as neurosynth data is binary!
    nii = resample_img(tmpnii,
                       target_affine=brain_4mm.get_affine(),
                       interpolation="nearest")
    nibabel.save(nii, "%s/%s.nii.gz" % (neurosynth_feature_maps, pmid))

# Load into image data frame
# The temporary volume must be gone before globbing, otherwise "tmp" would be
# picked up as a concept map and fail the integer conversion below. It only
# exists if at least one map was written, so guard the removal.
tmpnii = "%s/tmp.nii.gz" % (neurosynth_feature_maps)
if os.path.exists(tmpnii):
    os.remove(tmpnii)
concept_maps_4mm = glob("%s/*.nii.gz" % (neurosynth_feature_maps))
X = get_images_df(file_paths=concept_maps_4mm, mask=brain_4mm)

# Row labels are the integer ids taken from the file names (<id>.nii.gz).
# Parse the base name only: stripping the folder string from the whole path
# also removes any occurrence of that string inside the file name itself.
Xindex = [int(os.path.basename(x).split(".")[0]) for x in concept_maps_4mm]
X.index = Xindex

### ENCODING MODEL
## This is our "features" data frame
# X=load_neurosynth_term_mappings() # size nterms X npapers -  for each paper, a binary encoding of the presence/absence of each cog atlas term in the abstract
# mapping=numpy.zeros(nvoxels,nterms)

# neurosynth_map=load_data() # data from all voxels, size nvoxels X npapers, binary encoding of activation presence/absence
Ejemplo n.º 3
0
def calculate_reverse_inference_distance(query_image,
                                         in_images,
                                         out_images,
                                         standard_mask,
                                         equal_priors=True):
    '''calculate_reverse_inference_distance

    Return a reverse inference score for a query image, using its squared
    correlation with the mean image of each group as the likelihood.

    ..note::

        Reverse Inference Calculation
        P(mental process|activation) =
            P(activation|mental process) * P(mental process)
        divided by
            P(activation|mental process) * P(mental process)
            + P(activation|~mental process) * P(~mental process)

        P(activation|mental process) is the squared result of
        calculate_pairwise_correlation between the mean "in" image and the
        query; P(activation|~mental process) is the same quantity computed
        against the mean "out" image.

    :param query_image: nifti image path
        image that we want to calculate the reverse inference score for

    :param in_images: list of nifti files
        brain maps that are defined for the concept

    :param out_images: list of nifti files
        the rest; must not share any entry with in_images

    :param standard_mask: mask passed to get_images_df for every image read

    :param equal_priors: boolean
        use 0.5 as a prior for each group [default True]. If set to False, the
        frequency of each group in the total set is used instead.
        "True" is recommended for small sets.

    :return: weighted "in" likelihood divided by the sum of the weighted
        "in" and "out" likelihoods

    :raises ValueError: if in_images and out_images share an entry
    '''
    shared = numpy.intersect1d(in_images, out_images)
    if len(shared) > 0:
        raise ValueError(
            "ERROR: in_images and out_images should not share images!")

    # Load every group image once; rows are labelled by path so each group
    # can be selected back out of the combined frame
    all_images = in_images + out_images
    mr = get_images_df(file_paths=all_images, mask=standard_mask)
    mr.index = all_images
    in_subset = mr.loc[in_images]
    out_subset = mr.loc[out_images]

    # Priors: either flat, or the share of images that falls in each group
    if equal_priors:
        p_process_in = p_process_out = 0.5
    else:
        in_count, out_count = len(in_images), len(out_images)
        total = in_count + out_count
        p_process_in = float(in_count) / total
        p_process_out = float(out_count) / total

    query = get_images_df(file_paths=query_image, mask=standard_mask)

    def squared_similarity(subset):
        # NOTE(review): query[0] selects label 0 of the frame returned for
        # the query image -- confirm this is the full voxel vector
        mean_image = pandas.DataFrame(subset.mean())
        return numpy.power(
            calculate_pairwise_correlation(mean_image[0], query[0]), 2)

    p_in = squared_similarity(in_subset)
    p_out = squared_similarity(out_subset)

    # Bayes rule with the squared correlations standing in for likelihoods
    weighted_in = p_in * p_process_in
    weighted_out = p_out * p_process_out
    return weighted_in / (weighted_in + weighted_out)
Ejemplo n.º 4
0
def get_likelihood_df(nid,
                      in_images,
                      out_images,
                      standard_mask,
                      range_table,
                      threshold=2.96,
                      output_folder=None,
                      method=["binary"]):
    '''get_likelihood_df

    Calculate voxelwise likelihoods for the "in" and "out" image groups of a
    node and optionally save them as pandas df pickles. The user must specify
    the method [default is binary].
    Method details:

    ranges:
        - likelihood in all thresholds defined in range_table
          (calculate_likelihood_in_ranges)
    binary
        - likelihood above / below a certain level [threshold, default=2.96]
          (calculate_likelihood_binary)

    Note: you do not need to calculate likelihoods in advance for the mean metric
    (using a derivation of the distance from a mean image as a probability score)
    In this case, use calculate_reverse_inference_distance

    :param nid: str
        a unique identifier, typically a node ID from a pybraincompare.ontology.tree

    :param in_images: list
        a list of files for the "in" group relevant to some concept

    :param out_images: list
        the rest; must not share any entry with in_images

    :param standard_mask: nibabel.Nifti1Image object
        the standard mask images are in space of

    :param range_table: pandas data frame
        a data frame of ranges with "start" and "stop" to calculate
        the range is based on the mins and max of the entire set of images
        can be generated with pybraincompare.inference.make_range_table

    :param threshold: float
        level used by the "binary" method [default 2.96]; it is also embedded
        in the label passed to save_likelihood_pickle

    :param output_folder: path
        folder to save likelihood pickles [default is None]

    :param method: list of str
        any of "ranges" and "binary"; each one present is calculated. The
        default list is only read, never modified.

    :return: dict
        keys "in_ranges"/"out_ranges" (method "ranges") and "in_bin"/"out_bin"
        (method "binary"). If output_folder is not specified the values are
        the df objects. If it is specified, each value is whatever
        save_likelihood_pickle returns (presumably the path of the saved
        pickle, e.g. pbc_likelihood_trm12345_df_in.pkl). In that case the key
        "in_out" is also set to the same value as "out_bin" for callers of
        earlier versions, which stored the saved "out" result only there.

    :raises ValueError: if in_images and out_images share an entry

    EACH VOXEL IS p(activation in voxel is in threshold)

    '''
    # Read all images into one data frame
    if len(numpy.intersect1d(in_images, out_images)) > 0:
        raise ValueError(
            "ERROR: in_images and out_images should not share images!")
    all_images = in_images + out_images
    mr = get_images_df(file_paths=all_images, mask=standard_mask)
    mr.index = all_images
    in_subset = mr.loc[in_images]
    out_subset = mr.loc[out_images]

    # Calculate likelihood for user defined methods
    df = dict()
    if "ranges" in method:
        # Each key holds the likelihood of its own group
        df["in_ranges"] = calculate_likelihood_in_ranges(
            in_subset, range_table)
        df["out_ranges"] = calculate_likelihood_in_ranges(
            out_subset, range_table)
        if output_folder:
            for key in ("in_ranges", "out_ranges"):
                df[key] = save_likelihood_pickle(df[key], output_folder, nid,
                                                 key)

    if "binary" in method:
        df["in_bin"] = calculate_likelihood_binary(in_subset, threshold)
        df["out_bin"] = calculate_likelihood_binary(out_subset, threshold)
        if output_folder:
            df["in_bin"] = save_likelihood_pickle(df["in_bin"], output_folder,
                                                  nid, "in_bin_%s" % threshold)
            df["out_bin"] = save_likelihood_pickle(df["out_bin"],
                                                   output_folder, nid,
                                                   "out_bin_%s" % threshold)
            # Legacy alias: the saved "out" result used to be stored under
            # this key only
            df["in_out"] = df["out_bin"]

    return df
Ejemplo n.º 5
0
def likelihood_groups_from_tree(
    tree,
    standard_mask,
    input_folder,
    image_pattern="[0]+%s[.]",
    output_folder=None,
    node_pattern="[0-9]+",
):
    '''likelihood_groups_from_tree
    Function to generate likelihood groups from a pybraincompare.ontology.tree object. These groups can then be used to calculate likelihoods (eg, p(activation|cognitive process)). The groups are optionally output as pickle objects. This is done because it is ideal to calculate likelihoods on a cluster.

    :param tree: dict
        a dictionary of nodes, with base nodes matching a particular pattern assumed to be image (.nii.gz) files.

    :param standard_mask: nifti image (nibabel)
        standard image mask that images are registered to

    :param input_folder: path
        the folder of images to be matched to the nodes of the tree.

    :param image_pattern: str
        a regular expression with one %s slot for the node name, matched
        (anchored at the start) against each file name in input_folder. The
        default matches one or more leading zeros, the node name, and a dot.

    :param output_folder: path
        a folder path to save likelihood groups [default None, nothing saved]

    :param node_pattern: str
        a regular expression to find image nodes in the tree, matched to name.
        Default is a number of any length [neurovault image primary keys].

    :return groups: list
        one dict per concept node that has at least one image inside and one
        image outside of it

    :raises ValueError: if more than one file matches the pattern of a node

    ..note::

            Each group is saved as <output_folder>/pbc_group_<nid>.pkl

            group["nid"] = "trm_12345"
            group["name"]: name of the concept node
            group["in"] = ["path1","path2",..."pathN"]
            group["out"] = ["path3","path4",..."pathM"]
            group["meta"]: meta data for the node
            group["range_table"]: a data frame of ranges with "start" and "stop" to calculate
                      the range is based on the mins and max of the entire set of images

    '''
    # Find all nodes in the tree, match to images in folder
    nodes = get_node_fields(tree, field="name", nodes=[])
    contender_files = glob("%s/*" % input_folder)

    # Images will match the specified pattern
    find_nodes = re.compile(node_pattern)
    image_nodes = numpy.unique(
        [node for node in nodes if find_nodes.match(node)]).tolist()

    # Node names must now be matched to files
    file_lookup = dict()
    file_names = [os.path.split(path)[-1] for path in contender_files]
    for node in image_nodes:
        find_file = re.compile(image_pattern % node)
        idx = [
            position for position, file_name in enumerate(file_names)
            if find_file.match(file_name)
        ]
        if len(idx) > 1:
            # Both values go in one tuple so the message formats correctly
            raise ValueError(
                "ERROR: found %s images that match pattern %s." %
                (len(idx), find_file.pattern))
        elif len(idx) == 0:
            print(
                "Did not find file for %s, will not be included in analysis." %
                (node))
        else:
            file_lookup[node] = contender_files[idx[0]]

    # Use pandas dataframe to not risk weird dictionary iteration orders
    files = pandas.DataFrame(list(file_lookup.values()), columns=["path"])
    files.index = list(file_lookup.keys())

    # The remaining nodes in the tree (that are not images) will have a RI score
    image_node_set = set(image_nodes)
    concept_nodes = [x for x in nodes if x not in image_node_set]

    # create table of voxels for all images (the top node)
    mr = get_images_df(file_paths=files.path, mask=standard_mask)
    mr.index = files.index

    range_table = make_range_table(mr)

    # GROUPS ----------------------------------------------------
    # Find groups for image sets at each node (**node names must be unique)
    # This is images at (and in lower levels) of node vs. everything else
    # will be used to calculate p([activation in range | region (voxel)]

    groups = []

    for concept_node in concept_nodes:
        node = get_node_by_name(tree, concept_node)
        # Skip names that cannot be resolved before touching the node
        if not node:
            continue
        node_id = node["nid"]  # for save image
        node_meta = node["meta"]
        all_children = get_node_fields(node, "name", [])
        children_in = [
            child for child in all_children if child in files.index
        ]
        inside = set(children_in)
        children_out = [
            child for child in files.index if child not in inside
        ]
        if len(children_in) > 0 and len(children_out) > 0:
            print("Generating group for concept node %s" % (concept_node))
            group = {
                "in": files.path.loc[children_in].unique().tolist(),
                "out": files.path.loc[children_out].unique().tolist(),
                "range_table": range_table,
                "meta": node_meta,
                "nid": node_id,
                "name": concept_node
            }

            groups.append(group)
            if output_folder is not None:
                group_file = "%s/pbc_group_%s.pkl" % (output_folder, node_id)
                with open(group_file, "wb") as handle:
                    pickle.dump(group, handle)

    return groups
# We will save a vector of [comment is cut off in the original]
# Images by Concept data frame, our X
X = pandas.read_csv(labels_tsv, sep="\t", index_col=0)

# Get standard mask, 4mm
standard_mask = get_standard_mask(4)

# Dictionary to look up image files (4mm); the handle is closed as soon as
# the pickle has been read.
# NOTE: unpickling can run arbitrary code -- only load trusted files.
with open(image_lookup, "rb") as lookup_file:
    lookup = pickle.load(lookup_file)

concepts = X.columns.tolist()

# We will go through each voxel (column) in a data frame of image data
# Image ids are the integer before the first "." of each file name
image_paths = list(lookup.values())
mr = get_images_df(file_paths=image_paths, mask=standard_mask)
image_ids = [int(os.path.basename(x).split(".")[0]) for x in image_paths]
mr.index = image_ids

# NOTE(review): the frame built above is replaced right away by the one for
# the group's images -- confirm whether the first load is still needed.
# We will go through each voxel (column) in a data frame of image data
image_paths = group["in"] + group["out"]
mr = get_images_df(file_paths=image_paths, mask=standard_mask)
image_ids_in = [int(os.path.basename(x).split(".")[0]) for x in group["in"]]
image_ids_out = [int(os.path.basename(x).split(".")[0]) for x in group["out"]]
image_ids = image_ids_in + image_ids_out
mr.index = image_ids

# We will save a data frame of pearson scores (to calculate accuracies later)
comparison_dfs = pandas.DataFrame()
# We will save a vector of [comment is cut off in the original]
# Images by Concept data frame, our X
X = pandas.read_csv(labels_tsv, sep="\t", index_col=0)

# Get standard mask, 4mm
standard_mask = get_standard_mask(4)

# Dictionary to look up image files (4mm); the handle is closed as soon as
# the pickle has been read.
# NOTE: unpickling can run arbitrary code -- only load trusted files.
with open(image_lookup, "rb") as lookup_file:
    lookup = pickle.load(lookup_file)

concepts = X.columns.tolist()

# We will go through each voxel (column) in a data frame of image data
# Image ids are the integer before the first "." of each file name
image_paths = list(lookup.values())
mr = get_images_df(file_paths=image_paths, mask=standard_mask)
image_ids = [int(os.path.basename(x).split(".")[0]) for x in image_paths]
mr.index = image_ids

# NOTE(review): the frame built above is replaced right away by the one for
# the group's images -- confirm whether the first load is still needed.
# We will go through each voxel (column) in a data frame of image data
image_paths = group["in"] + group["out"]
mr = get_images_df(file_paths=image_paths, mask=standard_mask)
image_ids_in = [int(os.path.basename(x).split(".")[0]) for x in group["in"]]
image_ids_out = [int(os.path.basename(x).split(".")[0]) for x in group["out"]]
image_ids = image_ids_in + image_ids_out
mr.index = image_ids

# We will save a data frame of pearson scores (to calculate accuracies later)
comparison_dfs = pandas.DataFrame()

for image_pair in image_pairs:
# Images by Concept data frame, our X
X = pandas.read_csv(labels_tsv, sep="\t", index_col=0)

# Dictionary to look up image files (4mm); the handle is closed as soon as
# the pickle has been read.
# NOTE: unpickling can run arbitrary code -- only load trusted files.
with open(image_lookup, "rb") as lookup_file:
    lookup = pickle.load(lookup_file)

# Get standard mask, 4mm
standard_mask = get_standard_mask(4)

# We will save data to dictionary
result = dict()

concepts = X.columns.tolist()

# We will go through each voxel (column) in a data frame of image data
# Image ids are the integer before the first "." of each file name
image_paths = list(lookup.values())
mr = get_images_df(file_paths=image_paths, mask=standard_mask)
image_ids = [int(os.path.basename(x).split(".")[0]) for x in image_paths]
mr.index = image_ids

# Idea: generate a predicted image for a particular set of concepts (e.g. for
# a left out image) by multiplying the concept vector by the regression
# parameters at each voxel. Then do the "Mitchell trick" of asking whether two
# left-out images can be classified accurately by matching them with the two
# predicted images.

regression_params = pandas.DataFrame(0, index=mr.columns, columns=concepts)

print("Training voxels...")

# Every voxel is trained on the same images, so the training rows and the
# design matrix are selected once rather than once per voxel
train = mr.index
Xtrain = X.loc[train, :]
for voxel in mr.columns:
    Y = mr.loc[train, voxel].tolist()
    # Use regularized regression
    clf = linear_model.ElasticNet(alpha=0.1)
    clf.fit(Xtrain, Y)
    # NOTE(review): nothing visible here copies the fitted model into
    # regression_params -- confirm the rest of the loop body was not lost
X = scaled

# Dictionary to look up image files (4mm); the handle is closed as soon as
# the pickle has been read.
# NOTE: unpickling can run arbitrary code -- only load trusted files.
with open(image_lookup, "rb") as lookup_file:
    lookup = pickle.load(lookup_file)

# Get standard mask, 4mm
standard_mask = get_standard_mask(4)

# We will save data to dictionary
result = dict()

concepts = X.columns.tolist()

# We will go through each voxel (column) in a data frame of image data
# Image ids are the integer before the first "." of each file name
image_paths = list(lookup.values())
mr = get_images_df(file_paths=image_paths, mask=standard_mask)
image_ids = [int(os.path.basename(x).split(".")[0]) for x in image_paths]
mr.index = image_ids

norm = pandas.DataFrame(columns=mr.columns)

# Normalize the image data by number of subjects
# V* = V/sqrt(S)
# presumably image_df is indexed by the same image ids as mr -- verify
for subid, voxels in mr.iterrows():
    number_of_subjects = image_df.loc[subid].number_of_subjects.tolist()
    norm_vector = voxels / numpy.sqrt(number_of_subjects)
    norm.loc[subid] = norm_vector

# The raw image frame is no longer needed once norm is filled
del mr
Ejemplo n.º 10
0
def likelihood_groups_from_tree(tree,standard_mask,input_folder,image_pattern="[0]+%s[.]",
                                output_folder=None,node_pattern="[0-9]+",):
    '''likelihood_groups_from_tree
    Function to generate likelihood groups from a pybraincompare.ontology.tree object. These groups can then be used to calculate likelihoods (eg, p(activation|cognitive process)). The groups are optionally output as pickle objects. This is done because it is ideal to calculate likelihoods on a cluster.

    :param tree: dict
        a dictionary of nodes, with base nodes matching a particular pattern assumed to be image (.nii.gz) files.

    :param standard_mask: nifti image (nibabel)
        standard image mask that images are registered to

    :param input_folder: path
        the folder of images to be matched to the nodes of the tree.

    :param image_pattern: str
        a regular expression with one %s slot for the node name, matched
        (anchored at the start) against each file name in input_folder. The
        default matches one or more leading zeros, the node name, and a dot.

    :param output_folder: path
        a folder path to save likelihood groups [default None, nothing saved]

    :param node_pattern: str
        a regular expression to find image nodes in the tree, matched to name.
        Default is a number of any length [neurovault image primary keys].

    :return groups: list
        one dict per concept node that has at least one image inside and one
        image outside of it

    :raises ValueError: if more than one file matches the pattern of a node

    ..note::

            Each group is saved as <output_folder>/pbc_group_<nid>.pkl

            group["nid"] = "trm_12345"
            group["name"]: name of the concept node
            group["in"] = ["path1","path2",..."pathN"]
            group["out"] = ["path3","path4",..."pathM"]
            group["meta"]: meta data for the node
            group["range_table"]: a data frame of ranges with "start" and "stop" to calculate
                      the range is based on the mins and max of the entire set of images

    '''
    # Find all nodes in the tree, match to images in folder
    nodes = get_node_fields(tree,field="name",nodes=[])
    contender_files = glob("%s/*" %input_folder)

    # Images will match the specified pattern
    find_nodes = re.compile(node_pattern)
    image_nodes = numpy.unique([node for node in nodes if find_nodes.match(node)]).tolist()

    # Node names must now be matched to files
    file_lookup = dict()
    file_names = [os.path.split(path)[-1] for path in contender_files]
    for node in image_nodes:
        find_file = re.compile(image_pattern %node)
        idx = [position for position,file_name in enumerate(file_names) if find_file.match(file_name)]
        if len(idx) > 1:
            # Both values go in one tuple so the message formats correctly
            raise ValueError("ERROR: found %s images that match pattern %s." %(len(idx),find_file.pattern))
        elif len(idx) == 0:
            print("Did not find file for %s, will not be included in analysis." %(node))
        else:
            file_lookup[node] = contender_files[idx[0]]

    # Use pandas dataframe to not risk weird dictionary iteration orders
    files = pandas.DataFrame(list(file_lookup.values()),columns=["path"])
    files.index = list(file_lookup.keys())

    # The remaining nodes in the tree (that are not images) will have a RI score
    image_node_set = set(image_nodes)
    concept_nodes = [x for x in nodes if x not in image_node_set]

    # create table of voxels for all images (the top node)
    mr = get_images_df(file_paths=files.path,mask=standard_mask)
    mr.index = files.index

    range_table = make_range_table(mr)

    # GROUPS ----------------------------------------------------
    # Find groups for image sets at each node (**node names must be unique)
    # This is images at (and in lower levels) of node vs. everything else
    # will be used to calculate p([activation in range | region (voxel)]

    groups = []

    for concept_node in concept_nodes:
        node = get_node_by_name(tree,concept_node)
        # Skip names that cannot be resolved before touching the node
        if not node:
            continue
        node_id = node["nid"] # for save image
        node_meta = node["meta"]
        all_children = get_node_fields(node,"name",[])
        children_in = [child for child in all_children if child in files.index]
        inside = set(children_in)
        children_out = [child for child in files.index if child not in inside]
        if len(children_in) > 0 and len(children_out) > 0:
            print("Generating group for concept node %s" %(concept_node))
            group = {"in": files.path.loc[children_in].unique().tolist(),
                     "out": files.path.loc[children_out].unique().tolist(),
                     "range_table": range_table,
                     "meta": node_meta,
                     "nid": node_id,
                     "name": concept_node}

            groups.append(group)
            if output_folder is not None:
                group_file = "%s/pbc_group_%s.pkl" %(output_folder,node_id)
                with open(group_file,"wb") as handle:
                    pickle.dump(group,handle)

    return groups
Ejemplo n.º 11
0
def get_likelihood_df(nid,in_images,out_images,standard_mask,range_table,
                      threshold=2.96,output_folder=None,method=["binary"]):

    '''get_likelihood_df

    Calculate voxelwise likelihoods for the "in" and "out" image groups of a
    node and optionally save them as pandas df pickles. The user must specify
    the method [default is binary].
    Method details:

    ranges:
        - likelihood in all thresholds defined in range_table
          (calculate_likelihood_in_ranges)
    binary
        - likelihood above / below a certain level [threshold, default=2.96]
          (calculate_likelihood_binary)

    Note: you do not need to calculate likelihoods in advance for the mean metric
    (using a derivation of the distance from a mean image as a probability score)
    In this case, use calculate_reverse_inference_distance

    :param nid: str
        a unique identifier, typically a node ID from a pybraincompare.ontology.tree

    :param in_images: list
        a list of files for the "in" group relevant to some concept

    :param out_images: list
        the rest; must not share any entry with in_images

    :param standard_mask: nibabel.Nifti1Image object
        the standard mask images are in space of

    :param range_table: pandas data frame
        a data frame of ranges with "start" and "stop" to calculate
        the range is based on the mins and max of the entire set of images
        can be generated with pybraincompare.inference.make_range_table

    :param threshold: float
        level used by the "binary" method [default 2.96]; it is also embedded
        in the label passed to save_likelihood_pickle

    :param output_folder: path
        folder to save likelihood pickles [default is None]

    :param method: list of str
        any of "ranges" and "binary"; each one present is calculated. The
        default list is only read, never modified.

    :return: dict
        keys "in_ranges"/"out_ranges" (method "ranges") and "in_bin"/"out_bin"
        (method "binary"). If output_folder is not specified the values are
        the df objects. If it is specified, each value is whatever
        save_likelihood_pickle returns (presumably the path of the saved
        pickle, e.g. pbc_likelihood_trm12345_df_in.pkl). In that case the key
        "in_out" is also set to the same value as "out_bin" for callers of
        earlier versions, which stored the saved "out" result only there.

    :raises ValueError: if in_images and out_images share an entry

    EACH VOXEL IS p(activation in voxel is in threshold)

    '''
    # Read all images into one data frame
    if len(numpy.intersect1d(in_images,out_images)) > 0:
        raise ValueError("ERROR: in_images and out_images should not share images!")
    all_images = in_images + out_images
    mr = get_images_df(file_paths=all_images,mask=standard_mask)
    mr.index = all_images
    in_subset = mr.loc[in_images]
    out_subset = mr.loc[out_images]

    # Calculate likelihood for user defined methods
    df = dict()
    if "ranges" in method:
        # Each key holds the likelihood of its own group
        df["in_ranges"] = calculate_likelihood_in_ranges(in_subset,range_table)
        df["out_ranges"] = calculate_likelihood_in_ranges(out_subset,range_table)
        if output_folder:
            for key in ("in_ranges","out_ranges"):
                df[key] = save_likelihood_pickle(df[key],output_folder,nid,key)

    if "binary" in method:
        df["in_bin"] = calculate_likelihood_binary(in_subset,threshold)
        df["out_bin"] = calculate_likelihood_binary(out_subset,threshold)
        if output_folder:
            df["in_bin"] = save_likelihood_pickle(df["in_bin"],output_folder,nid,"in_bin_%s" %threshold)
            df["out_bin"] = save_likelihood_pickle(df["out_bin"],output_folder,nid,"out_bin_%s" %threshold)
            # Legacy alias: the saved "out" result used to be stored under
            # this key only
            df["in_out"] = df["out_bin"]

    return df
brain_4mm = get_standard_mask(4)

# Loop invariants: the scratch file path and the non-zero voxels of the
# neurosynth mask volume
# NOTE(review): get_data()/get_affine() look like the older nibabel accessors
# -- confirm they exist in the installed version
tmpnii = "%s/tmp.nii.gz" %(neurosynth_feature_maps)
in_mask = dataset.masker.volume.get_data()!=0

for pmid in df.columns:
    # Put the column for this PMID back into the non-zero voxels of the mask
    pmid_mr = df[pmid].tolist()
    empty_nii = numpy.zeros(dataset.masker.volume.shape)
    empty_nii[in_mask] = pmid_mr
    empty_nii = nibabel.Nifti1Image(empty_nii,affine=dataset.masker.volume.get_affine())
    nibabel.save(empty_nii,tmpnii)
    # ***Interpolation must be nearest as neurosynth data is binary!
    nii = resample_img(tmpnii,target_affine=brain_4mm.get_affine(),interpolation="nearest")
    nibabel.save(nii,"%s/%s.nii.gz" %(neurosynth_feature_maps,pmid))

# Load into image data frame
# The scratch volume must be gone before globbing, otherwise "tmp" would be
# picked up as a concept map and fail the integer conversion below. It only
# exists if the loop ran at least once, so guard the removal.
if os.path.exists(tmpnii):
    os.remove(tmpnii)
concept_maps_4mm = glob("%s/*.nii.gz"%(neurosynth_feature_maps))
X = get_images_df(file_paths=concept_maps_4mm,mask=brain_4mm)

# Row labels are the integer PMIDs taken from the file names (<pmid>.nii.gz).
# Parse the base name only: stripping the folder string from the whole path
# also removes any occurrence of that string inside the file name itself.
Xindex = [int(os.path.basename(x).split(".")[0]) for x in concept_maps_4mm]
X.index = Xindex

### ENCODING MODEL
## This is our "features" data frame
# X=load_neurosynth_term_mappings() # size nterms X npapers -  for each paper, a binary encoding of the presence/absence of each cog atlas term in the abstract
# mapping=numpy.zeros(nvoxels,nterms)

# neurosynth_map=load_data() # data from all voxels, size nvoxels X npapers, binary encoding of activation presence/absence

# Get rid of entry with all zeros (PMID does not have abstract)
features=features.drop(9728909,axis=0)
X=X.drop(9728909,axis=0)