def main():
    """Run the pipeline end to end and save the classification scores.

    Steps, in order:

    1. Load the per-study features from ``data/features.txt`` through
       ``pp.set_targets``.
    2. Replace every study's entry by the list of terms (read from
       ``data/terms.json``) whose value exceeds the module-level ``THRESH``;
       studies left with an empty list are dropped.
    3. Filter the studies of ``data/docdict.txt`` through
       ``ex.filter_studies_active_voxels`` and convert any non-int key of
       the result to ``int``.
    4. Pass both dicts through ``ex.get_intersecting_dicts``, build ``X``
       and ``y`` with ``pp.get_features_targets`` and hand them to
       ``classify``.
    5. Dump the two objects returned by ``classify`` to
       ``class_scores.json`` and ``label_scores.json`` in the current
       working directory.

    Returns
    -------
    None
    """
    mask_file = 'data/MNI152_T1_2mm_brain.nii.gz'

    feature_dict, _ = pp.set_targets('data/features.txt', threshold=-1)

    # consider only the terms of interest
    # (json.load accepts a binary file object, so 'rb' is fine for reading)
    with open('data/terms.json', 'rb') as f:
        terms = json.load(f)

    # iterate over a snapshot of the keys: entries are deleted in the loop
    for key in list(feature_dict):
        frequencies = feature_dict[key]
        feature_dict[key] = [x for x in terms if frequencies[x] > THRESH]
        if not feature_dict[key]:
            del feature_dict[key]

    # filter coordinates based on voxels
    coord_dict = ex.filter_studies_active_voxels(
        'data/docdict.txt', mask_file, threshold=500, radius=6)

    # ensure that the keys are ints
    for key in list(coord_dict):
        if not isinstance(key, int):
            coord_dict[int(key)] = coord_dict.pop(key)

    # find intersecting dicts
    coord_dict, feature_dict = ex.get_intersecting_dicts(coord_dict,
                                                         feature_dict)

    # get the respective vectors
    X, y = pp.get_features_targets(coord_dict, feature_dict, labels=terms,
                                   mask=mask_file)

    score_per_class, score_per_label = classify(X, y)

    # json.dump writes str, not bytes: the files must be opened in text
    # mode, otherwise the write raises TypeError on Python 3.
    with open('class_scores.json', 'w') as f:
        json.dump(score_per_class, f)
    with open('label_scores.json', 'w') as f:
        json.dump(score_per_label, f)
def filter_studies_terms(feature_file, terms=None, threshold=0.001,
                         set_unique_label=False):
    """
    Drop every study that has more than one significant term.

    The features are loaded with ``pp.set_targets(feature_file,
    threshold=-1)``. A term counts as significant for a study when its
    frequency is strictly greater than ``threshold``. Studies with two or
    more significant terms (among the terms considered) are removed.

    Parameters
    ----------
    feature_file : str
        the file with the raw features.

    terms : list of str, optional
        the terms that are being considered as labels. They are lower-cased,
        and those not found among the target names returned by
        ``pp.set_targets`` are ignored. If not specified, uses the 25 terms
        from the original study.

    threshold : real, optional
        the frequency a term must exceed to be considered significant with
        respect to the study. If not specified, uses 0.001 as by the
        original paper.

    set_unique_label : bool, optional
        defaults to false. When true, each remaining study is mapped to a
        single label: the considered term with the highest frequency (the
        first one wins on ties). Studies where no considered term has a
        frequency above 0 are removed.

    Returns
    -------
    feature_dict : dict
        the dict such that all studies with conflicting labels are
        eliminated.
    """
    if terms is None:
        terms = [
            'Semantic', 'Encoding', 'Executive', 'Language', 'Verbal',
            'Phonological', 'Visual', 'Inference', 'Working Memory',
            'Conflict', 'Spatial', 'Attention', 'Imagery', 'Action',
            'Sensory', 'Perception', 'Auditory', 'Pain', 'Reward',
            'Arousal', 'Emotion', 'Social', 'Episodic', 'Retrieval',
            'Recognition',
        ]

    feature_dict, target_names = pp.set_targets(feature_file, threshold=-1)

    # lower-case the terms and keep only those that are actual features
    lowered = [term.lower() for term in terms]
    valid_terms = [term for term in lowered if term in target_names]

    # remove all studies that have more than one major term
    # (iterate over a snapshot of the keys since entries are deleted)
    for study in list(feature_dict.keys()):
        frequencies = feature_dict[study]
        major_terms = [t for t in valid_terms if frequencies[t] > threshold]
        if len(major_terms) > 1:
            del feature_dict[study]

    if not set_unique_label:
        return feature_dict

    # collapse each remaining study to its most frequent term
    for study in list(feature_dict):
        frequencies = feature_dict[study]
        best_term, best_freq = None, 0
        for term in valid_terms:
            if frequencies[term] > best_freq:
                best_term, best_freq = term, frequencies[term]
        if best_term is None:
            # no considered term occurs at all in this study
            del feature_dict[study]
        else:
            feature_dict[study] = best_term

    return feature_dict
def filter_studies_terms(feature_file, terms=None, threshold=0.001,
                         set_unique_label=False):
    """
    Given the frequency of terms corresponding to each study, as well as the
    terms to consider, eliminate all studies that have more than one term
    occurring at a frequency strictly greater than ``threshold``.

    Parameters
    ----------
    feature_file : str
        the file with the raw features; it is loaded through
        ``pp.set_targets`` with ``threshold=-1``.

    terms : list of str, optional
        the terms that are being considered as labels. Matching is done on
        the lower-cased terms; terms that are not among the target names
        returned by ``pp.set_targets`` are skipped. If not specified, uses
        the 25 terms from the original study.

    threshold : real, optional
        the frequency above which a term is considered significant with
        respect to the study. If not specified, uses 0.001 as by the
        original paper.

    set_unique_label : bool, optional
        defaults to false. When true, the value of each remaining study is
        replaced by a single label, namely the considered term with the
        largest frequency (first one on ties); a study in which no
        considered term has a frequency greater than 0 is removed.

    Returns
    -------
    feature_dict : dict
        the dict such that all studies with conflicting labels are
        eliminated.
    """
    default_terms = [
        'Semantic', 'Encoding', 'Executive', 'Language', 'Verbal',
        'Phonological', 'Visual', 'Inference', 'Working Memory', 'Conflict',
        'Spatial', 'Attention', 'Imagery', 'Action', 'Sensory', 'Perception',
        'Auditory', 'Pain', 'Reward', 'Arousal', 'Emotion', 'Social',
        'Episodic', 'Retrieval', 'Recognition',
    ]
    if terms is None:
        terms = default_terms

    feature_dict, target_names = pp.set_targets(feature_file, threshold=-1)

    # validate that the terms are actual features and convert to lower case
    wanted = []
    for raw_term in terms:
        name = raw_term.lower()
        if name in target_names:
            wanted.append(name)

    # remove all studies that have more than one major term; the keys are
    # copied first because the dict shrinks while we walk over it
    for study_id in tuple(feature_dict.keys()):
        row = feature_dict[study_id]
        n_major = len([name for name in wanted if row[name] > threshold])
        if n_major >= 2:
            del feature_dict[study_id]

    if set_unique_label:
        for study_id in tuple(feature_dict):
            row = feature_dict[study_id]
            top_value = 0
            top_name = None
            for name in wanted:
                value = row[name]
                if value > top_value:
                    top_value = value
                    top_name = name
            if top_name is not None:
                feature_dict[study_id] = top_name
            else:
                # none of the considered terms occurs in this study
                del feature_dict[study_id]

    return feature_dict