def disorder_json_dump():
    ''' use cognitiveatlas library to dump disorders to a json file '''
    all_disorders = []
    disorders = get_disorder().json
    for disorder in disorders:
        all_disorders.append(get_disorder(id=disorder['id']).json)
    with open("all_disorders.json", 'w') as fp:
        json.dump(all_disorders, fp)
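# A minimal usage sketch for disorder_json_dump() above. It assumes the same
# imports the function relies on (json and cognitiveatlas.api.get_disorder) are
# available, and simply reloads the file the function writes to report how many
# disorder records were dumped; the "all_disorders.json" filename is hard-coded
# in the function itself.
def example_disorder_dump():
    disorder_json_dump()
    with open("all_disorders.json") as fp:
        dumped = json.load(fp)
    print("Dumped %d disorder records" % len(dumped))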
def test_disorder(): print("### TESTING DISORDER QUERIES:") disorder_id = "dso_3324" disorder_name = "mood disorder" # disorder_id and disorder_name result = get_disorder(id=disorder_id, name=disorder_name) assert_equal(result.json[0]["name"], disorder_name) assert_equal(result.json[0]["is_a_fulltext"], "cognitive disorder") assert_equal(result.json[0]["event_stamp"], "2013-11-20 15:38:27") # disorder_id result = get_disorder(id=disorder_id) assert_equal(result.json[0]["name"], disorder_name) # disorder_name result = get_disorder(name=disorder_name) assert_equal(result.json[0]["id"], disorder_id)
def test_disorder(): print "### TESTING DISORDER QUERIES:" disorder_id = "dso_3324" disorder_name = "mood disorder" # disorder_id and disorder_name result = get_disorder(id=disorder_id,name=disorder_name) assert_equal(result.json[0]["name"],disorder_name) assert_equal(result.json[0]["is_a_fulltext"],"cognitive disorder") assert_equal(result.json[0]["event_stamp"],"2013-11-20 15:38:27") # disorder_id result = get_disorder(id=disorder_id) assert_equal(result.json[0]["name"],disorder_name) # disorder_name result = get_disorder(name=disorder_name) assert_equal(result.json[0]["id"],disorder_id)
def get_isAbout_label(url):
    '''
    Added by DBK to get labels for isAbout urls
    :param url: url to get label for
    :return: string label
    '''
    scicrunch_base_uri = 'https://scicrunch.org/api/1/ilx/search/curie/'

    # load user's api key from environment variable. If not found then exit with error message
    try:
        user_key = os.environ["INTERLEX_API_KEY"]
    except KeyError:
        print("Please set the environment variable INTERLEX_API_KEY")
        sys.exit(1)

    if "cognitiveatlas" in url:
        # skip for things that aren't concepts or disorders for the time being
        if ("concept" not in url) and ("disorder" not in url):
            # for now if we don't have a concept or disorder url from cogatlas then just
            # return nothing for label. will need to work with cog atlas folks about how
            # to retrieve tasks and other types from cog atlas
            return ""

        # parse out id of term and get using cog atlas python tool
        id = url.rsplit('/', 1)[0].rsplit('/', 1)[1]

        # don't know if this is a concept or disorder so we'll try both
        try:
            tmp = get_concept(id=id, silent=True)
            label = tmp.json['name'].lower()
        except:
            tmp = get_disorder(id=id, silent=True)
            label = tmp.json['name'].lower()

    elif "interlex" in url:
        # get label for interlex terms
        payload = {}
        headers = {}
        full_url = (scicrunch_base_uri
                    + url.rsplit('/', 1)[1].replace('_', ':').rstrip("']'")
                    + "?key=" + user_key)
        response = requests.request("GET", full_url, headers=headers, data=payload)
        # response is a json dictionary. here we want the label
        label = response.json()["data"]["label"].lower()

    return label
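# A short usage sketch for get_isAbout_label() above. It assumes INTERLEX_API_KEY
# is set in the environment and that the os, sys, requests, and cognitiveatlas
# imports the function needs are present. The disorder URL below is illustrative;
# the trailing slash matters because the function takes the second-to-last path
# segment as the Cognitive Atlas id.
def example_isabout_label():
    label = get_isAbout_label("https://www.cognitiveatlas.org/disorder/id/dso_3324/")
    # expected to print a lower-cased name, e.g. "mood disorder" for dso_3324
    print(label)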
def pull_ontology(out_dir='auto', overwrite=False):
    """
    Download Cognitive Atlas ontology and combine Concepts, Tasks, and
    Disorders to create ID and relationship DataFrames.
    """
    if out_dir == 'auto':
        out_dir = op.join(get_resource_path(), 'ontology')
    else:
        out_dir = op.abspath(out_dir)

    ids_file = op.join(out_dir, 'cogat_ids.csv')
    rels_file = op.join(out_dir, 'cogat_relationships.csv')
    if overwrite or not all([op.isfile(f) for f in [ids_file, rels_file]]):
        concepts = get_concept(silent=True).pandas
        tasks = get_task(silent=True).pandas
        disorders = get_disorder(silent=True).pandas

        # Identifiers and aliases
        long_concepts = _longify(concepts)
        long_tasks = _longify(tasks)

        # Disorders currently lack aliases
        disorders['name'] = disorders['name'].str.lower()
        disorders = disorders.assign(alias=disorders['name'])
        disorders = disorders[['id', 'name', 'alias']]

        # Combine into id_df
        id_df = pd.concat((long_concepts, long_tasks, disorders), axis=0)
        id_df = _expand_df(id_df)
        id_df = id_df.replace('', np.nan)
        id_df = id_df.dropna(axis=0)
        id_df = id_df.reset_index(drop=True)

        # Relationships
        relationships = []
        for i, id_ in enumerate(concepts['id'].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, 'isSelf']
            relationships.append(row)
            concept = get_concept(id=id_, silent=True).json
            for rel in concept['relationships']:
                reltype = _get_concept_reltype(rel['relationship'], rel['direction'])
                if reltype is not None:
                    row = [id_, rel['id'], reltype]
                    relationships.append(row)

        for i, id_ in enumerate(tasks['id'].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, 'isSelf']
            relationships.append(row)
            task = get_task(id=id_, silent=True).json
            for rel in task['concepts']:
                row = [id_, rel['concept_id'], 'measures']
                relationships.append(row)
                row = [rel['concept_id'], id_, 'measuredBy']
                relationships.append(row)

        for i, id_ in enumerate(disorders['id'].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, 'isSelf']
            relationships.append(row)
            disorder = get_disorder(id=id_, silent=True).json
            for rel in disorder['disorders']:
                if rel['relationship'] == 'ISA':
                    rel_type = 'isA'
                else:
                    rel_type = rel['relationship']
                row = [id_, rel['id'], rel_type]
                relationships.append(row)

        rel_df = pd.DataFrame(columns=['input', 'output', 'rel_type'],
                              data=relationships)
        ctp_df = concepts[['id', 'id_concept_class']]
        ctp_df = ctp_df.assign(rel_type='inCategory')
        ctp_df.columns = ['input', 'output', 'rel_type']
        ctp_df['output'].replace('', np.nan, inplace=True)
        ctp_df.dropna(axis=0, inplace=True)

        rel_df = pd.concat((ctp_df, rel_df))
        rel_df = rel_df.reset_index(drop=True)
        id_df.to_csv(ids_file, index=False)
        rel_df.to_csv(rels_file, index=False)
    else:
        id_df = pd.read_csv(ids_file)
        rel_df = pd.read_csv(rels_file)

    return id_df, rel_df
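# A minimal sketch of calling pull_ontology() above. With the default out_dir='auto'
# the CSVs land under the package resource path; a temporary directory is used here
# instead so nothing in the package tree is touched. Note that the function makes one
# API call per concept, task, and disorder, so a full pull can take a while.
def example_pull_ontology():
    import tempfile
    out_dir = tempfile.mkdtemp()
    id_df, rel_df = pull_ontology(out_dir=out_dir, overwrite=True)
    print("%d identifier rows, %d relationship rows" % (len(id_df), len(rel_df)))
    print(rel_df['rel_type'].value_counts())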
def download_cognitive_atlas(data_dir=None, overwrite=False, verbose=1):
    """Download Cognitive Atlas ontology and extract IDs and relationships.

    .. versionadded:: 0.0.2

    Parameters
    ----------
    data_dir : :obj:`str`, optional
        Location in which to place Cognitive Atlas files.
        Default is None, which uses the package's default path for downloaded data.
    overwrite : :obj:`bool`, optional
        Whether to overwrite existing files or not. Default is False.
    verbose : :obj:`int`, optional
        Default is 1.

    Returns
    -------
    out_dict : :obj:`dict`
        Dictionary with two keys: 'ids' and 'relationships'. Each points to a
        csv file. The 'ids' file contains CogAt identifiers, canonical names,
        and aliases, sorted by alias length (number of characters).
        The 'relationships' file contains associations between CogAt items,
        with three columns: input, output, and rel_type (relationship type).
    """
    from cognitiveatlas.api import get_concept, get_disorder, get_task

    dataset_name = "cognitive_atlas"
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=verbose)

    ids_file = op.join(data_dir, "cogat_aliases.csv")
    rels_file = op.join(data_dir, "cogat_relationships.csv")
    if overwrite or not all([op.isfile(f) for f in [ids_file, rels_file]]):
        concepts = get_concept(silent=True).pandas
        tasks = get_task(silent=True).pandas
        disorders = get_disorder(silent=True).pandas

        # Identifiers and aliases
        long_concepts = _longify(concepts)
        long_tasks = _longify(tasks)

        # Disorders currently lack aliases
        disorders["name"] = disorders["name"].str.lower()
        disorders = disorders.assign(alias=disorders["name"])
        disorders = disorders[["id", "name", "alias"]]

        # Combine into aliases DataFrame
        aliases = pd.concat((long_concepts, long_tasks, disorders), axis=0)
        aliases = _expand_df(aliases)
        aliases = aliases.replace("", np.nan)
        aliases = aliases.dropna(axis=0)
        aliases = aliases.reset_index(drop=True)

        # Relationships
        relationship_list = []
        for i, id_ in enumerate(concepts["id"].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, "isSelf"]
            relationship_list.append(row)
            concept = get_concept(id=id_, silent=True).json
            for rel in concept["relationships"]:
                reltype = _get_concept_reltype(rel["relationship"], rel["direction"])
                if reltype is not None:
                    row = [id_, rel["id"], reltype]
                    relationship_list.append(row)

        for i, id_ in enumerate(tasks["id"].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, "isSelf"]
            relationship_list.append(row)
            task = get_task(id=id_, silent=True).json
            for rel in task["concepts"]:
                row = [id_, rel["concept_id"], "measures"]
                relationship_list.append(row)
                row = [rel["concept_id"], id_, "measuredBy"]
                relationship_list.append(row)

        for i, id_ in enumerate(disorders["id"].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, "isSelf"]
            relationship_list.append(row)
            disorder = get_disorder(id=id_, silent=True).json
            for rel in disorder["disorders"]:
                if rel["relationship"] == "ISA":
                    rel_type = "isA"
                else:
                    rel_type = rel["relationship"]
                row = [id_, rel["id"], rel_type]
                relationship_list.append(row)

        relationships = pd.DataFrame(
            columns=["input", "output", "rel_type"], data=relationship_list
        )
        ctp_df = concepts[["id", "id_concept_class"]]
        ctp_df = ctp_df.assign(rel_type="inCategory")
        ctp_df.columns = ["input", "output", "rel_type"]
        ctp_df["output"].replace("", np.nan, inplace=True)
        ctp_df.dropna(axis=0, inplace=True)

        relationships = pd.concat((ctp_df, relationships))
        relationships = relationships.reset_index(drop=True)
        aliases.to_csv(ids_file, index=False)
        relationships.to_csv(rels_file, index=False)

    out_dict = {"ids": ids_file, "relationships": rels_file}
    return out_dict
def main(argv):
    parser = ArgumentParser(
        description='This program will find all *.jsonld files in the list of input '
                    'directories and compute the frequency of use of isAbout concepts. '
                    'The frequency table will be exported as a markdown table for use in '
                    'web documents or GitHub README markdown files.')
    parser.add_argument('-jsonld', dest='jsonld', nargs='+', default=[], required=True,
                        help="space separated list of directories to evaluate for jsonld files.")
    parser.add_argument('-outfile', dest='outfile', required=True,
                        help="Output file for markdown table, full path")
    parser.add_argument('-jsonld_output_dir', dest='jsonld_output_dir', required=True,
                        help="This is a directory where we'll store the concept json-ld files "
                             "using the NIDM-Terms properties")
    args = parser.parse_args()

    isAbout_terms = {}
    total_concept_count = 0

    # download context file for json-ld files of concepts used
    # try to open the url and get the pointed-to file
    try:
        # open url and get file
        opener = url.urlopen(CONTEXT)
        # write temporary file to disk and use for stats
        temp = tempfile.NamedTemporaryFile(delete=False)
        temp.write(opener.read())
        temp.close()
        context_file = temp.name
    except:
        print("ERROR! Can't open url: %s" % CONTEXT)
        exit()

    # load downloaded context
    with open(context_file) as context_data:
        context = json.load(context_data)

    # Retrieve cognitive atlas concepts and disorders
    cogatlas_concepts = get_concept(silent=True)
    cogatlas_disorders = get_disorder(silent=True)

    # for each input directory
    for direct in args.jsonld:
        # find *.jsonld files
        files = glob2.glob(direct + '/**/*.jsonld', recursive=True)

        # loop through each file and get isAbout terms
        for file in files:
            # open the file as a dictionary
            print("opening file: %s" % file)
            with open(file) as dct:
                json_tmp = json.load(dct)

            if type(json_tmp['terms']) is dict:
                # for each key (term) in jsonld file, check for isAbout property.
                # for jsonld files with only a single term we have a simple dictionary where
                # the term label isn't the highest-level key, so we handle that differently
                for term in json_tmp['terms'].keys():
                    if term == 'isAbout':
                        if isinstance(json_tmp['terms'][term], list):
                            # not a single dictionary, so a list of isAbout dictionaries
                            for isabout_entry in json_tmp['terms'][term]:
                                # add concept to dictionary
                                total_concept_count = add_to_dict(
                                    id, isabout_entry, isAbout_terms, total_concept_count)

                                if INTERLEX_URI_FRAG in isabout_entry['@id']:
                                    # for storing concept as json-ld file
                                    concept_jsonld = get_interlex_concept_properties(
                                        isabout_entry['@id'], context)
                                elif (COGATLAS_URI_FRAG in isabout_entry['@id']) and \
                                        ("task" not in isabout_entry['@id']):
                                    concept_jsonld = get_cogatlas_properties(
                                        isabout_entry, context, cogatlas_concepts,
                                        cogatlas_disorders)
                                elif "task" in isabout_entry['@id']:
                                    concept_jsonld = get_cogatlas_task_properties(
                                        isabout_entry, context)
                                else:
                                    continue

                                # write concept jsonld file
                                write_jsonld(concept_jsonld, args.jsonld_output_dir)
                        # else it's a dictionary with a single isAbout entry
                        else:
                            total_concept_count = add_to_dict(
                                id, json_tmp['terms'][term], isAbout_terms, total_concept_count)

                            if INTERLEX_URI_FRAG in json_tmp['terms'][term]['@id']:
                                # for storing concept as json-ld file
                                concept_jsonld = get_interlex_concept_properties(
                                    json_tmp['terms'][term]['@id'], context)
                            elif COGATLAS_URI_FRAG in json_tmp['terms'][term]['@id'] and \
                                    ("task" not in json_tmp['terms'][term]['@id']):
                                concept_jsonld = get_cogatlas_properties(
                                    json_tmp['terms'][term], context, cogatlas_concepts,
                                    cogatlas_disorders)
                            elif "task" in json_tmp['terms'][term]['@id']:
                                concept_jsonld = get_cogatlas_task_properties(
                                    json_tmp['terms'][term], context)
                            else:
                                continue

                            # write concept jsonld file
                            write_jsonld(concept_jsonld, args.jsonld_output_dir)

            elif type(json_tmp['terms']) is list:
                for term in json_tmp['terms']:
                    for property in term:
                        if property == 'isAbout':
                            # for each concept in isAbout property
                            if isinstance(term[property], list):
                                for isabout_entry in term[property]:
                                    total_concept_count = add_to_dict(
                                        id, isabout_entry, isAbout_terms, total_concept_count)

                                    if INTERLEX_URI_FRAG in isabout_entry['@id']:
                                        # for storing concept as json-ld file
                                        concept_jsonld = get_interlex_concept_properties(
                                            isabout_entry['@id'], context)
                                    elif COGATLAS_URI_FRAG in isabout_entry['@id'] and \
                                            ("task" not in isabout_entry['@id']):
                                        concept_jsonld = get_cogatlas_properties(
                                            isabout_entry, context, cogatlas_concepts,
                                            cogatlas_disorders)
                                    elif "task" in isabout_entry['@id']:
                                        concept_jsonld = get_cogatlas_task_properties(
                                            isabout_entry['@id'], isabout_entry['label'], context)
                                    else:
                                        continue

                                    # write concept jsonld file
                                    write_jsonld(concept_jsonld, args.jsonld_output_dir)
                            else:
                                total_concept_count = add_to_dict(
                                    id, term[property], isAbout_terms, total_concept_count)

                                if INTERLEX_URI_FRAG in term[property]['@id']:
                                    # for storing concept as json-ld file
                                    concept_jsonld = get_interlex_concept_properties(
                                        term[property]['@id'], context)
                                elif COGATLAS_URI_FRAG in term[property]['@id'] and \
                                        ("task" not in term[property]['@id']):
                                    concept_jsonld = get_cogatlas_properties(
                                        term[property], context, cogatlas_concepts,
                                        cogatlas_disorders)
                                elif "task" in term[property]['@id']:
                                    concept_jsonld = get_cogatlas_task_properties(
                                        term[property]['@id'], term[property]['label'], context)
                                else:
                                    continue

                                # write concept jsonld file
                                write_jsonld(concept_jsonld, args.jsonld_output_dir)

    # open markdown txt file
    md_file = open(args.outfile, "w")
    # Added by NQ to test GitHub Actions
    print('opening output file in', args.outfile)

    # set up header of table
    md_file.write("| concept URL | label | use frequency (%) |\n")
    md_file.write("| ----------- | ----- | ----------------- |\n")

    # now cycle through isAbout_terms dictionary and compute frequencies
    for key in isAbout_terms.keys():
        isAbout_terms[key]['freq'] = (isAbout_terms[key]['count'] / total_concept_count) * 100.0

    res = OrderedDict(
        sorted(isAbout_terms.items(), key=lambda x: getitem(x[1], 'freq'), reverse=True))

    # write markdown table sorted by frequency
    for key in res.keys():
        # add to markdown table file
        md_file.write("| %s | %s | %f |\n" % (key, res[key]['label'], res[key]['freq']))

    # Added by NQ to show that the code finished running
    print('File has been successfully written in', args.outfile)
    md_file.close()

    # if a single-file jsonld file already exists then add these terms to it, else create a new one
    output_dir = os.path.split(args.jsonld_output_dir)[0]
    if isfile(join(output_dir, "NIDM_Concepts.jsonld")):
        cmd = "python " + join(sys.path[0], "combinebidsjsonld.py") + \
              " -inputDir " + args.jsonld_output_dir + \
              " -outputDir " + join(output_dir, "NIDM_Concepts.jsonld") + \
              " -association \"NIDM\"" + \
              " -jsonld " + join(output_dir, "NIDM_Concepts.jsonld")
    else:
        cmd = "python " + join(sys.path[0], "combinebidsjsonld.py") + \
              " -inputDir " + args.jsonld_output_dir + \
              " -outputDir " + join(output_dir, "NIDM_Concepts.jsonld") + \
              " -association \"NIDM\""
    print(cmd)
    system(cmd)