Example #1
import json

from cognitiveatlas.api import get_disorder


def disorder_json_dump():
    '''Use the cognitiveatlas library to dump all disorders to a JSON file.'''
    all_disorders = []
    # the bare query returns the summary list of disorders; a second query
    # per id then retrieves the full record
    disorders = get_disorder().json
    for disorder in disorders:
        all_disorders.append(get_disorder(id=disorder['id']).json)
    with open("all_disorders.json", 'w') as fp:
        json.dump(all_disorders, fp)
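A minimal usage sketch for the function above; it writes all_disorders.json to the working directory, which we read back to spot-check the dump:

disorder_json_dump()
with open("all_disorders.json") as fp:
    dumped = json.load(fp)
print("dumped %d disorders" % len(dumped))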
Example #2
from cognitiveatlas.api import get_disorder
# hedged assumption: assert_equal is the nose-style assertion helper the
# original test suite imports
from nose.tools import assert_equal


def test_disorder():
    print("### TESTING DISORDER QUERIES:")
    disorder_id = "dso_3324"
    disorder_name = "mood disorder"

    # disorder_id and disorder_name
    result = get_disorder(id=disorder_id, name=disorder_name)
    assert_equal(result.json[0]["name"], disorder_name)
    assert_equal(result.json[0]["is_a_fulltext"], "cognitive disorder")
    assert_equal(result.json[0]["event_stamp"], "2013-11-20 15:38:27")

    # disorder_id
    result = get_disorder(id=disorder_id)
    assert_equal(result.json[0]["name"], disorder_name)

    # disorder_name
    result = get_disorder(name=disorder_name)
    assert_equal(result.json[0]["id"], disorder_id)
Example #3
import os
import sys

import requests

from cognitiveatlas.api import get_concept, get_disorder


def get_isAbout_label(url):
    '''
    Added by DBK to get labels for isAbout URLs.
    :param url: URL to get the label for
    :return: string label
    '''

    scicrunch_base_uri = 'https://scicrunch.org/api/1/ilx/search/curie/'

    # load the user's API key from an environment variable; exit with an
    # error message if it is not set
    try:
        user_key = os.environ["INTERLEX_API_KEY"]
    except KeyError:
        print("Please set the environment variable INTERLEX_API_KEY")
        sys.exit(1)

    if "cognitiveatlas" in url:
        # skip things that aren't concepts or disorders for the time being;
        # retrieving tasks and other types will need to be worked out with
        # the Cognitive Atlas folks
        if ("concept" not in url) and ("disorder" not in url):
            return ""
        # parse the term id out of the URL (second-to-last path segment)
        term_id = url.rsplit('/', 1)[0].rsplit('/', 1)[1]
        # we don't know whether this is a concept or a disorder, so try both
        try:
            tmp = get_concept(id=term_id, silent=True)
            label = tmp.json['name'].lower()
        except Exception:
            tmp = get_disorder(id=term_id, silent=True)
            label = tmp.json['name'].lower()

    elif "interlex" in url:
        # get label for interlex terms
        payload = {}
        headers = {}
        full_url = scicrunch_base_uri + url.rsplit('/', 1)[1].replace(
            '_', ':').rstrip("']'") + "?key=" + user_key
        #print(full_url)
        response = requests.request("GET",
                                    full_url,
                                    headers=headers,
                                    data=payload)
        # response is a json dictionary. here we want the label
        label = response.json()["data"]["label"].lower()
        #print("interlex label: %s" %isAbout_term_labels[url] )

    return label
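A hedged usage sketch: the disorder id and name come from the test fixtures above, the URL shape (trailing slash, id in the second-to-last path segment) is what the rsplit parsing expects, and INTERLEX_API_KEY must be set in the environment or the function exits:

# hypothetical Cognitive Atlas URL; the trailing slash matters because the
# id is taken from the second-to-last path segment
example_url = "https://www.cognitiveatlas.org/disorder/id/dso_3324/"
print(get_isAbout_label(example_url))  # per the test fixtures above: "mood disorder"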
Example #4
import os.path as op
import time

import numpy as np
import pandas as pd

from cognitiveatlas.api import get_concept, get_disorder, get_task

# get_resource_path, _longify, _expand_df, and _get_concept_reltype are
# module-local helpers in the original source


def pull_ontology(out_dir='auto', overwrite=False):
    """
    Download the Cognitive Atlas ontology and combine Concepts, Tasks, and
    Disorders to create ID and relationship DataFrames.
    """
    if out_dir == 'auto':
        out_dir = op.join(get_resource_path(), 'ontology')
    else:
        out_dir = op.abspath(out_dir)

    ids_file = op.join(out_dir, 'cogat_ids.csv')
    rels_file = op.join(out_dir, 'cogat_relationships.csv')
    if overwrite or not all([op.isfile(f) for f in [ids_file, rels_file]]):
        concepts = get_concept(silent=True).pandas
        tasks = get_task(silent=True).pandas
        disorders = get_disorder(silent=True).pandas

        # Identifiers and aliases
        long_concepts = _longify(concepts)
        long_tasks = _longify(tasks)

        # Disorders currently lack aliases
        disorders['name'] = disorders['name'].str.lower()
        disorders = disorders.assign(alias=disorders['name'])
        disorders = disorders[['id', 'name', 'alias']]

        # Combine into id_df
        id_df = pd.concat((long_concepts, long_tasks, disorders), axis=0)
        id_df = _expand_df(id_df)
        id_df = id_df.replace('', np.nan)
        id_df = id_df.dropna(axis=0)
        id_df = id_df.reset_index(drop=True)

        # Relationships; pause every 100 API calls
        relationships = []
        for i, id_ in enumerate(concepts['id'].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, 'isSelf']
            relationships.append(row)
            concept = get_concept(id=id_, silent=True).json
            for rel in concept['relationships']:
                reltype = _get_concept_reltype(rel['relationship'],
                                               rel['direction'])
                if reltype is not None:
                    row = [id_, rel['id'], reltype]
                    relationships.append(row)

        for i, id_ in enumerate(tasks['id'].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, 'isSelf']
            relationships.append(row)
            task = get_task(id=id_, silent=True).json
            for rel in task['concepts']:
                row = [id_, rel['concept_id'], 'measures']
                relationships.append(row)
                row = [rel['concept_id'], id_, 'measuredBy']
                relationships.append(row)

        for i, id_ in enumerate(disorders['id'].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, 'isSelf']
            relationships.append(row)
            disorder = get_disorder(id=id_, silent=True).json
            for rel in disorder['disorders']:
                if rel['relationship'] == 'ISA':
                    rel_type = 'isA'
                else:
                    rel_type = rel['relationship']
                row = [id_, rel['id'], rel_type]
                relationships.append(row)

        rel_df = pd.DataFrame(columns=['input', 'output', 'rel_type'],
                              data=relationships)
        ctp_df = concepts[['id', 'id_concept_class']]
        ctp_df = ctp_df.assign(rel_type='inCategory')
        ctp_df.columns = ['input', 'output', 'rel_type']
        ctp_df = ctp_df.replace({'output': {'': np.nan}})
        ctp_df = ctp_df.dropna(axis=0)
        rel_df = pd.concat((ctp_df, rel_df))
        rel_df = rel_df.reset_index(drop=True)
        id_df.to_csv(ids_file, index=False)
        rel_df.to_csv(rels_file, index=False)
    else:
        id_df = pd.read_csv(ids_file)
        rel_df = pd.read_csv(rels_file)

    return id_df, rel_df
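A short usage sketch under the same assumptions as the function (the module-local helpers noted above are available); the first call builds and caches the CSVs, later calls reload them:

id_df, rel_df = pull_ontology()
print(id_df.shape, rel_df.shape)
print(rel_df['rel_type'].unique())  # e.g. isSelf, isA, measures, measuredBy, inCategory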
Example #5
import os.path as op
import time

import numpy as np
import pandas as pd

# _get_dataset_dir, _longify, _expand_df, and _get_concept_reltype are
# module-local helpers in the original source


def download_cognitive_atlas(data_dir=None, overwrite=False, verbose=1):
    """Download Cognitive Atlas ontology and extract IDs and relationships.

    .. versionadded:: 0.0.2

    Parameters
    ----------
    data_dir : :obj:`str`, optional
        Location in which to place Cognitive Atlas files.
        Default is None, which uses the package's default path for downloaded
        data.
    overwrite : :obj:`bool`, optional
        Whether to overwrite existing files or not. Default is False.
    verbose : :obj:`int`, optional
        Default is 1.

    Returns
    -------
    out_dict : :obj:`dict`
        Dictionary with two keys: 'ids' and 'relationships'. Each points to a
        csv file. The 'ids' file contains CogAt identifiers, canonical names,
        and aliases, sorted by alias length (number of characters).
        The 'relationships' file contains associations between CogAt items,
        with three columns: input, output, and rel_type (relationship type).
    """
    from cognitiveatlas.api import get_concept, get_disorder, get_task

    dataset_name = "cognitive_atlas"
    data_dir = _get_dataset_dir(dataset_name,
                                data_dir=data_dir,
                                verbose=verbose)

    ids_file = op.join(data_dir, "cogat_aliases.csv")
    rels_file = op.join(data_dir, "cogat_relationships.csv")
    if overwrite or not all([op.isfile(f) for f in [ids_file, rels_file]]):
        concepts = get_concept(silent=True).pandas
        tasks = get_task(silent=True).pandas
        disorders = get_disorder(silent=True).pandas

        # Identifiers and aliases
        long_concepts = _longify(concepts)
        long_tasks = _longify(tasks)

        # Disorders currently lack aliases
        disorders["name"] = disorders["name"].str.lower()
        disorders = disorders.assign(alias=disorders["name"])
        disorders = disorders[["id", "name", "alias"]]

        # Combine into aliases DataFrame
        aliases = pd.concat((long_concepts, long_tasks, disorders), axis=0)
        aliases = _expand_df(aliases)
        aliases = aliases.replace("", np.nan)
        aliases = aliases.dropna(axis=0)
        aliases = aliases.reset_index(drop=True)

        # Relationships; pause every 100 API calls
        relationship_list = []
        for i, id_ in enumerate(concepts["id"].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, "isSelf"]
            relationship_list.append(row)
            concept = get_concept(id=id_, silent=True).json
            for rel in concept["relationships"]:
                reltype = _get_concept_reltype(rel["relationship"],
                                               rel["direction"])
                if reltype is not None:
                    row = [id_, rel["id"], reltype]
                    relationship_list.append(row)

        for i, id_ in enumerate(tasks["id"].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, "isSelf"]
            relationship_list.append(row)
            task = get_task(id=id_, silent=True).json
            for rel in task["concepts"]:
                row = [id_, rel["concept_id"], "measures"]
                relationship_list.append(row)
                row = [rel["concept_id"], id_, "measuredBy"]
                relationship_list.append(row)

        for i, id_ in enumerate(disorders["id"].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, "isSelf"]
            relationship_list.append(row)
            disorder = get_disorder(id=id_, silent=True).json
            for rel in disorder["disorders"]:
                if rel["relationship"] == "ISA":
                    rel_type = "isA"
                else:
                    rel_type = rel["relationship"]
                row = [id_, rel["id"], rel_type]
                relationship_list.append(row)

        relationships = pd.DataFrame(columns=["input", "output", "rel_type"],
                                     data=relationship_list)
        ctp_df = concepts[["id", "id_concept_class"]]
        ctp_df = ctp_df.assign(rel_type="inCategory")
        ctp_df.columns = ["input", "output", "rel_type"]
        ctp_df["output"].replace("", np.nan, inplace=True)
        ctp_df.dropna(axis=0, inplace=True)
        relationships = pd.concat((ctp_df, relationships))
        relationships = relationships.reset_index(drop=True)
        aliases.to_csv(ids_file, index=False)
        relationships.to_csv(rels_file, index=False)
    out_dict = {"ids": ids_file, "relationships": rels_file}

    return out_dict
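A usage sketch following the docstring: the returned dict maps 'ids' and 'relationships' to CSV paths that load directly with pandas:

files = download_cognitive_atlas()
aliases = pd.read_csv(files["ids"])
relationships = pd.read_csv(files["relationships"])
print(relationships["rel_type"].value_counts().head())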
Example #6
import json
import os
import sys
import tempfile
import urllib.request as url  # matches the url.urlopen call below
from argparse import ArgumentParser
from collections import OrderedDict
from operator import getitem
from os import system
from os.path import isfile, join

import glob2

from cognitiveatlas.api import get_concept, get_disorder

# CONTEXT, INTERLEX_URI_FRAG, COGATLAS_URI_FRAG, add_to_dict, write_jsonld,
# get_interlex_concept_properties, get_cogatlas_properties, and
# get_cogatlas_task_properties are module-level constants and helpers in
# the original source


def main(argv):
    parser = ArgumentParser(
        description=
        'This program will find all *.jsonld files in the list of input '
        'directories and compute the frequency of use of isAbout concepts. '
        'The frequency table will be exported as a markdown table for use in '
        'web documents or GitHub README markdown files.')

    parser.add_argument('-jsonld',
                        dest='jsonld',
                        nargs='+',
                        default=[],
                        required=True,
                        help="space separated list"
                        "of directories to evaluate for jsonld files.")
    parser.add_argument('-outfile',
                        dest='outfile',
                        required=True,
                        help="Output file for markdown table, full path")
    parser.add_argument(
        '-jsonld_output_dir',
        dest='jsonld_output_dir',
        required=True,
        help="This is a directory"
        "where we'll store the concept json-ld files using the NIDM-Terms properties"
    )

    args = parser.parse_args()

    isAbout_terms = {}
    total_concept_count = 0

    # download context file for json-ld files of concepts used
    # try to open the url and get the pointed to file
    try:
        # open url and get file
        opener = url.urlopen(CONTEXT)
        # write temporary file to disk and use for stats
        temp = tempfile.NamedTemporaryFile(delete=False)
        temp.write(opener.read())
        temp.close()
        context_file = temp.name
    except Exception:
        print("ERROR! Can't open url: %s" % CONTEXT)
        sys.exit(1)
    # load downloaded context
    with open(context_file) as context_data:
        context = json.load(context_data)

    # Retrieve cognitive atlas concepts and disorders
    cogatlas_concepts = get_concept(silent=True)
    cogatlas_disorders = get_disorder(silent=True)

    # for each input directory
    for direct in args.jsonld:
        # find *.jsonld files
        files = glob2.glob(direct + '/**/*.jsonld', recursive=True)
        # loop through each file and get isAbout terms
        for file in files:
            # read file with json
            # open the file as a dictionary
            print("opening file: %s" % file)
            with open(file) as dct:
                json_tmp = json.load(dct)

            if type(json_tmp['terms']) is dict:
                # for each key (term) in jsonld file, check for isAbout property
                for term in json_tmp['terms'].keys():
                    # expanded = jsonld.expand(json_tmp[term])
                    # for jsonld files with only a single term we have a simple dictionary where the term
                    # label isn't the highest-level key, so we handle it differently
                    if term == 'isAbout':
                        if isinstance(json_tmp['terms'][term], list):
                            # a list of isAbout dictionaries
                            for isabout_entry in json_tmp['terms'][term]:

                                # add concept to dictionary
                                total_concept_count = add_to_dict(
                                    id, isabout_entry, isAbout_terms,
                                    total_concept_count)

                                if INTERLEX_URI_FRAG in isabout_entry['@id']:
                                    # for storing concept as json-ld file
                                    concept_jsonld = get_interlex_concept_properties(
                                        isabout_entry['@id'], context)
                                elif (COGATLAS_URI_FRAG
                                      in isabout_entry['@id']) and (
                                          "task" not in isabout_entry['@id']):
                                    concept_jsonld = get_cogatlas_properties(
                                        isabout_entry, context,
                                        cogatlas_concepts, cogatlas_disorders)
                                elif "task" in isabout_entry['@id']:
                                    concept_jsonld = get_cogatlas_task_properties(
                                        isabout_entry, context)

                                else:
                                    continue
                                # write concept jsonld file
                                write_jsonld(concept_jsonld,
                                             args.jsonld_output_dir)

                        # else it's a dictionary with a single isAbout entry
                        else:
                            total_concept_count = add_to_dict(
                                id, json_tmp['terms'][term], isAbout_terms,
                                total_concept_count)
                            if INTERLEX_URI_FRAG in json_tmp['terms'][term]['@id']:
                                # for storing concept as json-ld file
                                concept_jsonld = get_interlex_concept_properties(
                                    json_tmp['terms'][term]['@id'], context)
                            elif COGATLAS_URI_FRAG in json_tmp['terms'][term]['@id'] and \
                                ("task" not in json_tmp['terms'][term]['@id']):
                                concept_jsonld = get_cogatlas_properties(
                                    json_tmp['terms'][term], context,
                                    cogatlas_concepts, cogatlas_disorders)
                            elif "task" in json_tmp['terms'][term]:
                                concept_jsonld = get_cogatlas_task_properties(
                                    json_tmp['terms'][term], context)

                            else:
                                continue
                            # write concept jsonld file
                            write_jsonld(concept_jsonld,
                                         args.jsonld_output_dir)

            elif type(json_tmp['terms']) is list:
                for term in json_tmp['terms']:
                    # expanded = jsonld.expand(json_tmp[term])
                    # for jsonld files with only a single term we have a simple dictionary where the term
                    # label isn't the highest-level key, so we handle it differently
                    for property in term:
                        if property == 'isAbout':
                            # for each concept in isAbout property
                            if isinstance(term[property], list):
                                for isabout_entry in term[property]:
                                    total_concept_count = add_to_dict(
                                        id, isabout_entry, isAbout_terms,
                                        total_concept_count)
                                    if INTERLEX_URI_FRAG in isabout_entry[
                                            '@id']:
                                        # for storing concept as json-ld file
                                        concept_jsonld = get_interlex_concept_properties(
                                            isabout_entry['@id'], context)
                                    elif COGATLAS_URI_FRAG in isabout_entry['@id'] and \
                                            ("task" not in isabout_entry['@id']):
                                        concept_jsonld = get_cogatlas_properties(
                                            isabout_entry, context,
                                            cogatlas_concepts,
                                            cogatlas_disorders)
                                    elif "task" in isabout_entry['@id']:
                                        concept_jsonld = get_cogatlas_task_properties(
                                            isabout_entry['@id'],
                                            isabout_entry['label'], context)

                                    else:
                                        continue
                                    # write concept jsonld file
                                    write_jsonld(concept_jsonld,
                                                 args.jsonld_output_dir)

                            else:
                                total_concept_count = add_to_dict(
                                    id, term[property], isAbout_terms,
                                    total_concept_count)
                                if INTERLEX_URI_FRAG in term[property]['@id']:
                                    # for storing concept as json-ld file
                                    concept_jsonld = get_interlex_concept_properties(
                                        term[property]['@id'], context)
                                elif COGATLAS_URI_FRAG in term[property]['@id'] and \
                                            ("task" not in term[property]['@id']):
                                    concept_jsonld = get_cogatlas_properties(
                                        term[property], context,
                                        cogatlas_concepts, cogatlas_disorders)
                                elif "task" in term[property]['@id']:
                                    concept_jsonld = get_cogatlas_task_properties(
                                        term[property]['@id'],
                                        term[property]['label'], context)
                                else:
                                    continue
                                # write concept jsonld file
                                write_jsonld(concept_jsonld,
                                             args.jsonld_output_dir)

    # open markdown txt file
    md_file = open(args.outfile, "w")
    ## Added by NQ to test GitHub Actions
    print('opening output file:', args.outfile)
    # set up header of table
    md_file.write("| concept URL | label | use frequency (%) |\n")
    md_file.write("| ----------- | ----- | ----------------- |\n")

    # now cycle through isAbout_terms dictionary and compute frequencies
    for key in isAbout_terms.keys():
        isAbout_terms[key]['freq'] = (isAbout_terms[key]['count'] /
                                      total_concept_count) * 100.0

    res = OrderedDict(
        sorted(isAbout_terms.items(),
               key=lambda x: getitem(x[1], 'freq'),
               reverse=True))

    # write markdown table sorted
    for key in res.keys():
        # add to markdown table file
        md_file.write("| %s | %s | %f |\n" %
                      (key, res[key]['label'], res[key]['freq']))

    ## Added by NQ to show that the code finished running
    print('File has been successfully written to', args.outfile)

    md_file.close()

    # if a single-file jsonld file already exists then add these terms to it, else create a new one
    output_dir = os.path.split(args.jsonld_output_dir)[0]
    if isfile(join(output_dir, "NIDM_Concepts.jsonld")):
        cmd = "python " + join(sys.path[0], "combinebidsjsonld.py") + " -inputDir " + args.jsonld_output_dir + " -outputDir " + \
              join(output_dir, "NIDM_Concepts.jsonld") + " -association \"NIDM\"" + " -jsonld " + \
              join(output_dir, "NIDM_Concepts.jsonld")
    else:
        cmd = "python " + join(sys.path[0], "combinebidsjsonld.py") + " -inputDir " + args.jsonld_output_dir + " -outputDir " + \
              join(output_dir, "NIDM_Concepts.jsonld") + " -association \"NIDM\""

    print(cmd)
    system(cmd)
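A minimal entry-point sketch; note that main ignores its argv argument because parse_args() reads sys.argv directly, and the filename in the comment is hypothetical:

if __name__ == "__main__":
    # e.g. python isabout_frequency.py -jsonld ./ds1 ./ds2 \
    #          -outfile freq.md -jsonld_output_dir ./concept_jsonld
    main(sys.argv[1:])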