Code example #1
def main():
    efo_cell_og, x, y = load_ontology.load("11")
    #efo_disease_og, x,y = load_ontology.load("3")
    #efo_anatomy_og, x,y, = load_ontology.load("14")
    #efo_og, x,y = load_ontology.load("13")
    cl_uberon_doid_og, x, y = load_ontology.load("0")
    efo_cellline_og, x, y = load_ontology.load("10")
    cvcl_og, x, y = load_ontology.load("4")

    term_to_linkedsup_terms = term_to_linked_superterms(cl_uberon_doid_og)
    #term_to_linkedsup_terms = term_to_linked_superterms(efo_og)
    term_to_linkedsup_terms.update(term_to_linked_superterms(efo_cell_og))
    term_to_linkedsup_terms.update(term_to_linked_superterms(efo_cellline_og))
    term_to_linkedsup_terms.update(term_to_linked_superterms(cvcl_og))
    #term_to_linkedsup_terms.update(term_to_linked_superterms(efo_disease_og))
    #term_to_linkedsup_terms.update(term_to_linked_superterms(efo_anatomy_og))

    # Remove known incorrect mappings
    if "CVCL:1240" in term_to_linkedsup_terms["EFO:0003045"]:
        term_to_linkedsup_terms["EFO:0003045"].remove("CVCL:1240")

    with open("term_to_superterm_linked_terms.json", "w") as f:
        f.write(
            json.dumps(term_to_linkedsup_terms,
                       indent=4,
                       sort_keys=True,
                       separators=(',', ': ')))
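
The file written above is a plain JSON object mapping each term ID to the list of linked superterm IDs collected for it. A minimal sketch of reading it back (the term ID shown is one that appears in the code above; the actual contents depend on the loaded ontologies):

import json

with open("term_to_superterm_linked_terms.json", "r") as f:
    term_to_linkedsup_terms = json.load(f)

# Inspect the linked superterms recorded for a single term ID
print(term_to_linkedsup_terms.get("EFO:0003045", []))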
Code example #2
def main():
    doid_disease_og, x,y = load_ontology.load("2")
    efo_disease_og, x,y = load_ontology.load("3")
    efo_cellline_og, x,y = load_ontology.load("10")

    print "Generating cell line to disease implications..."
    term_to_implications = generate_implications(efo_disease_og, efo_cellline_og)
    temp = generate_implications(doid_disease_og, efo_cellline_og)
    for term, implied_terms in temp.iteritems():
        term_to_implications[term] += implied_terms
    with open("cellline_to_disease_implied_terms.json", "w") as f:
        f.write(json.dumps(term_to_implications, indent=4, separators=(',', ': '), sort_keys=True))
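
The merge loop assumes that every term key produced by the DOID pass already has an entry from the EFO pass; if that assumption does not hold, += raises a KeyError. A defensive variant (a sketch, not the project's code) would be:

for term, implied_terms in temp.iteritems():
    if term in term_to_implications:
        term_to_implications[term] += implied_terms
    else:
        term_to_implications[term] = list(implied_terms)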
Code example #3
def efo_cvcl_syns():
    """
    Use the Cellosaurus to generate extra cell-line synonyms
    for the EFO.
    """
    og, x, y = load_ontology.load("10")
    syn_sets = None
    with open("../map_sra_to_ontology/synonym_sets/cvcl_syn_sets.json",
              "r") as f:
        syn_sets = [Set(x) for x in json.load(f)]

    # Add synonyms
    total_terms = len(og.get_mappable_terms())
    c = 1
    term_id_to_syns = defaultdict(lambda: [])
    for term in og.get_mappable_terms():

        print "Adding synonyms to term %d/%d with id %s" % (c, total_terms,
                                                            term.id)
        c += 1
        # The term's own strings do not change while scanning the synonym
        # sets, so build this set once per term rather than once per set.
        current_term_strs = [x.syn_str for x in term.synonyms]
        current_term_strs.append(term.name)
        current_term_strs = Set(current_term_strs)

        for syn_set in syn_sets:

            for c_str in current_term_strs:
                if c_str in syn_set:
                    for syn in syn_set:
                        if syn not in current_term_strs and syn not in Set(
                                term_id_to_syns[term.id]):
                            print "Added synonym %s to term with name %s" % (
                                syn, term.name)
                            term_id_to_syns[term.id].append(syn)
    return term_id_to_syns
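
The matching rule in the inner loops is: if any of a term's known strings (its name or an existing synonym) appears in a Cellosaurus synonym set, every other member of that set becomes a candidate synonym for the term. A toy illustration of that rule with made-up strings (not real Cellosaurus data):

syn_set = set(["ExampleCell-1", "example cell 1", "EC-1"])
current_term_strs = set(["example cell 1", "example cell line 1"])

# "example cell 1" is shared, so the remaining members are pulled in
new_syns = [s for s in syn_set if s not in current_term_strs]
print(new_syns)   # e.g. ['ExampleCell-1', 'EC-1'] (set order may vary)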
Code example #4
def main():
    og, x, y = load_ontology.load("13")
    problematic_terms = deque()
    for t_id, term in og.id_to_term.items():
        # "adenocarcinoma" already contains the substring "carcinoma",
        # so a single check covers both cases
        if "carcinoma" in term.name:
            problematic_terms.append(term)

    term_to_syns = {}
    for term in problematic_terms:
        term_to_syns[term.id] = {"name": term.name, "synonyms":[x.syn_str for x in term.synonyms]}

    with open("candidate_term_to_remove_synonyms.json", "w") as f:
        f.write(json.dumps(term_to_syns, indent=4, sort_keys=True, separators=(',', ': ')))
Code example #5
def uncaps_EFO_syns():
    """
    For every name and synonym in the EFO, lower-case each word whose
    first character is upper case and whose remaining characters are all
    lower case. Single-character words and mixed-case words (e.g.
    acronyms) are left unchanged.
    """
    def uncap_str(r_str):
        tokens = r_str.split()
        new_tokens = []
        for tok in tokens:
            if len(tok) == 1:
                new_tokens.append(tok)
            else:
                first_upper = tok[0].isupper()
                rest_lower = True
                for t in tok[1:]:
                    if t.isupper():
                        rest_lower = False

                if first_upper and rest_lower:
                    new_tokens.append(tok[0].lower() + tok[1:])
                else:
                    new_tokens.append(tok)
        return " ".join(new_tokens)

    og, x, y = load_ontology.load("9")
    term_id_to_syns = defaultdict(lambda: [])
    for term in og.id_to_term.values():

        print "Looking at term %s" % term.id

        # Names of term
        ref_strs = [term.name]
        ref_strs += [x.syn_str for x in term.synonyms]
        ref_strs = Set(ref_strs)

        for r_str in ref_strs:
            new_str = uncap_str(r_str)
            if new_str not in ref_strs:
                print "Derived '%s' from '%s'" % (new_str, r_str)
                term_id_to_syns[term.id].append(new_str)

    return term_id_to_syns
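
A few hand-checked illustrations of what uncap_str changes (this assumes the nested helper has been lifted out of uncaps_EFO_syns so it can be called directly): single-character tokens and tokens with an upper-case letter past the first position are kept as-is; only capitalized-then-lowercase tokens get their first character lowered.

print(uncap_str("Cell line"))    # -> "cell line"
print(uncap_str("HeLa cell"))    # -> "HeLa cell"   (mixed-case token kept)
print(uncap_str("T cell"))       # -> "T cell"      (single-character token kept)
print(uncap_str("DNA binding"))  # -> "DNA binding" (all-caps token kept)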
Code example #6
def main():
    parser = OptionParser()
    #parser.add_option("-f", "--key_value_file", help="JSON file storing key-value pairs describing sample")
    (options, args) = parser.parse_args()

    input_f = args[0]

    # Map key-value pairs to ontologies
    with open(input_f, "r") as f:
        tag_to_vals = json.load(f)

    # Load ontologies
    ont_name_to_ont_id = {
        "UBERON": "12",
        "CL": "1",
        "DOID": "2",
        "EFO": "16",
        "CVCL": "4"
    }
    ont_id_to_og = {
        x: load_ontology.load(x)[0]
        for x in ont_name_to_ont_id.values()
    }
    pipeline = p_53()

    all_mappings = []
    for tag_to_val in tag_to_vals:
        sample_acc_to_matches = {}
        mapped_terms, real_props = pipeline.run(tag_to_val)
        mappings = {
            "mapped_terms": [x.to_dict() for x in mapped_terms],
            "real_value_properties": [x.to_dict() for x in real_props]
        }
        all_mappings.append(mappings)

    outputs = []
    for tag_to_val, mappings in zip(tag_to_vals, all_mappings):
        outputs.append(
            run_pipeline_on_key_vals(tag_to_val, ont_id_to_og, mappings))
    print json.dumps(outputs, indent=4, separators=(',', ': '))
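
This script takes a single positional argument, a JSON file of sample key-value metadata, and prints the mapping results to standard output. A sketch of the expected input shape and invocation (the script name, file names, and tag values below are only examples):

# python map_key_vals_to_ontology.py samples.json > samples_mapped.json
#
# samples.json is a list with one tag -> value dictionary per sample:
example_input = [
    {"cell line": "HeLa", "tissue": "cervix"},
    {"cell type": "T cell", "disease": "melanoma"}
]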
Code example #7
def main():
    efo_celltype_og, x, y = load_ontology.load("11")
    cl_celltype_og, x, y = load_ontology.load("1")

    doid_disease_og, x, y = load_ontology.load("2")
    efo_disease_og, x, y = load_ontology.load("3")

    efo_anatomy_og, x, y = load_ontology.load("14")
    uberon_anatomy_og, x, y = load_ontology.load("5")

    efo_cellline_og, x, y = load_ontology.load("10")
    cvcl_og, x, y = load_ontology.load("4")

    term_to_linked_terms = {}
    term_to_linked_terms.update(linked_terms(efo_celltype_og, cl_celltype_og))
    term_to_linked_terms.update(linked_terms(efo_disease_og, doid_disease_og))
    term_to_linked_terms.update(linked_terms(efo_anatomy_og,
                                             uberon_anatomy_og))
    term_to_linked_terms.update(linked_terms(cl_celltype_og, efo_celltype_og))
    term_to_linked_terms.update(linked_terms(doid_disease_og, efo_disease_og))
    term_to_linked_terms.update(linked_terms(uberon_anatomy_og,
                                             efo_anatomy_og))
    term_to_linked_terms.update(
        linked_terms(efo_cellline_og,
                     cvcl_og,
                     link_syn_types=["EXACT", "RELATED"]))
    term_to_linked_terms.update(
        linked_terms(cvcl_og,
                     efo_cellline_og,
                     link_syn_types=["EXACT", "RELATED"]))

    with open("term_to_linked_terms.json", "w") as f:
        f.write(
            json.dumps(term_to_linked_terms,
                       indent=4,
                       sort_keys=True,
                       separators=(',', ': ')))
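
Every linked_terms call above is made in both directions, so the same wiring can be expressed as a loop over ontology pairs. A sketch using the variables defined above (the update order differs slightly from the original, which only matters if the same term ID is produced by more than one call):

pairs = [
    (efo_celltype_og, cl_celltype_og, None),
    (efo_disease_og, doid_disease_og, None),
    (efo_anatomy_og, uberon_anatomy_og, None),
    (efo_cellline_og, cvcl_og, ["EXACT", "RELATED"]),
]
term_to_linked_terms = {}
for og_a, og_b, syn_types in pairs:
    for src, dst in ((og_a, og_b), (og_b, og_a)):
        if syn_types is None:
            term_to_linked_terms.update(linked_terms(src, dst))
        else:
            term_to_linked_terms.update(
                linked_terms(src, dst, link_syn_types=syn_types))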
Code example #8
def main():
    # add main args to parse
    parser = argparse.ArgumentParser(
        description='Arguments for run_pipeline.py')
    parser.add_argument("--fnvread",
                        type=str,
                        required=True,
                        default=None,
                        help='Paths of files to read, separated with ";".')
    parser.add_argument("--fnvwrite",
                        type=str,
                        required=True,
                        default=None,
                        help='Paths of files to write, separated with ";".')
    args = parser.parse_args()
    # start timer
    t1 = default_timer()
    # parse args
    print str(args)
    # summarize detected files
    input_fl = args.fnvread.split(";")
    print "detected " + str(len(input_fl)) + " files to read"
    write_fl = args.fnvwrite.split(";")
    print "detected " + str(len(write_fl)) + " files to write"
    tagsl = []
    for infile in input_fl:
        with open(infile, "r") as opf:
            tagsl.append(json.load(opf))
    # ** do main ontology loads outside of main loop
    ont_name_to_ont_id = {
        "UBERON": "12",
        "CL": "1",
        "DOID": "2",
        "EFO": "16",
        "CVCL": "4"
    }
    ont_id_to_og = {
        x: load_ontology.load(x)[0]
        for x in ont_name_to_ont_id.values()
    }
    pipeline = p_48()
    # do main loop on samples (works for up to 500, then string too long)
    for ii, tag_to_vals in enumerate(tagsl):
        write_f = write_fl[ii]
        # Map key-value pairs to ontologies (the pipeline was constructed
        # once above, outside the loop)
        all_mappings = []
        for tag_to_val in tag_to_vals:
            sample_acc_to_matches = {}
            mapped_terms, real_props = pipeline.run(tag_to_val)
            mappings = {
                "mapped_terms": [x.to_dict() for x in mapped_terms],
                "real_value_properties": [x.to_dict() for x in real_props]
            }
            all_mappings.append(mappings)
        outputs = []
        for tag_to_val, mappings in zip(tag_to_vals, all_mappings):
            outputs.append(
                run_pipeline_on_key_vals(tag_to_val, ont_id_to_og, mappings))
        # use json.dump to stream json to outfile, where fn is second arg
        with open(write_f, 'w') as outfile:
            json.dump(obj=outputs,
                      fp=outfile,
                      indent=4,
                      separators=(',', ': '))
        # use json.dumps to return json output as a string, to console
        print json.dumps(outputs, indent=4, separators=(',', ': '))
        print "time elapsed = " + str(default_timer() - t1)
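
The argparse description names the script run_pipeline.py; it takes two required arguments, each a ";"-separated list of paths, and the i-th input file is written to the i-th output file. An example invocation (the file names are placeholders):

# python run_pipeline.py \
#     --fnvread "batch1.json;batch2.json" \
#     --fnvwrite "batch1_mapped.json;batch2_mapped.json"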
Code example #9
import json
from collections import defaultdict
from optparse import OptionParser
import sqlite3

import map_sra_to_ontology
from map_sra_to_ontology import load_ontology

ONT_NAME_TO_ONT_ID = {
    "UBERON": "12",
    "CL": "1",
    "DOID": "2",
    "EFO": "16",
    "CVCL": "4"
}
ONT_ID_TO_OG = {
    x: load_ontology.load(x)[0]
    for x in ONT_NAME_TO_ONT_ID.values()
}


def main():
    parser = OptionParser()
    #parser.add_option("-a", "--a_descrip", action="store_true", help="This is a flag")
    #parser.add_option("-b", "--b_descrip", help="This is an argument")
    (options, args) = parser.parse_args()

    mappings_f = args[0]
    sample_type_predictions_f = args[1]
    out_json = args[2]
    out_sql = args[3]
Code example #10
    "CL": "1",
    "DOID": "2",
    "EFO": "16",
    "CVCL": "4",
    "ORDO": "19",
    "UBERON_all": "5",
    "UO": "7",
    "EFO_all": "9"
}
# ont_id_to_og = {x: load_ontology.load(x)[0]
#                 for x in list(ont_name_to_ont_id.values())}
ont_id_to_og = {}
for ont_name in list(ont_name_to_ont_id.keys()):
    sys.stderr.write('[{}] {}\n'.format(datetime.datetime.now(), ont_name))
    ont_id = ont_name_to_ont_id[ont_name]  # avoid shadowing the id() builtin
    ont_id_to_og[ont_id] = load_ontology.load(ont_id)[0]
pipeline = p_48(ont_id_to_og)

del ont_name_to_ont_id["UBERON_all"]
del ont_name_to_ont_id["UO"]
del ont_name_to_ont_id["EFO_all"]
del ont_id_to_og["5"], ont_id_to_og["7"], ont_id_to_og["9"]

sys.stderr.write('[{}] dill dump\n'.format(datetime.datetime.now()))
#dill.dump_session('pipeline_init.dill')
# with open("pipeline_init.dill", "wb") as f:
#     dill.dump((pipeline, ont_id_to_og), f)
with open("pipeline_init.pickle", "wb") as f:
    pickle.dump((pipeline, ont_id_to_og), f)

sys.stderr.write('[{}] Done.\n'.format(datetime.datetime.now()))
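
A downstream job can restore the pipeline and ontology graphs from the pickle instead of repeating the ontology loads; a minimal sketch assuming the file written above:

import pickle

with open("pipeline_init.pickle", "rb") as f:
    pipeline, ont_id_to_og = pickle.load(f)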