Example #1
def learn(resource, input, outdir, target_assocfile, target_ontology,
          target_root_class):
    """
    Learn association rules
    """
    logging.basicConfig(level=logging.INFO)

    afa = AssociationSetFactory()
    ofa = OntologyFactory()

    ont = ofa.create(resource)
    aset = afa.create_from_file(file=input, ontology=ont, fmt=None)

    learner = ol.OntologyLearner(assocs=aset)
    isa_ont = ont.subontology(relations=['subClassOf'])

    if target_root_class:
        learner.split_assocs(target_root_class, ontology=isa_ont)

    if target_ontology:
        learner.target_ontology = ofa.create(target_ontology)
    if target_assocfile:
        tont = ont
        if learner.target_ontology is not None:
            tont = learner.target_ontology
        learner.target_assocs = afa.create_from_file(target_assocfile,
                                                     ontology=tont,
                                                     fmt=None)

    with open(outdir + '/index.md', 'w') as file:
        learner.fit_all(dir=outdir, reportfile=file)
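
A minimal sketch of how learn might be invoked, assuming the surrounding module's imports (the ontobio factories plus the ontology-learner module aliased as ol); the GAF path is hypothetical and GO:0008150 (biological_process) is used as the split root:

# Hypothetical invocation; the annotation path is a placeholder.
learn(resource='go',
      input='my_annotations.gaf',
      outdir='out',
      target_assocfile=None,
      target_ontology=None,
      target_root_class='GO:0008150')  # split associations under biological_process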
Example #2
def test_merge():
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    ont2 = Ontology()
    ont2.merge([ont])
    assert ont2.xref_graph is not None
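
These tests rely on module-level imports; a hedged sketch of the ones Example #2 needs, following ontobio's module layout (verify against your installed version):

from ontobio.ontol_factory import OntologyFactory
from ontobio.ontol import Ontology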
Example #3
def test_lexmap_multi():
    """
    Test lexical mapping
    """
    factory = OntologyFactory()
    print("Creating ont")
    files = ['x', 'm', 'h', 'bto']
    onts = [
        factory.create('tests/resources/autopod-{}.json'.format(f))
        for f in files
    ]
    lexmap = LexicalMapEngine()
    lexmap.index_ontologies(onts)
    #print(lexmap.lmap)
    #print(ont.all_synonyms())
    g = lexmap.get_xref_graph()
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
    for x, y, d in g.edges_iter(data=True):
        cl = nx.ancestors(g, x)
        print("{} '{}' <-> {} '{}' :: {} CLOSURE={}".format(
            x, lexmap.label(x), y, lexmap.label(y), d, len(cl)))
        cpr = d[lexmap.CONDITIONAL_PR]
        assert cpr > 0 and cpr <= 1.0
    unmapped = lexmap.unmapped_nodes(g)
    print('U: {}'.format(len(unmapped)))
    unmapped = lexmap.unmapped_nodes(g, rs_threshold=4)
    print('U4: {}'.format(len(unmapped)))

    cliques = lexmap.cliques(g)
    maxc = max(cliques, key=len)
    print('CLIQUES: {}'.format(cliques))
    print('MAX CLIQUES: {}'.format(maxc))
    df = lexmap.as_dataframe(g)
    print(df.to_csv(sep="\t"))
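
Note that edges_iter is the networkx 1.x API; it was removed in networkx 2.0. On networkx >= 2.0 the edge loops above would use edges(data=True) instead, for example:

# networkx >= 2.0 equivalent of g.edges_iter(data=True)
for x, y, d in g.edges(data=True):
    print("{} <-> {} :: {}".format(x, y, d))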
Example #4
def test_subontology():
    """
    subontology
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('go')
    assert ont.is_obsolete('GO:0000267')
    print("ONT NODES: {}".format(ont.nodes()))
    subont = ont.subontology(relations=['subClassOf'])
    PERM = 'GO:1990578'
    print("NODES: {}".format(subont.nodes()))
    ancs = subont.ancestors(PERM, reflexive=True)
    print(str(ancs))
    for a in ancs:
        print(" ANC: {} '{}'".format(a, subont.label(a)))
    assert len(ancs) > 0

    assert subont.is_obsolete('GO:0000267')

    w = GraphRenderer.create('tree')
    w.write_subgraph(ont, ancs)

    # TODO: sub-ontology does not create
    # full metadata
    w = GraphRenderer.create('obo')
    w.write_subgraph(ont, ancs)
Example #5
    def test_set_ontology(self):
        ontology = OntologyFactory().create()
        for i in range(4):
            ontology.add_node(i, 'node' + str(i))
        ontology.add_parent(1, 0)
        ontology.add_parent(2, 0)
        ontology.add_parent(3, 0)
        self.df.set_ontology(ontology_type=DataType.GO, ontology=ontology, config=self.conf_parser)
        self.assertEqual(list(self.df.go_ontology.nodes()), list(ontology.nodes()))
Example #6
def test_dynamic_query():
    """
    Dynamic query
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('pato')

    ids = ont.sparql(body="{?x rdfs:subClassOf+ " + SHAPE + "}",
                     inject_prefixes=ont.prefixes(),
                     single_column=True)
    assert Y_SHAPED in ids
    assert ABSENT not in ids
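
SHAPE, Y_SHAPED and ABSENT are module-level CURIE constants for PATO terms. A plausible sketch of their definitions (SHAPE and ABSENT are well-known PATO IDs; the Y_SHAPED ID is an assumption to verify against current PATO):

SHAPE = 'PATO:0000052'     # shape
ABSENT = 'PATO:0000462'    # absent
Y_SHAPED = 'PATO:0001201'  # y-shaped; verify this ID before relying on it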
Example #7
    def load_from_files(self, files: List[str]) -> None:
        """
        Load and merge an ontology from a list of obojson files.

        :param files: list of files in obojson format
        :return: None
        """
        factory = OntologyFactory()
        ont = None
        for file in files:
            if ont is None:
                ont = factory.create(file)
            else:
                # Ontology.merge takes a list of ontologies (see Example #2)
                ont.merge([factory.create(file)])
        self.ontology = ont
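
A usage sketch, assuming loader is an instance of the enclosing class (the instance name and file paths are hypothetical):

# Hypothetical instance and obojson paths.
loader.load_from_files(['go-basic.json', 'go-extensions.json'])
ont = loader.ontology  # the merged Ontology, or None if the list was empty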
Example #8
def test_expand():
    factory = OntologyFactory()
    ontobj = factory.create("tests/resources/goslim_pombe.json")
    # Use context managers so the output file is flushed and closed
    # before it is read back.
    with open(OUTPUT, "w") as outfile:
        expand_tsv(INPUT,
                   ontology=ontobj,
                   outfile=outfile,
                   cols=["term"])
    with open(OUTPUT, "r") as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t')
        n = 0
        for row in reader:
            if row['term'] == 'GO:0002181':
                assert row['term_label'] == 'cytoplasmic translation'
                n += 1
            if row['term'] == 'FAKE:123':
                assert row['term_label'] == ''
                n += 1
    assert n == 2
Example #9
def test_lexmap_basic():
    """
    Test lexical mapping
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    lexmap = LexicalMapEngine()
    lexmap.index_ontology(ont)

    print(lexmap.lmap)
    print(ont.all_synonyms())
    g = lexmap.get_xref_graph()
    for x, y, d in g.edges_iter(data=True):
        print("{}<->{} :: {}".format(x, y, d))
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
    assert g.has_edge('Z:2', 'ZZ:2')  # roman numerals
    assert g.has_edge('Z:2', 'Y:2')  # case insensitivity
    assert g.has_edge('A:1', 'B:1')  # synonyms
    assert g.has_edge('B:1', 'A:1')  # bidirectional
    for x, y, d in g.edges_iter(data=True):
        print("{}<->{} :: {}".format(x, y, d))
        cpr = d[lexmap.CONDITIONAL_PR]
        assert cpr > 0 and cpr <= 1.0

    df = lexmap.as_dataframe(g)
    print(df.to_csv(sep="\t"))

    lexmap = LexicalMapEngine(
        config=dict(synsets=[dict(word="", synonym="ignoreme", weight=-2.0)],
                    normalized_form_confidence=0.25,
                    abbreviation_confidence=0.5,
                    meaningful_ids=True,
                    ontology_configurations=[
                        dict(prefix='AA', normalized_form_confidence=-1000)
                    ]))

    assert len(lexmap._get_config_val('NULL', 'synsets')) == 1
    assert lexmap._normalize_label('ignoreme foo', {'ignoreme': ''}) == 'foo'
    assert lexmap._normalize_label('replaceme foo',
                                   {'replaceme': 'zz'}) == 'foo zz'

    ont.add_node('TEST:1', 'foo bar')
    ont.add_node('TEST:2', 'bar foo')
    ont.add_node('TEST:3', 'foo bar')
    ont.add_node('TEST:4', 'wiz')
    syn = Synonym('TEST:4', val='bar foo', pred='hasRelatedSynonym')
    ont.add_synonym(syn)
    ont.add_node('http://x.org/wiz#FooBar')
    ont.add_node('TEST:6', '123')
    ont.add_node('TEST:7', '123')
    ont.add_node('TEST:8', 'bar ignoreme foo')
    ont.add_node('AA:1', 'foo bar')
    ont.add_node('AA:2', 'bar foo')
    ont.add_node('ABBREV:1', 'ABCD')
    ont.add_node('ABBREV:2', 'ABCD')
    for s in ont.synonyms('TEST:4'):
        print('S={}'.format(s))
    lexmap.index_ontology(ont)
    g = lexmap.get_xref_graph()
    for x, d in g['TEST:1'].items():
        print('XREF: {} = {}'.format(x, d))
    assert g.has_edge('TEST:1', 'TEST:2')  # normalized
    logging.info('E 1-2 = {}'.format(g['TEST:1']['TEST:2']))
    assert int(g['TEST:1']['TEST:2']['score']) == 25
    assert int(g['TEST:1']['TEST:3']['score']) == 100
    assert int(g['TEST:1']['TEST:4']['score']) < 25
    assert g.has_edge('TEST:3', 'http://x.org/wiz#FooBar')  # IDs and CamelCase
    assert not g.has_edge('TEST:6',
                          'TEST:7')  # should omit syns with no alphanumeric

    # test exclude normalized form
    assert not g.has_edge('AA:1', 'AA:2')

    # test custom synsets are used
    assert g.has_edge('TEST:8', 'TEST:2')
    assert g.has_edge('TEST:8', 'AA:2')
    assert not g.has_edge('TEST:8', 'AA:1')  # do not normalize AAs

    assert lexmap.smap['ABBREV:1'][0].is_abbreviation()
    assert lexmap.smap['ABBREV:2'][0].is_abbreviation()
    assert g.has_edge('ABBREV:1', 'ABBREV:2')
    assert int(g['ABBREV:1']['ABBREV:2']['score']) == 25

    df = lexmap.unmapped_dataframe(g)
    print(df.to_csv())
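
The lexmap tests above assume imports along these lines (a hedged sketch following ontobio's module layout):

import logging
from ontobio.ontol_factory import OntologyFactory
from ontobio.ontol import Synonym
from ontobio.lexmap import LexicalMapEngine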
Example #10
def test_remote_sparql_pato():
    """
    Load ontology from remote SPARQL endpoint
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('pato')

    ploidy = ont.node(PLOIDY)
    print("PLOIDY: {}".format(ploidy))
    assert ont.label(PLOIDY) == 'ploidy'

    # exact match
    search_results = ont.search('shape')
    print("SEARCH (exact): {}".format(search_results))
    assert [SHAPE] == search_results

    # implicit regexp
    search_results = ont.search('%shape%')
    print("SEARCH (re, implicit): {}".format(search_results))
    assert SHAPE in search_results
    assert len(search_results) > 10

    # explicit regexp
    search_results = ont.search('.*shape.*', is_regex=True)
    print("SEARCH (re, explicit): {}".format(search_results))
    assert SHAPE in search_results
    assert len(search_results) > 10
    
    # syns
    syn = 'cone-shaped'
    search_results = ont.search(syn, synonyms=False)
    print("SEARCH (no syns): {}".format(search_results))
    assert [] == search_results
    #search_results = ont.search(syn, synonyms=True)
    #print("SEARCH (with syns): {}".format(search_results))
    #assert [CONICAL] == search_results
    
    num_nodes = len(ont.nodes())
    assert num_nodes > 100

    ancs = ont.ancestors(PLOIDY)
    print("ANCS ploidy (all): {}".format(ancs))
    assert QUALITY in ancs
    assert PENTAPLOID not in ancs

    ancs = ont.ancestors(PLOIDY, relations=['subClassOf'])
    print("ANCS ploidy (subClassOf): {}".format(ancs))
    assert QUALITY in ancs
    assert PENTAPLOID not in ancs

    # this is a non-use case
    ancs = ont.ancestors(SWOLLEN, relations=[HAS_PART])
    print("ANCS swollen (has_part): {}".format(ancs))
    assert INCREASED_SIZE in ancs
    assert PROTRUDING in ancs
    assert len(ancs) == 2

    ancs = ont.ancestors(SWOLLEN, relations=['subClassOf'])
    print("ANCS swollen (subClassOf): {}".format(ancs))
    assert MORPHOLOGY in ancs
    assert QUALITY in ancs
    assert PROTRUDING not in ancs
    
    decs = ont.descendants(PLOIDY)
    print("DECS ploidy (all): {}".format(decs))
    assert QUALITY not in decs
    assert EUPLOID in decs
    assert PENTAPLOID in decs

    # this is a non-use case
    decs = ont.descendants(INCREASED_SIZE, relations=[HAS_PART])
    print("DECS increased size (has_part): {}".format(decs))
    assert SWOLLEN in decs
    assert len(decs) == 1

    subsets = ont.subsets()
    print("SUBSETS: {}".format(subsets))

    slim = ont.extract_subset('absent_slim')
    print("SLIM: {}".format(slim))
    assert ABSENT in slim
    assert QUALITY not in slim

    syns = ont.synonyms(INCREASED_SIZE)
    print("SYNS: {}".format(syns))
    syn_vals = [syn.val for syn in syns]
    assert 'big' in syn_vals
    [bigsyn] = [syn for syn in syns if syn.val == 'big']
    # TODO xrefs
    assert not bigsyn.exact_or_label()
    assert bigsyn.scope() == 'RELATED'

    w = GraphRenderer.create('obo')
    w.write_subgraph(ont, [INCREASED_SIZE])
Example #11
    def get_generators(self, filepath, batch_size):
        """Get Generators"""

        ont = OntologyFactory().create(filepath)
        # networkx 1.x API: .node is the graph's node-to-attributes dict
        parsed_line = ont.graph.copy().node

        do_term_list = []
        do_isas_list = []
        do_synonyms_list = []
        do_alt_ids_list = []
        xrefs = []
        counter = 0

        # Convert parsed obo term into a schema-friendly AGR dictionary.
        for key, line in parsed_line.items():
            counter = counter + 1
            node = ont.graph.node[key]
            if len(node) == 0:
                continue

            # Switching id to curie form and saving URI in "uri"
            # - might wildly break things later on???
            node["uri"] = node["id"]
            node["id"] = key

            syns = []

            local_id = None
            def_links_unprocessed = []
            def_links_processed = []
            subset = []
            definition = ""
            is_obsolete = "false"
            ident = key
            prefix = ident.split(":")[0]

            if "meta" in node:
                if "synonyms" in node["meta"]:
                    syns = [s["val"] for s in node["meta"]["synonyms"]]
                    for synonym in syns:
                        do_synonym = {
                            "primary_id": key,
                            "synonym": synonym
                        }
                        do_synonyms_list.append(do_synonym)

                if "basicPropertyValues" in node["meta"]:
                    alt_ids = [s["val"] for s in node["meta"]["basicPropertyValues"]]
                    for alt_id in alt_ids:
                        if "DOID:" in alt_id:
                            secondary_id = {
                                "primary_id": key,
                                "secondary_id": alt_id
                            }
                            do_alt_ids_list.append(secondary_id)

                if "xrefs" in node["meta"]:
                    o_xrefs = node["meta"].get('xrefs')
                    # o_xrefs is normally a list of {"val": ...} dicts, but the
                    # original for/else here always ran its else branch and
                    # treated the list as a string; branch on the type instead.
                    if isinstance(o_xrefs, (list, tuple)):
                        for xref_id_dict in o_xrefs:
                            xref_id = xref_id_dict["val"]
                            if ":" in xref_id:
                                local_id = xref_id.split(":")[1].strip()
                                prefix = xref_id.split(":")[0].strip()
                                complete_url = ETLHelper.get_complete_url_ont(local_id, xref_id)
                                generated_xref = ETLHelper.get_xref_dict(
                                    local_id,
                                    prefix,
                                    "ontology_provided_cross_reference",
                                    "ontology_provided_cross_reference",
                                    xref_id,
                                    complete_url,
                                    xref_id + "ontology_provided_cross_reference")
                                generated_xref["oid"] = ident
                                xrefs.append(generated_xref)
                    elif o_xrefs is not None and ":" in o_xrefs:
                        # Fallback: a single xref supplied as a plain string.
                        local_id = o_xrefs.split(":")[1].strip()
                        prefix = o_xrefs.split(":")[0].strip()
                        complete_url = ETLHelper.get_complete_url_ont(local_id, o_xrefs)
                        generated_xref = ETLHelper.get_xref_dict(
                            local_id,
                            prefix,
                            "ontology_provided_cross_reference",
                            "ontology_provided_cross_reference",
                            o_xrefs,
                            complete_url,
                            o_xrefs)
                        generated_xref["oid"] = ident
                        xrefs.append(generated_xref)
                if node["meta"].get('is_obsolete'):
                    is_obsolete = "true"
                elif node["meta"].get('deprecated'):
                    is_obsolete = "true"
                if "definition" in node["meta"]:
                    definition = node["meta"]["definition"]["val"]
                    def_links_unprocessed = node["meta"]["definition"]["xrefs"]
                if "subsets" in node["meta"]:
                    new_subset = node['meta'].get('subsets')
                    if isinstance(new_subset, (list, tuple)):
                        subset = new_subset
                    else:
                        if new_subset is not None:
                            subset.append(new_subset)
                if len(subset) > 1:
                    converted_subsets = []
                    for subset_str in subset:
                        if "#" in subset_str:
                            subset_str = subset_str.split("#")[-1]
                        converted_subsets.append(subset_str)
                    subset = converted_subsets

            all_parents = ont.parents(key)
            all_parents.append(key)

            # Improves performance when traversing relations
            all_parents_subont = ont.subontology(all_parents)
            isas_without_names = all_parents_subont.parents(key, relations=['subClassOf'])

            for item in isas_without_names:
                dictionary = {
                    "primary_id": key,
                    "primary_id2": item
                }

                do_isas_list.append(dictionary)

            def_links_processed = []
            def_links = ""
            if definition is None:
                definition = ""
            else:
                # Remove new lines that cause this to split across two lines in the file
                # definition = definition.replace('\n', ' ')

                # Remove any extra double space that might have been introduced in the last replace
                # definition = definition.replace('  ', ' ')

                if "\"" in definition:
                    split_definition = re.split(r'(?<!\\)"', definition)
                    if len(split_definition) > 1:
                        if len(split_definition) > 2 and "[" in split_definition[2].strip():
                            def_links = split_definition[2].strip()
                            def_links = def_links.rstrip("]").replace("[", "")
                            def_links_unprocessed.append(def_links)

            for def_link in def_links_unprocessed:
                def_link = def_link.replace("url:www", "http://www")
                def_link = def_link.replace("url:", "")
                def_link = def_link.replace("URL:", "")
                def_link = def_link.replace("\\:", ":")
                def_link = def_link.replace('\\', '')

                if "," in def_link:
                    def_link = def_link.split(",")
                    for link in def_link:
                        if link.strip().startswith("http"):
                            def_links_processed.append(link)
                else:
                    if def_link.strip().startswith("http"):
                        def_links_processed.append(def_link)

            # TODO: make this a generic section based on the resourceDescriptor.yaml file.
            # need to have MODs add disease pages to their yaml stanzas

            alt_ids = node.get('alt_id')
            if alt_ids:
                if not isinstance(alt_ids, (list, tuple)):
                    alt_ids = [alt_ids]
            else:
                alt_ids = []

            dict_to_append = {
                'oid': node['id'],
                'name': node.get('label'),
                'name_key': node.get('label'),
                'definition': definition,
                'defLinksProcessed': def_links_processed,
                'is_obsolete': is_obsolete,
                'subset': subset,
                'oUrl': "http://www.disease-ontology.org/?id=" + node['id'],
                'rgd_link': 'http://rgd.mcw.edu'
                            + '/rgdweb/ontology/annot.html?species=All&x=1&acc_id='
                            + node['id'] + '#annot',
                'rat_only_rgd_link': 'http://rgd.mcw.edu'
                                     + '/rgdweb/ontology/annot.html?species=Rat&x=1&acc_id='
                                     + node['id'] + '#annot',
                'human_only_rgd_link': 'http://rgd.mcw.edu'
                                       + '/rgdweb/ontology/annot.html?species=Human&x=1&acc_id='
                                       + node['id'] + '#annot',
                'mgi_link': 'http://www.informatics.jax.org/disease/' + node['id'],
                'zfin_link': 'https://zfin.org/' + node['id'],
                'flybase_link': 'http://flybase.org/cgi-bin/cvreport.html?id=' + node['id'],
                'wormbase_link': 'http://www.wormbase.org/resources/disease/' + node['id'],
                'sgd_link': 'https://yeastgenome.org/disease/' + node['id']
            }

            do_term_list.append(dict_to_append)

            if counter == batch_size:
                yield [do_term_list, do_isas_list, do_synonyms_list, xrefs, do_alt_ids_list]
                do_term_list = []
                do_isas_list = []
                do_synonyms_list = []
                do_alt_ids_list = []
                xrefs = []
                counter = 0

        if counter > 0:
            yield [do_term_list, do_isas_list, do_synonyms_list, xrefs, do_alt_ids_list]
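
A sketch of consuming the generator in batches; etl (an instance of the enclosing ETL class) and the DO obojson path are hypothetical. Each yielded batch unpacks into the five lists built above:

# Hypothetical instance and path; batches of up to 2000 terms.
for terms, isas, synonyms, xrefs, alt_ids in etl.get_generators('data/doid.json', 2000):
    print('{} terms in this batch'.format(len(terms)))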
Example #12
    def get_data(self, filepath):  # noqa
        """Get Data."""
        ont = OntologyFactory().create(filepath)

        parsed_line = ont.graph.copy().node

        # Convert parsed obo term into a schema-friendly AGR dictionary.
        for key in parsed_line:
            node = ont.graph.node[key]
            if len(node) == 0:
                continue

            # Switching id to curie form and saving URI in "uri"
            # might wildly break things later on???
            node["uri"] = node["id"]
            node["id"] = key

            syns = []
            # Code commented out with an "NU:" prefix is Not Used.
            # NU: xrefs = []
            # NU: xref_urls = []

            # NU: def_links_unprocessed = []
            # NU: def_links_processed = []
            subset = []
            definition = ""
            namespace = ""
            is_obsolete = "false"
            # NU:ident = key

            if "meta" in node:
                if "synonyms" in node["meta"]:
                    syns = [s["val"] for s in node["meta"]["synonyms"]]
                # NU: leave in call commented out in case it is used at a later time
                # if "xrefs" in node["meta"]:
                #     o_xrefs = node["meta"].get('xrefs')
                #     self.ortho_xrefs(o_xrefs, ident, xref_urls)
                if node["meta"].get('is_obsolete'):
                    is_obsolete = "true"
                elif node["meta"].get('deprecated'):
                    is_obsolete = "true"
                if "definition" in node["meta"]:
                    definition = node["meta"]["definition"]["val"]
                    # NU: def_links_unprocessed = node["meta"]["definition"]["xrefs"]
                if "subsets" in node["meta"]:
                    new_subset = node['meta'].get('subsets')
                    if isinstance(new_subset, (list, tuple)):
                        subset = new_subset
                    else:
                        if new_subset is not None:
                            subset.append(new_subset)
                if len(subset) > 1:
                    converted_subsets = []
                    for subset_str in subset:
                        if "#" in subset_str:
                            subset_str = subset_str.split("#")[-1]
                        converted_subsets.append(subset_str)
                    subset = converted_subsets
                if "basicPropertyValues" in node['meta']:
                    for bpv in node['meta']['basicPropertyValues']:
                        if bpv.get('pred') == 'OIO:hasOBONamespace':
                            namespace = bpv.get('val')
                            break

            all_parents = ont.parents(key)
            all_parents.append(key)

            # Improves performance when traversing relations
            all_parents_subont = ont.subontology(all_parents)

            isas_without_names = all_parents_subont.parents(
                key, relations=['subClassOf'])
            partofs_without_names = all_parents_subont.parents(
                key, relations=['BFO:0000050'])
            regulates = all_parents_subont.parents(key,
                                                   relations=['RO:0002211'])
            negatively_regulates = all_parents_subont.parents(
                key, relations=['RO:0002212'])
            positively_regulates = all_parents_subont.parents(
                key, relations=['RO:0002213'])

            # NU: def_links_unprocessed = []
            # def_links = ""
            if definition is None:
                definition = ""
            # else:
            #     if definition is not None and "\"" in definition:
            #         split_definition = definition.split("\"")
            #         if len(split_definition) > 1:
            #             if len(split_definition) > 2 and "[" in split_definition[2].strip():
            #                 def_links = split_definition[2].strip()
            #                 def_links_unprocessed.append(def_links.rstrip("]").replace("[", ""))

            # NU: def_links_processed not used later, it is commented out.
            # for def_link_str in def_links_unprocessed:
            #     def_link_str = def_link_str.replace("url:www", "http://www")
            #     def_link_str = def_link_str.replace("url:", "")
            #     def_link_str = def_link_str.replace("URL:", "")
            #     def_link_str = def_link_str.replace("\\:", ":")

            #     if "," in def_link_str:
            #         def_links = def_link_str.split(",")
            #         for link in def_links:
            #             if link.strip().startswith("http"):
            #                 def_links_processed.append(link)
            #     else:
            #         if def_link_str.strip().startswith("http"):
            #             def_links_processed.append(def_link_str)

            # NU: alt_ids = node.get('alt_id')
            # if alt_ids:
            #    if not isinstance(alt_ids, (list, tuple)):
            #        alt_ids = [alt_ids]
            # else:
            #    alt_ids = []

            dict_to_append = {
                'o_type': namespace,
                'name': node.get('label'),
                'href': 'http://amigo.geneontology.org/amigo/term/' + node['id'],
                'name_key': node.get('label'),
                'oid': node['id'],
                'definition': definition,
                'is_obsolete': is_obsolete,
                'subset': subset,
                'o_synonyms': syns,
                'isas': isas_without_names,
                'partofs': partofs_without_names,
                'regulates': regulates,
                'negatively_regulates': negatively_regulates,
                'positively_regulates': positively_regulates,

                # This data might be needed for gene descriptions
                # Maybe should be turned into a different method in order
                # to keep the go do dict's smaller
                # 'o_genes': [],
                # 'o_species': [],
                # 'xrefs': xrefs,
                # 'ontologyLabel': filepath,
                # TODO: fix links to not be passed for each ontology load.
                # 'rgd_link': 'http://rgd.mcw.edu/rgdweb/ontology/annot.html'\
                #              + '?species=All&x=1&acc_id='+node['id']+'#annot',
                # 'rgd_all_link': 'http://rgd.mcw.edu/rgdweb/ontology/annot.html?'\
                #                   + 'species=All&x=1&acc_id=' + node['id'] + '#annot',
                # 'rat_only_rgd_link': 'http://rgd.mcw.edu/rgdweb/ontology/annot.html?'\
                #                       + 'species=Rat&x=1&acc_id=' +node['id'] + '#annot',
                # 'human_only_rgd_link': 'http://rgd.mcw.edu/rgdweb/ontology/annot.html?'\
                #                        + 'species=Human&x=1&acc_id=' +node['id'] + '#annot',
                # 'mgi_link': 'http://www.informatics.jax.org/disease/'+node['id'],
                # 'wormbase_link': 'http://www.wormbase.org/resources/disease/'+node['id'],
                # 'sgd_link': 'https://yeastgenome.org/disease/'+node['id'],
                # 'flybase_link': 'http://flybase.org/cgi-bin/cvreport.html?id='+node['id'],
                # 'zfin_link': 'https://zfin.org/'+node['id'],
                # 'oUrl': "http://www.disease-ontology.org/?id=" + node['id'],
                # 'oPrefix': prefix,
                # 'crossReferences': xref_urls,
                # 'defText': def_text,
                # 'defLinksProcessed': def_links_processed,
                # 'oboFile': prefix,
                # 'category': 'go',
                # 'alt_ids': alt_ids,
            }

            if node['id'] == 'GO:0099616':
                self.logger.debug(dict_to_append)

            node = {**node, **dict_to_append}
            ont.graph.node[node["id"]] = node

        return ont
Example #13
    def get_generators(self, filepath, batch_size):  # noqa  TODO: needs splitting up
        """Get Generators."""
        ont = OntologyFactory().create(filepath)
        parsed_line = ont.graph.copy().node

        do_term_list = []
        do_isas_list = []
        do_synonyms_list = []
        do_alt_ids_list = []
        xrefs = []
        counter = 0

        # Convert parsed obo term into a schema-friendly AGR dictionary.
        for key, line in parsed_line.items():
            counter = counter + 1
            node = ont.graph.node[key]
            if len(node) == 0:
                continue

            # Switching id to curie form and saving URI in "uri"
            # - might wildly break things later on???
            node["uri"] = node["id"]
            node["id"] = key

            syns = []

            def_links_unprocessed = []
            def_links_processed = []
            subset = []
            definition = ""
            is_obsolete = "false"
            ident = key

            if "meta" in node:
                if "synonyms" in node["meta"]:
                    syns = [s["val"] for s in node["meta"]["synonyms"]]
                    for synonym in syns:
                        do_synonym = {"primary_id": key, "synonym": synonym}
                        do_synonyms_list.append(do_synonym)

                if "basicPropertyValues" in node["meta"]:
                    alt_ids = [
                        s["val"] for s in node["meta"]["basicPropertyValues"]
                    ]
                    for alt_id in alt_ids:
                        if "DOID:" in alt_id:
                            secondary_id = {
                                "primary_id": key,
                                "secondary_id": alt_id
                            }
                            do_alt_ids_list.append(secondary_id)

                if "xrefs" in node["meta"]:
                    o_xrefs = node["meta"].get('xrefs')
                    self.ortho_xrefs(o_xrefs, ident, xrefs)

                if node["meta"].get('is_obsolete'):
                    is_obsolete = "true"
                elif node["meta"].get('deprecated'):
                    is_obsolete = "true"
                if "definition" in node["meta"]:
                    definition = node["meta"]["definition"]["val"]
                    def_links_unprocessed = node["meta"]["definition"]["xrefs"]
                if "subsets" in node["meta"]:
                    new_subset = node['meta'].get('subsets')
                    if isinstance(new_subset, (list, tuple)):
                        subset = new_subset
                    else:
                        if new_subset is not None:
                            subset.append(new_subset)
                if len(subset) > 1:
                    converted_subsets = []
                    for subset_str in subset:
                        if "#" in subset_str:
                            subset_str = subset_str.split("#")[-1]
                        converted_subsets.append(subset_str)
                    subset = converted_subsets

            all_parents = ont.parents(key)
            all_parents.append(key)

            # Improves performance when traversing relations
            all_parents_subont = ont.subontology(all_parents)
            isas_without_names = all_parents_subont.parents(
                key, relations=['subClassOf'])

            for item in isas_without_names:
                dictionary = {"primary_id": key, "primary_id2": item}

                do_isas_list.append(dictionary)

            def_links_processed = []
            def_links = ""
            if definition is None:
                definition = ""
            else:
                # Remove new lines that cause this to split across two lines in the file
                # definition = definition.replace('\n', ' ')

                # Remove any extra double space that might have been introduced in the last replace
                # definition = definition.replace('  ', ' ')

                if "\"" in definition:
                    split_definition = re.split(r'(?<!\\)"', definition)
                    if len(split_definition) > 1:
                        if len(split_definition) > 2 and "[" in split_definition[2].strip():
                            def_links = split_definition[2].strip()
                            def_links = def_links.rstrip("]").replace("[", "")
                            def_links_unprocessed.append(def_links)

            for def_link in def_links_unprocessed:
                def_link = def_link.replace("url:www", "http://www")
                def_link = def_link.replace("url:", "")
                def_link = def_link.replace("URL:", "")
                def_link = def_link.replace("\\:", ":")
                def_link = def_link.replace('\\', '')

                if "," in def_link:
                    def_link = def_link.split(",")
                    for link in def_link:
                        if link.strip().startswith("http"):
                            def_links_processed.append(link)
                else:
                    if def_link.strip().startswith("http"):
                        def_links_processed.append(def_link)

            # TODO: make this a generic section based on the resourceDescriptor.yaml file.
            # need to have MODs add disease pages to their yaml stanzas

            # NU: alt_ids = node.get('alt_id')
            # if alt_ids:
            #     if not isinstance(alt_ids, (list, tuple)):
            #         alt_ids = [alt_ids]
            # else:
            #     alt_ids = []

            # TODO: Need to add urls to resource Descriptors for SGD and MGI.
            # NOTE: MGI had one, but it has 'MGI:' at the end of the url, which is not required here.
            dict_to_append = {
                'oid': node['id'],
                'name': node.get('label'),
                'name_key': node.get('label'),
                'definition': definition,
                'defLinksProcessed': def_links_processed,
                'is_obsolete': is_obsolete,
                'subset': subset,
                'oUrl': self.etlh.rdh2.return_url_from_key_value('DOID', node['id']),
                'rgd_link': self.etlh.rdh2.return_url_from_key_value(
                    'RGD', node['id'], 'disease/all'),
                'rat_only_rgd_link': self.etlh.rdh2.return_url_from_key_value(
                    'RGD', node['id'], 'disease/rat'),
                'human_only_rgd_link': self.etlh.rdh2.return_url_from_key_value(
                    'RGD', node['id'], 'disease/human'),
                'mgi_link': 'http://www.informatics.jax.org/disease/' + node['id'],
                'zfin_link': self.etlh.rdh2.return_url_from_key_value(
                    'ZFIN', node['id'], 'disease'),
                'flybase_link': self.etlh.rdh2.return_url_from_key_value(
                    'FB', node['id'], 'disease'),
                'wormbase_link': self.etlh.rdh2.return_url_from_key_value(
                    'WB', node['id'], 'disease'),
                'sgd_link': 'https://yeastgenome.org/disease/' + node['id']
            }

            do_term_list.append(dict_to_append)

            if counter == batch_size:
                yield [
                    do_term_list, do_isas_list, do_synonyms_list, xrefs,
                    do_alt_ids_list
                ]
                do_term_list = []
                do_isas_list = []
                do_synonyms_list = []
                do_alt_ids_list = []
                xrefs = []
                counter = 0

        if counter > 0:
            yield [
                do_term_list, do_isas_list, do_synonyms_list, xrefs,
                do_alt_ids_list
            ]
Example #14
    def get_data(self, filepath):
        """Get Data"""

        ont = OntologyFactory().create(filepath)

        parsed_line = ont.graph.copy().node

        # Convert parsed obo term into a schema-friendly AGR dictionary.
        for key in parsed_line:
            node = ont.graph.node[key]
            if len(node) == 0:
                continue

            # Switching id to curie form and saving URI in "uri"
            # might wildly break things later on???
            node["uri"] = node["id"]
            node["id"] = key

            syns = []
            xrefs = []
            xref_urls = []

            local_id = None
            def_links_unprocessed = []
            def_links_processed = []
            def_text = None
            subset = []
            definition = ""
            namespace = ""
            is_obsolete = "false"
            ident = key
            prefix = ident.split(":")[0]

            if "meta" in node:
                if "synonyms" in node["meta"]:
                    syns = [s["val"] for s in node["meta"]["synonyms"]]
                if "xrefs" in node["meta"]:
                    o_xrefs = node["meta"].get('xrefs')
                    # o_xrefs is normally a list of {"val": ...} dicts, but the
                    # original for/else here always ran its else branch and
                    # treated the list as a string; branch on the type instead.
                    if isinstance(o_xrefs, (list, tuple)):
                        for xref_id_dict in o_xrefs:
                            xref_id = xref_id_dict["val"]
                            if ":" in xref_id:
                                local_id = xref_id.split(":")[1].strip()
                                prefix = xref_id.split(":")[0].strip()
                                complete_url = ETLHelper.get_complete_url_ont(local_id, xref_id)
                                generated_xref = ETLHelper.get_xref_dict(
                                    local_id,
                                    prefix,
                                    "ontology_provided_cross_reference",
                                    "ontology_provided_cross_reference",
                                    xref_id,
                                    complete_url,
                                    xref_id + "ontology_provided_cross_reference")
                                generated_xref["oid"] = ident
                                xref_urls.append(generated_xref)
                    elif o_xrefs is not None and ":" in o_xrefs:
                        # Fallback: a single xref supplied as a plain string.
                        local_id = o_xrefs.split(":")[1].strip()
                        prefix = o_xrefs.split(":")[0].strip()
                        complete_url = ETLHelper.get_complete_url_ont(local_id, o_xrefs)
                        generated_xref = ETLHelper.get_xref_dict(
                            local_id,
                            prefix,
                            "ontology_provided_cross_reference",
                            "ontology_provided_cross_reference",
                            o_xrefs,
                            complete_url,
                            o_xrefs)
                        generated_xref["oid"] = ident
                        xref_urls.append(generated_xref)
                if node["meta"].get('is_obsolete'):
                    is_obsolete = "true"
                elif node["meta"].get('deprecated'):
                    is_obsolete = "true"
                if "definition" in node["meta"]:
                    definition = node["meta"]["definition"]["val"]
                    def_links_unprocessed = node["meta"]["definition"]["xrefs"]
                if "subsets" in node["meta"]:
                    new_subset = node['meta'].get('subsets')
                    if isinstance(new_subset, (list, tuple)):
                        subset = new_subset
                    else:
                        if new_subset is not None:
                            subset.append(new_subset)
                if len(subset) > 1:
                    converted_subsets = []
                    for subset_str in subset:
                        if "#" in subset_str:
                            subset_str = subset_str.split("#")[-1]
                        converted_subsets.append(subset_str)
                    subset = converted_subsets
                if "basicPropertyValues" in node['meta']:
                    for bpv in node['meta']['basicPropertyValues']:
                        if bpv.get('pred') == 'OIO:hasOBONamespace':
                            namespace = bpv.get('val')
                            break

            # Set xrefs to an empty array if None. Necessary for Neo4j parsing
            if xrefs is None:
                xrefs = []

            all_parents = ont.parents(key)
            all_parents.append(key)

            # Improves performance when traversing relations
            all_parents_subont = ont.subontology(all_parents)

            isas_without_names = all_parents_subont.parents(key, relations=['subClassOf'])
            partofs_without_names = all_parents_subont.parents(key, relations=['BFO:0000050'])
            regulates = all_parents_subont.parents(key, relations=['RO:0002211'])
            negatively_regulates = all_parents_subont.parents(key, relations=['RO:0002212'])
            positively_regulates = all_parents_subont.parents(key, relations=['RO:0002213'])

            # Reset only the processed list (as in Examples #11 and #13); keep
            # the unprocessed links collected from the definition metadata above.
            def_links_processed = []
            def_links = ""
            if definition is None:
                definition = ""
            else:
                # Remove new lines that cause this to split across two lines in the file
                # definition = definition.replace('\n', ' ')

                # Remove any extra double space that might have been introduced in the last replace
                # definition = definition.replace('  ', ' ')
                if "\"" in definition:
                    split_definition = definition.split("\"")
                    if len(split_definition) > 1:
                        def_text = split_definition[1].strip()
                        if len(split_definition) > 2 and "[" in split_definition[2].strip():
                            def_links = split_definition[2].strip()
                            def_links_unprocessed.append(def_links.rstrip("]").replace("[", ""))
                else:
                    def_text = definition

            for def_link_str in def_links_unprocessed:
                def_link_str = def_link_str.replace("url:www", "http://www")
                def_link_str = def_link_str.replace("url:", "")
                def_link_str = def_link_str.replace("URL:", "")
                def_link_str = def_link_str.replace("\\:", ":")

                if "," in def_link_str:
                    def_links = def_link_str.split(",")
                    for link in def_links:
                        if link.strip().startswith("http"):
                            def_links_processed.append(link)
                # elif "." in dl:
                #     dl = dl.split(".")
                #     for link in dl:
                #         if link.strip().startswith("http"):
                #             def_links_processed.append(link)
                else:
                    if def_link_str.strip().startswith("http"):
                        def_links_processed.append(def_link_str)

            # TODO: make this a generic section based on the resourceDescriptor.yaml file.
            # need to have MODs add disease pages to their yaml stanzas

            alt_ids = node.get('alt_id')
            if alt_ids:
                if not isinstance(alt_ids, (list, tuple)):
                    alt_ids = [alt_ids]
            else:
                alt_ids = []

            dict_to_append = {
                'o_type': namespace,
                'name': node.get('label'),
                'href': 'http://amigo.geneontology.org/amigo/term/' + node['id'],
                'name_key': node.get('label'),
                'oid': node['id'],
                'definition': definition,
                'is_obsolete': is_obsolete,
                'subset': subset,
                'o_synonyms': syns,
                'isas': isas_without_names,
                'partofs': partofs_without_names,
                'regulates': regulates,
                'negatively_regulates': negatively_regulates,
                'positively_regulates': positively_regulates,

                # This data might be needed for gene descriptions
                # Maybe should be turned into a different method in order
                # to keep the go do dict's smaller
                #'o_genes': [],
                #'o_species': [],
                #'xrefs': xrefs,
                #'ontologyLabel': filepath,
                #TODO: fix links to not be passed for each ontology load.
                #'rgd_link': 'http://rgd.mcw.edu/rgdweb/ontology/annot.html'\
                #              + '?species=All&x=1&acc_id='+node['id']+'#annot',
                #'rgd_all_link': 'http://rgd.mcw.edu/rgdweb/ontology/annot.html?'\
                #                   + 'species=All&x=1&acc_id=' + node['id'] + '#annot',
                #'rat_only_rgd_link': 'http://rgd.mcw.edu/rgdweb/ontology/annot.html?'\
                #                       + 'species=Rat&x=1&acc_id=' +node['id'] + '#annot',
                #'human_only_rgd_link': 'http://rgd.mcw.edu/rgdweb/ontology/annot.html?'\
                #                        + 'species=Human&x=1&acc_id=' +node['id'] + '#annot',
                #'mgi_link': 'http://www.informatics.jax.org/disease/'+node['id'],
                #'wormbase_link': 'http://www.wormbase.org/resources/disease/'+node['id'],
                #'sgd_link': 'https://yeastgenome.org/disease/'+node['id'],
                #'flybase_link': 'http://flybase.org/cgi-bin/cvreport.html?id='+node['id'],
                #'zfin_link': 'https://zfin.org/'+node['id'],
                #'oUrl': "http://www.disease-ontology.org/?id=" + node['id'],
                #'oPrefix': prefix,
                #'crossReferences': xref_urls,
                #'defText': def_text,
                #'defLinksProcessed': def_links_processed,
                #'oboFile': prefix,
                #'category': 'go',
                #'alt_ids': alt_ids,
            }

            if node['id'] == 'GO:0099616':
                print(dict_to_append)

            node = {**node, **dict_to_append}
            ont.graph.node[node["id"]] = node

        return ont
Example #15
    def test_set_covering_with_ontology(self):

        #              0                   ic(0) = 0
        #            /| |\
        #           / | | \
        #          1  2 3  4               ic(1) = 0.693147181, ic(2) = 0.470003629, ic(3) = 0.980829253
        #         /\ /\/ \/
        #        /  5 6  7                 ic(5) = 0.980829253, ic(6) = 1.16315081, ic(7) = 1.16315081
        #       /  /\  \/
        #      /  8  9 10                  ic(8) = 1.049822124, ic(10) = 1.252762968
        #      \ / \/   \
        #      11  12   13                 ic(11) = 1.386294361, ic(12) = 1.386294361, ic(13) = 1.386294361

        ontology = OntologyFactory().create()
        for i in range(14):
            ontology.add_node(i, 'node' + str(i))
        ontology.add_parent(1, 0)
        ontology.add_parent(2, 0)
        ontology.add_parent(3, 0)
        ontology.add_parent(4, 0)
        ontology.add_parent(5, 1)
        ontology.add_parent(5, 2)
        ontology.add_parent(6, 2)
        ontology.add_parent(6, 3)
        ontology.add_parent(7, 3)
        ontology.add_parent(7, 4)
        ontology.add_parent(8, 5)
        ontology.add_parent(9, 5)
        ontology.add_parent(10, 6)
        ontology.add_parent(10, 7)
        ontology.add_parent(11, 1)
        ontology.add_parent(11, 8)
        ontology.add_parent(12, 8)
        ontology.add_parent(12, 9)
        ontology.add_parent(13, 10)

        subsets = [
            CommonAncestor(node_id=1,
                           node_label="1",
                           covered_starting_nodes={"11", "12"}),
            CommonAncestor(node_id=2,
                           node_label="2",
                           covered_starting_nodes={"11", "12", "13"}),
            CommonAncestor(node_id=3,
                           node_label="3",
                           covered_starting_nodes={"13"}),
            CommonAncestor(node_id=4,
                           node_label="4",
                           covered_starting_nodes={"13"}),
            CommonAncestor(node_id=5,
                           node_label="5",
                           covered_starting_nodes={"11", "12"}),
            CommonAncestor(node_id=6,
                           node_label="6",
                           covered_starting_nodes={"13"}),
            CommonAncestor(node_id=7,
                           node_label="7",
                           covered_starting_nodes={"13"}),
            CommonAncestor(node_id=8,
                           node_label="8",
                           covered_starting_nodes={"11", "12"}),
            CommonAncestor(node_id=9,
                           node_label="9",
                           covered_starting_nodes={"12"}),
            CommonAncestor(node_id=10,
                           node_label="10",
                           covered_starting_nodes={"13"}),
            CommonAncestor(node_id=11,
                           node_label="11",
                           covered_starting_nodes={"11"}),
            CommonAncestor(node_id=12,
                           node_label="12",
                           covered_starting_nodes={"12"}),
            CommonAncestor(node_id=13,
                           node_label="13",
                           covered_starting_nodes={"13"})
        ]

        values = [1, 1, 1, 1, 1, 1, 1, 20, 1, 1, 100, 1, 1]
        res = find_set_covering(subsets=subsets,
                                ontology=ontology,
                                value=values,
                                max_num_subsets=2)
        self.assertTrue(all([sub[0] != 11 for sub in res]))
Example #16
    def get_generators(self, filepath, batch_size):  # noqa
        """Get Generators."""
        ont = OntologyFactory().create(filepath)
        parsed_line = ont.graph.copy().node

        go_term_list = []
        go_isas_list = []
        go_partofs_list = []
        go_synonyms_list = []
        go_regulates_list = []
        go_negatively_regulates_list = []
        go_positively_regulates_list = []
        go_altids_list = []
        counter = 0

        # Convert parsed obo term into a schema-friendly AGR dictionary.
        for key, line in parsed_line.items():
            counter = counter + 1
            node = ont.graph.node[key]
            if len(node) == 0:
                continue
            if node.get('type') == 'PROPERTY':
                continue

            # Switching id to curie form and saving URI in "uri"
            # might wildly break things later on???
            node["uri"] = node["id"]
            node["id"] = key

            subset = []
            definition = ""
            is_obsolete = "false"
            # Default namespace: avoids a NameError below when the node has no
            # meta block or no OIO:hasOBONamespace property.
            term_type = ""

            if "meta" in node:
                meta = node.get('meta')
                basic_property_values = meta.get('basicPropertyValues') or []
                for property_value_map in basic_property_values:
                    pred = property_value_map['pred']
                    val = property_value_map['val']
                    if pred == 'OIO:hasOBONamespace':
                        term_type = val

                if "synonyms" in node["meta"]:
                    syns = [s["val"] for s in node["meta"]["synonyms"]]
                    for synonym in syns:
                        go_synonym = {"primary_id": key, "synonym": synonym}
                        go_synonyms_list.append(go_synonym)

                if "basicPropertyValues" in node["meta"]:
                    alt_ids = [
                        s["val"] for s in node["meta"]["basicPropertyValues"]
                    ]
                    for alt_id in alt_ids:
                        if "GO:" in alt_id:
                            secondary_id = {
                                "primary_id": key,
                                "secondary_id": alt_id
                            }
                            go_altids_list.append(secondary_id)

                if node["meta"].get('is_obsolete'):
                    is_obsolete = "true"
                elif node["meta"].get('deprecated'):
                    is_obsolete = "true"

                if "definition" in node["meta"]:
                    definition = node["meta"]["definition"]["val"]

                if "subsets" in node["meta"]:
                    new_subset = node['meta'].get('subsets')
                    if isinstance(new_subset, (list, tuple)):
                        subset = new_subset
                    else:
                        if new_subset is not None:
                            subset.append(new_subset)

                if len(subset) > 1:
                    converted_subsets = []
                    for subset_str in subset:
                        if "#" in subset_str:
                            subset_str = subset_str.split("#")[-1]
                        converted_subsets.append(subset_str)
                    subset = converted_subsets

            all_parents = ont.parents(key)
            all_parents.append(key)

            # Improves performance when traversing relations
            all_parents_subont = ont.subontology(all_parents)
            isas_without_names = all_parents_subont.parents(
                key, relations=['subClassOf'])
            for item in isas_without_names:
                dictionary = {"primary_id": key, "primary_id2": item}
                go_isas_list.append(dictionary)

            partofs_without_names = all_parents_subont.parents(
                key, relations=['BFO:0000050'])
            for item in partofs_without_names:
                dictionary = {"primary_id": key, "primary_id2": item}
                go_partofs_list.append(dictionary)

            regulates = all_parents_subont.parents(key,
                                                   relations=['RO:0002211'])

            for item in regulates:
                dictionary = {"primary_id": key, "primary_id2": item}
                go_regulates_list.append(dictionary)

            negatively_regulates = all_parents_subont.parents(
                key, relations=['RO:0002212'])
            for item in negatively_regulates:
                dictionary = {"primary_id": key, "primary_id2": item}
                go_negatively_regulates_list.append(dictionary)

            positively_regulates = all_parents_subont.parents(
                key, relations=['RO:0002213'])
            for item in positively_regulates:
                dictionary = {"primary_id": key, "primary_id2": item}
                go_positively_regulates_list.append(dictionary)

            dict_to_append = {
                'oid': key,
                'definition': definition,
                'type': term_type,
                'name': node.get('label'),
                'subset': subset,
                'name_key': node.get('label'),
                'is_obsolete': is_obsolete,
                'href': 'http://amigo.geneontology.org/amigo/term/' + node['id'],
            }

            go_term_list.append(dict_to_append)

            if counter == batch_size:
                yield [
                    go_term_list, go_isas_list, go_partofs_list,
                    go_synonyms_list, go_regulates_list,
                    go_negatively_regulates_list, go_positively_regulates_list,
                    go_altids_list
                ]

                go_term_list = []
                go_isas_list = []
                go_partofs_list = []
                go_synonyms_list = []
                go_regulates_list = []
                go_negatively_regulates_list = []
                go_positively_regulates_list = []
                go_altids_list = []
                counter = 0

        if counter > 0:
            yield [
                go_term_list, go_isas_list, go_partofs_list, go_synonyms_list,
                go_regulates_list, go_negatively_regulates_list,
                go_positively_regulates_list, go_altids_list
            ]
Example #17
def expand(tsvfile, cols, ontology, output):
    """Expand ontology term columns in a TSV file, writing to output."""
    factory = OntologyFactory()
    ontobj = factory.create(ontology)
    expand_tsv(tsvfile, ontology=ontobj, outfile=output, cols=cols)
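
A usage sketch for expand: expand_tsv writes to an open file handle, so output is passed as one. Paths are hypothetical; 'go' as the ontology handle mirrors Example #10:

# Hypothetical paths; OntologyFactory resolves 'go' as in the tests above.
with open('expanded.tsv', 'w') as out:
    expand('input.tsv', cols=['term'], ontology='go', output=out)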