Example #1
0
 def download_articles(self):
     """
     Fetch the text of every article in ``self.parent.articles``.
     :return: Shared (manager-backed) list of per-article results.
     """
     logger.info('')
     manager = multiprocessing.Manager()
     collected = manager.list()
     # fan the downloads out; each worker appends into the shared list
     utils.async_getter(function_pointer=self.download_article,
                        input_list=self.parent.articles,
                        pass_data={"results": collected})
     return collected
Example #2
0
 def get_rss_feeds(self, num_feeds_max):
     """
     Download RSS feeds (URLs taken from RSS_FEEDS) and return the
     articles they contain, already converted to the required format.
     :param num_feeds_max: Maximum number of feeds.
     :return: Articles in a list.
     """
     logger.info('num_feeds_max:%s' % num_feeds_max)
     manager = multiprocessing.Manager()
     collected = manager.list()
     # only look at the first num_feeds_max feed URLs
     selected = RSS_FEEDS[:num_feeds_max]
     utils.async_getter(
         function_pointer=self.get_rss_feed,
         input_list=selected,
         pass_data={"results": collected},
     )
     return collected
def main():
    """Merge the pipe-delimited upstream files into one table, annotate each
    row with the first term-lookup match, and drop into an interactive shell."""
    paths = glob(os.path.expanduser('~/git/methodsOntology-upstream/to_be_integrated_in_NIF/*'))
    rows = []
    for index, path in enumerate(paths):
        with open(path, 'rt') as fh:
            table = list(csv.reader(fh, delimiter='|'))
        # keep the header row only from the first file
        rows.extend(table if index == 0 else table[1:])

    def async_func(row):
        # append (curie, label) of the first hit, or (None, None) on no match
        hits = sgv.findByTerm(row[2])
        if hits:
            curie, label = hits[0]['curie'], hits[0]['labels'][0]
        else:
            curie, label = None, None
        return row + [curie, label]

    matched = ([rows[0] + ['e_curie', 'e_label']]
               + async_getter(async_func, [(row,) for row in rows[1:]]))

    embed()
Example #4
0
def main():
    """Collect the rows of every pipe-delimited upstream file, look up each
    term via sgv, and open an interactive shell over the matched table."""
    source_files = glob(os.path.expanduser('~/git/methodsOntology-upstream/to_be_integrated_in_NIF/*'))
    rows = []
    seen_header = False
    for source_file in source_files:
        with open(source_file, 'rt') as handle:
            parsed = list(csv.reader(handle, delimiter='|'))
        if not seen_header:
            seen_header = True           # first file contributes the header
        else:
            parsed = parsed[1:]          # later files: drop repeated header
        rows.extend(parsed)

    def async_func(row):
        # extend the row with curie/label of the first match (None, None otherwise)
        responses = sgv.findByTerm(row[2])
        if not responses:
            return row + [None, None]
        top = responses[0]
        return row + [top['curie'], top['labels'][0]]

    matched = [rows[0] + ['e_curie', 'e_label']] + async_getter(async_func, [(r,) for r in rows[1:]])

    embed()
Example #5
0
def swanson():
    """ not really a parcellation scheme

    Parse the aligned Swanson 2014 partonomy text file into an ontology
    graph (written via makeGraph) plus a node/edge JSON structure.

    :return: Tuple of (ontology id, None).
    """
    source = 'resources/swanson_aligned.txt'
    ONT_PATH = GENERATED
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = makePrefixes('', 'ilx', 'owl', 'skos', 'NIFRID', 'ILXREPLACE')
    PREFIXES.update({
        #'':ontid + '/',  # looking for better options
        'SWAN': interlex_namespace('swanson/nt/term'),
        'SWAA': interlex_namespace('swanson/nt/appendix'),
    })
    new_graph = makeGraph(filename, PREFIXES, writeloc=WRITELOC)
    new_graph.add_ont(
        ontid, 'Swanson brain partomies', 'Swanson 2014 Partonomies',
        'This file is automatically generated from ' + source + '.' + NOTICE,
        TODAY)

    # FIXME citations should really go on the ... anatomy? scheme artifact
    definingCitation = 'Swanson, Larry W. Neuroanatomical Terminology: a lexicon of classical origins and historical foundations. Oxford University Press, USA, 2014.'
    definingCitationID = 'ISBN:9780195340624'
    new_graph.add_trip(ontid, 'NIFRID:definingCitation', definingCitation)
    new_graph.add_trip(ontid, 'NIFRID:definingCitationID', definingCitationID)

    with open(source, 'rt') as f:
        lines = [l.strip() for l in f.readlines()]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    #fix for capitalization since this header is reused
    fixed = ' or '.join([
        ' ('.join([n.capitalize() for n in _.split(' (')])
        for _ in lines[635].lower().split(' or ')
    ]).replace('human', 'HUMAN')
    lines[635] = fixed

    # parse each non-comment line into (depth, name, citation, next-syn flag);
    # depth is encoded in the source as runs of five dots per level
    data = []
    for l in lines:
        if not l.startswith('#'):
            level = l.count('.' * 5)
            l = l.strip('.')
            if ' (' in l:
                # "name1 (cite1) or name2 (cite2)": name1 becomes a synonym
                # of the row that follows, flagged 'NEXT SYN'
                if ') or' in l:
                    n1, l = l.split(') or')
                    area_name, citationP = n1.strip().split(' (')
                    citation = citationP.rstrip(')')
                    d = (level, area_name, citation, 'NEXT SYN')
                    data.append(d)
                    #print(tc.red(tc.bold(repr(d))))

                area_name, citationP = l.strip().split(' (')
                citation = citationP.rstrip(')')
            else:
                area_name = l
                citation = None

            d = (level, area_name, citation, None)
            #print(d)
            data.append(d)
    # look up every area name; keep only UBERON curies, first hit wins
    results = async_getter(sgv.findByTerm, [(d[1], ) for d in data])
    #results = [None] * len(data)
    curies = [[r['curie'] for r in _ if 'UBERON' in r['curie']] if _ else []
              for _ in results]
    output = [_[0] if _ else None for _ in curies]

    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
            [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    # NOTE(review): rowParse appears to dispatch each zoop row to the methods
    # named after the header columns, then call _row_post per row and _end at
    # the end — confirm against the rowParse implementation.
    class SP(rowParse):
        def __init__(self):
            self.nodes = defaultdict(dict)
            self._appendix = 0
            self.appendicies = {}
            self._last_at_level = {}
            self.names = defaultdict(set)
            self.children = defaultdict(set)
            self.parents = defaultdict(set)
            self.next_syn = False
            super().__init__(zoop)

        def Depth(self, value):
            # a pending NEXT SYN from the previous row makes this row a synonym
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    if self._appendix:
                        # close out the previous appendix before starting a new one
                        self.appendicies[self._appendix]['children'] = dict(
                            self.children)
                        self.appendicies[self._appendix]['parents'] = dict(
                            self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':  # sentinel row appended to zoop above
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name':
                        apname.capitalize(),
                        'type':
                        self.citation.capitalize() if self.citation else None
                    }
                    return
                else:
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix][
                            'taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[
                            self._appendix]['taxon'] = 'ANIMALIA'.capitalize()

                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(
                        self._rowind)
                else:
                    # disambiguate citation-less names with appendix + parent label
                    self.name += str(self._appendix) + self.nodes[
                        self._last_at_level[self.depth - 1]]['label']
                    #print(level, self.name)
                    # can't return here because they are their own level
                # replace with actually doing something...
                self.nodes[self._rowind]['label'] = self.name
                self.nodes[self._rowind]['citation'] = self.citation
                self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except:  # NOTE(review): bare except dropping into embed() for debugging
                    embed()
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            # merge rows with duplicate name+citation: the lowest row index wins
            replace = {}
            for asdf in [
                    sorted(n) for k, n in self.names.items() if len(n) > 1
            ]:
                replace_with, to_replace = asdf[0], asdf[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r, rw in replace.items():
                #print(self.nodes[rw])
                o = self.nodes.pop(r)
                #print(o)

            # rewrite child/parent ids in every appendix to the surviving ids
            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    tp = [
        _ for _ in sorted([
            '{: <50}'.format(n['label']) +
            n['uberon'] if n['uberon'] else n['label']
            for n in sp.nodes.values()
        ])
    ]
    #print('\n'.join(tp))
    #print(sp.appendicies[1].keys())
    #print(sp.nodes[1].keys())
    nbase = PREFIXES['SWAN'] + '%s'
    json_ = {'nodes': [], 'edges': []}
    parent = ILXREPLACE('swansonBrainRegionConcept')
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, parent, label=anns['label'])
        new_graph.add_trip(nid, 'NIFRID:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl': anns['label'], 'id': 'SWA:' + str(node)})
        #if anns['uberon']:
        #new_graph.add_trip(nid, owl.equivalentClass, anns['uberon'])  # issues arrise here...

    # one hasPart/partOf object-property pair per appendix
    for appendix, data in sp.appendicies.items():
        aid = PREFIXES['SWAA'] + str(appendix)
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_trip(
            aid, 'ilx:hasTaxonRank',
            data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        ahp = HASPART + str(appendix)
        apo = PARTOF + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items(
        ):  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_hierarchy(
                    cid, ahp, pid)  # note hierarhcy inverts direction
                new_graph.add_hierarchy(pid, apo, cid)
                json_['edges'].append({
                    'sub': 'SWA:' + str(child),
                    'pred': apo,
                    'obj': 'SWA:' + str(parent)
                })

    new_graph.write()
    if False:  # debugging: render one tree per partonomy
        Query = namedtuple('Query',
                           ['root', 'relationshipType', 'direction', 'depth'])
        mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?
        for i, n in enumerate(mapping):
            a, b = creatTree(*Query('SWA:' + str(n), 'ilx:partOf' + str(i + 1),
                                    'INCOMING', 10),
                             json=json_)
            print(a)
    return ontid, None
Example #6
0
def make_nifga_graph(_doprint=False):
    """Load the NIFGA ontology and derive a NIFGA -> UBERON replacement map.

    :param _doprint: if True, print putative replacements found via term search.
    :return: (graph, matches, exact, internal_equivs, irbcs, replaced_by)
    """
    # use equivalent class mappings to build a replacement mapping
    g = rdflib.Graph()
    g.parse(nifga_path, format='turtle')

    getQname = g.namespace_manager.qname
    classes = sorted([
        getQname(_) for _ in g.subjects(RDF.type, OWL.Class)
        if type(_) is URIRef
    ])
    # qnames that still lack a prefix are bare NIFGA fragments
    curies = ['NIFGA:' + n for n in classes if ':' not in n]
    matches = async_getter(sgv.findById, [(c, ) for c in curies])

    replaced_by = {}        # NIFGA curie -> replacement id (None / 'NOREP' / tuple on dupes)
    exact = {}              # curies that had exactly one equivalent class
    internal_equivs = {}    # equivalences that stay inside NIFGA space
    irbcs = {}              # curie -> (existing_replaced, buck-passed replacement)

    def equiv(curie, label):
        # manual overrides win outright
        if curie in manual:
            replaced_by[curie] = manual[curie]
            return manual[curie]

        ec = sgg.getNeighbors(curie, relationshipType='equivalentClass')
        nodes = [n for n in ec['nodes'] if n['id'] != curie]
        if len(nodes) > 1:
            # multiple equivalents: prefer UBERON ids, warn on duplicates
            #print('wtf node', [n['id'] for n in nodes], curie)
            for node in nodes:
                id_ = node['id']
                label_ = node['lbl']

                if id_.startswith('UBERON'):
                    if curie in replaced_by:
                        one = replaced_by[curie]
                        replaced_by[curie] = one, id_
                        print('WE GOT DUPES', curie, label, one, id_)  # TODO
                    else:
                        replaced_by[curie] = id_
                else:
                    internal_equivs[curie] = id_
        elif not nodes:
            # no equivalent classes: try deprecation metadata, then term search
            node = sgg.getNode(curie)['nodes'][0]
            if OWL.deprecated.toPython() in node['meta']:
                print('THIS CLASS IS DEPRECATED', curie)
                lbl = node['lbl']
                if lbl.startswith(
                        'Predominantly white regional') or lbl.startswith(
                            'Predominantly gray regional'):
                    print('\tHE\'S DEAD JIM!', lbl, node['id'])
                    replaced_by[curie] = 'NOREP'
                if IRBC in node['meta']:
                    existing_replaced = node['meta'][IRBC][0]
                    ec2 = sgg.getNeighbors(existing_replaced,
                                           relationshipType='equivalentClass')
                    print('\tFOUND ONE', existing_replaced)
                    #scigPrint.pprint_node(sgg.getNode(existing_replaced))
                    if ec2['edges']:  # pass the buck if we can
                        print('\t', end='')
                        scigPrint.pprint_edge(ec2['edges'][0])
                        rb = ec2['edges'][0]['obj']
                        print('\tPASSING BUCK : (%s -> %s -> %s)' %
                              (curie, existing_replaced, rb))
                        irbcs[curie] = (existing_replaced, rb)
                        replaced_by[curie] = rb
                        return nodes
                    else:
                        er_node = sgv.findById(existing_replaced)
                        if not er_node['deprecated']:
                            if not er_node['curie'].startswith('NIFGA:'):
                                print('\tPASSING BUCK : (%s -> %s)' %
                                      (curie, er_node['curie']))
                                return nodes

                        print(
                            '\tERROR: could not pass buck, we are at a dead end at',
                            er_node)  # TODO
                    print()

            # fall back to searching for the label and keeping UBERON hits
            moar = [
                t for t in sgv.findByTerm(label)
                if t['curie'].startswith('UBERON')
            ]
            if moar:
                #print(moar)
                #replaced_by[curie] = moar[0]['curie']
                if len(moar) > 1:
                    print('WARNING', curie, label,
                          [(m['curie'], m['labels'][0]) for m in moar])

                for node in moar:
                    #if node['curie'] in uberon_obsolete:  # node['deprecated']?
                    #continue
                    ns = sgg.getNode(node['curie'])
                    assert len(
                        ns['nodes']) == 1, "WTF IS GOING ON %s" % node['curie']
                    ns = ns['nodes'][0]
                    if _doprint:
                        print(
                            'Found putative replacement in moar: (%s -> %s)' %
                            (curie, ns['id']))
                        if DBX in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][DBX],
                                  node['labels'][0], node['synonyms'])

                        if AID in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][AID],
                                  node['labels'][0], node['synonyms'])

                        if CON in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][CON],
                                  node['labels'][0], node['synonyms'])

                    replaced_by[curie] = ns['id']
            else:
                replaced_by[curie] = None
                if False:  # review
                    print('NO FORWARD EQUIV', tc.red(curie), label)  # TODO
                    # NOTE(review): type(v) == iter can never be true (iter is
                    # a builtin function, not a type) — dead code under if False
                    for k, v in sorted(
                            sgg.getNode(curie)['nodes'][0]['meta'].items()):
                        if type(v) == iter:
                            print(' ' * 4, k)
                            for _ in v:
                                print(' ' * 8, _)
                        else:
                            print(' ' * 4, k, v)
        else:
            # exactly one equivalent class: take it as the replacement
            node = nodes[0]
            replaced_by[curie] = node['id']
            exact[curie] = node['id']

        return nodes

    equivs = [equiv(c['curie'], c['labels'][0])
              for c in matches]  # async causes print issues :/

    return g, matches, exact, internal_equivs, irbcs, replaced_by
Example #7
0
# Map every neurolex Id to a curie and dump the result as JSON.
# NOTE(review): `c` and `v` are service clients defined elsewhere in the file.
curies = c.getCuries()
curies.pop('')  # don't want NIFSTD uris just yet

with open(os.path.expanduser('~/git/nlxeol/neurolex_full.csv'), 'rt') as f:
    rows = [r for r in csv.reader(f)]

# column index of the 'Id' field; skip blank ids, repeated header rows,
# and rows whose first cell is a Resource: entry
Id = rows[0].index('Id')
ids = [(r[Id],) for r in rows if r[Id] and r[Id] != 'Id' and 'Resource:' not in r[0]]

items = tuple(curies.items())  # (prefix, uri) pairs tried as fallbacks below
findById = v.findById  # local alias used by async_func
def async_func(id_):
    """Resolve *id_* to a curie: first as-is (when it already has a prefix),
    then by trying every known prefix, finally falling back to 'NLXONLY'."""
    if ':' in id_:
        hit = findById(id_)
        if hit:
            return id_, hit['curie']
    for prefix, _uri in items:
        hit = findById(prefix + ':' + id_)
        if hit:
            return id_, hit['curie']
    return id_, 'NLXONLY'
        
# resolve all ids concurrently and build {id: curie}
id_curie = async_getter(async_func, ids)
j = {id_:curie for id_, curie in id_curie}


with open('/tmp/total_curie_fragment.json', 'wt') as f:
    json.dump(j, f, sort_keys=True, indent=4)

# NOTE(review): everything below re-runs the pass above on a freshly re-read
# CSV — it appears to be a reformatted duplicate of the preceding block.
with open(os.path.expanduser('~/git/nlxeol/neurolex_full.csv'), 'rt') as f:
    rows = [r for r in csv.reader(f)]

Id = rows[0].index('Id')
ids = [(r[Id], ) for r in rows
       if r[Id] and r[Id] != 'Id' and 'Resource:' not in r[0]]

items = tuple(curies.items())
findById = v.findById


def async_func(id_):
    """Map *id_* to its curie via findById, falling back to each known
    prefix in turn and then to the sentinel 'NLXONLY'."""
    if ':' in id_:
        resolved = findById(id_)
        if resolved:
            return id_, resolved['curie']
    for prefix, _unused_uri in items:
        candidate = prefix + ':' + id_
        resolved = findById(candidate)
        if resolved:
            return id_, resolved['curie']
    return id_, 'NLXONLY'


# second pass: same resolution and JSON dump as the block above
id_curie = async_getter(async_func, ids)
j = {id_: curie for id_, curie in id_curie}

with open('/tmp/total_curie_fragment.json', 'wt') as f:
    json.dump(j, f, sort_keys=True, indent=4)
Example #9
0
def swanson():
    """ not really a parcellation scheme

    Older variant: parse resources/swanson_aligned.txt into an ontology
    graph plus a node/edge JSON structure and write it to /tmp/parc/.

    :return: Tuple of (ontology id, None).
    """
    ONT_PATH = 'http://ontology.neuinfo.org/NIF/ttl/generated/'
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = makePrefixes('ilx', 'owl', 'OBOANN', 'UBERON')
    PREFIXES.update({
        '':ontid + '/',  # looking for better options
        'SWAN':'http://swanson.org/node/',
        'SWAA':'http://swanson.org/appendix/',
    })
    new_graph = makeGraph(filename, PREFIXES, writeloc='/tmp/parc/')
    new_graph.add_ont(ontid,
                      'Swanson brain partomies',
                      'Swanson 2014 Partonomies',
                      'This file is automatically generated from....',
                      TODAY)

    with open('resources/swanson_aligned.txt', 'rt') as f:
        lines = [l.strip() for l in f.readlines()]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    #fix for capitalization since this header is reused
    fixed = ' or '.join([' ('.join([n.capitalize() for n in _.split(' (')]) for _ in lines[635].lower().split(' or ')]).replace('human','HUMAN')
    lines[635] = fixed

    # parse each non-comment line into (depth, name, citation, next-syn flag);
    # depth is encoded as runs of five dots per level
    data = []
    for l in lines:
        if not l.startswith('#'):
            level = l.count('.'*5)
            l = l.strip('.')
            if ' (' in l:
                # "name1 (cite1) or name2 (cite2)": name1 becomes a synonym
                # of the row that follows, flagged 'NEXT SYN'
                if ') or' in l:
                    n1, l = l.split(') or')
                    area_name, citationP =  n1.strip().split(' (')
                    citation = citationP.rstrip(')')
                    d = (level, area_name, citation, 'NEXT SYN')
                    data.append(d)
                    #print(tc.red(tc.bold(repr(d))))

                area_name, citationP =  l.strip().split(' (')
                citation = citationP.rstrip(')')
            else:
                area_name = l
                citation = None

            d = (level, area_name, citation, None)
            #print(d)
            data.append(d)
    # look up each area name; keep only UBERON curies, first hit wins
    results = async_getter(sgv.findByTerm, [(d[1],) for d in data])
    #results = [None] * len(data)
    curies = [[r['curie'] for r in _ if 'UBERON' in r['curie']] if _ else [] for _ in results]
    output = [_[0] if _ else None for _ in curies]

    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
            [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    # NOTE(review): rowParse appears to dispatch each zoop row to the methods
    # named after the header columns — confirm against its implementation.
    class SP(rowParse):
        def __init__(self):
            self.nodes = defaultdict(dict)
            self._appendix = 0
            self.appendicies = {}
            self._last_at_level = {}
            self.names = defaultdict(set)
            self.children = defaultdict(set)
            self.parents = defaultdict(set)
            self.next_syn = False
            super().__init__(zoop)

        def Depth(self, value):
            # a pending NEXT SYN from the previous row makes this row a synonym
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    if self._appendix:
                        # close out the previous appendix before starting a new one
                        self.appendicies[self._appendix]['children'] = dict(self.children)
                        self.appendicies[self._appendix]['parents'] = dict(self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':  # sentinel row appended to zoop above
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name':apname.capitalize(),
                        'type':self.citation.capitalize() if self.citation else None}
                    return
                else:
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix]['taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[self._appendix]['taxon'] = 'ANIMALIA'.capitalize()

                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(self._rowind)
                else:
                    # disambiguate citation-less names with appendix + parent label
                    self.name += str(self._appendix) + self.nodes[self._last_at_level[self.depth - 1]]['label']
                    #print(level, self.name)
                    # can't return here because they are their own level
                # replace with actually doing something...
                self.nodes[self._rowind]['label'] = self.name
                self.nodes[self._rowind]['citation'] = self.citation
                self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except:  # NOTE(review): bare except dropping into embed() for debugging
                    embed()
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            # merge rows with duplicate name+citation: lowest row index wins
            replace = {}
            for asdf in [sorted(n) for k,n in self.names.items() if len(n) > 1]:
                replace_with, to_replace = asdf[0], asdf[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r, rw in replace.items():
                #print(self.nodes[rw])
                o = self.nodes.pop(r)
                #print(o)

            # rewrite child/parent ids in every appendix to the surviving ids
            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    tp = [_ for _ in sorted(['{: <50}'.format(n['label']) + n['uberon'] if n['uberon'] else n['label'] for n in sp.nodes.values()])]
    #print('\n'.join(tp))
    #print(sp.appendicies[1].keys())
    #print(sp.nodes[1].keys())
    nbase = 'http://swanson.org/node/%s'
    json_ = {'nodes':[],'edges':[]}
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, 'ilx:swansonBrainRegionConcept', label=anns['label'])
        new_graph.add_node(nid, 'OBOANN:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl':anns['label'],'id':'SWA:' + str(node)})
        #if anns['uberon']:
            #new_graph.add_node(nid, rdflib.OWL.equivalentClass, anns['uberon'])  # issues arrise here...

    # one hasPart/partOf object-property pair per appendix
    for appendix, data in sp.appendicies.items():
        aid = 'http://swanson.org/appendix/%s' % appendix
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_node(aid, 'ilx:hasTaxonRank', data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        ahp = HASPART + str(appendix)
        apo = PARTOF + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items():  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_hierarchy(cid, ahp, pid)  # note hierarhcy inverts direction
                new_graph.add_hierarchy(pid, apo, cid)
                json_['edges'].append({'sub':'SWA:' + str(child),'pred':apo,'obj':'SWA:' + str(parent)})

    new_graph.write(convert=False)
    if False:  # debugging: render one tree per partonomy
        Query = namedtuple('Query', ['root','relationshipType','direction','depth'])
        mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?
        for i, n in enumerate(mapping):
            a, b = creatTree(*Query('SWA:' + str(n), 'ilx:partOf' + str(i + 1), 'INCOMING', 10), json=json_)
            print(a)
    return ontid, None
Example #10
0
def make_nifga_graph(_doprint=False):
    """Resolve every NIFGA class to a replacement identifier (mostly UBERON).

    Loads the NIFGA ontology from ``nifga_path``, looks up each NIFGA class
    in SciGraph, and records a replacement for it using, in order of
    preference: a ``manual`` override, equivalentClass neighbors, the
    replaced-by annotation on deprecated classes (following one level of
    indirection when possible), or a label search restricted to UBERON hits.

    :param _doprint: when True, print details of putative replacements found
        through the label-search fallback.
    :return: tuple ``(g, matches, exact, internal_equivs, irbcs, replaced_by)``
        where ``g`` is the parsed rdflib graph, ``matches`` the SciGraph
        lookup results for all NIFGA curies, and the remaining dicts map
        NIFGA curies to the various kinds of replacements found.
    """
    # use equivalent class mappings to build a replacement mapping
    g = rdflib.Graph()
    g.parse(nifga_path, format='turtle')

    getQname = g.namespace_manager.qname
    # restrict to URIRef subjects so blank-node classes are skipped
    classes = sorted([getQname(_) for _ in g.subjects(RDF.type, OWL.Class) if type(_) is URIRef])
    # names qname() leaves unprefixed come from the default namespace -> NIFGA ids
    curies = ['NIFGA:' + n for n in classes if ':' not in n]
    matches = async_getter(sgv.findById, [(c,) for c in curies])

    replaced_by = {}       # curie -> replacement id, 'NOREP', None, or a tuple on dupes
    exact = {}             # curie -> the single exact equivalentClass match
    internal_equivs = {}   # curie -> non-UBERON (NIFGA-internal) equivalent
    irbcs = {}             # curie -> (existing replacedBy target, its equivalent)
    def equiv(curie, label):
        """Record the best replacement for one NIFGA curie in the enclosing
        dicts; returns the equivalentClass neighbor nodes (debugging aid)."""
        if curie in manual:  # manual overrides win outright
            replaced_by[curie] = manual[curie]
            return manual[curie]

        ec = sgg.getNeighbors(curie, relationshipType='equivalentClass')
        nodes = [n for n in ec['nodes'] if n['id'] != curie]
        if len(nodes) > 1:
            # multiple equivalents: prefer UBERON ids, stash the rest as internal
            #print('wtf node', [n['id'] for n in nodes], curie)
            for node in nodes:
                id_ = node['id']
                label_ = node['lbl']

                if id_.startswith('UBERON'):
                    if curie in replaced_by:
                        # second UBERON hit: keep both as a tuple and flag it
                        one = replaced_by[curie]
                        replaced_by[curie] = one, id_
                        print('WE GOT DUPES', curie, label, one, id_)  # TODO
                    else:
                        replaced_by[curie] = id_
                else:
                    internal_equivs[curie] = id_
        elif not nodes:
            # no equivalentClass edge at all; inspect the node's own metadata
            node = sgg.getNode(curie)['nodes'][0]
            if OWL.deprecated.toPython() in node['meta']:
                print('THIS CLASS IS DEPRECATED', curie)
                lbl = node['lbl']
                if lbl.startswith('Predominantly white regional') or lbl.startswith('Predominantly gray regional'):
                    print('\tHE\'S DEAD JIM!', lbl, node['id'])
                    replaced_by[curie] = 'NOREP'
                if IRBC in node['meta']:
                    # the class already names its replacement; try to follow
                    # that replacement's own equivalence one level further
                    existing_replaced = node['meta'][IRBC][0]
                    ec2 = sgg.getNeighbors(existing_replaced, relationshipType='equivalentClass')
                    print('\tFOUND ONE', existing_replaced)
                    #scigPrint.pprint_node(sgg.getNode(existing_replaced))
                    if ec2['edges']:  # pass the buck if we can
                        print('\t',end='')
                        scigPrint.pprint_edge(ec2['edges'][0])
                        rb = ec2['edges'][0]['obj']
                        print('\tPASSING BUCK : (%s -> %s -> %s)' % (curie, existing_replaced, rb))
                        irbcs[curie] = (existing_replaced, rb)
                        replaced_by[curie] = rb
                        return nodes
                    else:
                        er_node = sgv.findById(existing_replaced)
                        if not er_node['deprecated']:
                            if not er_node['curie'].startswith('NIFGA:'):
                                print('\tPASSING BUCK : (%s -> %s)' % (curie, er_node['curie']))
                                return nodes

                        print('\tERROR: could not pass buck, we are at a dead end at', er_node)  # TODO
                    print()

            # fallback: search by label and keep only UBERON candidates
            moar = [t for t in sgv.findByTerm(label) if t['curie'].startswith('UBERON')]
            if moar:
                #print(moar)
                #replaced_by[curie] = moar[0]['curie']
                if len(moar) > 1:
                    print('WARNING', curie, label, [(m['curie'], m['labels'][0]) for m in moar])

                for node in moar:
                    #if node['curie'] in uberon_obsolete:  # node['deprecated']?
                        #continue
                    ns = sgg.getNode(node['curie'])
                    assert len(ns['nodes']) == 1, "WTF IS GOING ON %s" % node['curie']
                    ns = ns['nodes'][0]
                    if _doprint:
                        print('Found putative replacement in moar: (%s -> %s)' % (curie, ns['id']))
                        if DBX in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][DBX],
                                  node['labels'][0], node['synonyms'])

                        if AID in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][AID],
                                  node['labels'][0], node['synonyms'])

                        if CON in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][CON],
                                  node['labels'][0], node['synonyms'])

                    # NOTE: when several candidates exist the last one wins
                    replaced_by[curie] = ns['id']
            else:
                replaced_by[curie] = None
                if False:  # review
                    print('NO FORWARD EQUIV', tc.red(curie), label)  # TODO
                    for k,v in sorted(sgg.getNode(curie)['nodes'][0]['meta'].items()):
                        # BUG FIX: was `type(v) == iter`, which is always False
                        # (`iter` is a builtin function, not the type of any
                        # value) — multi-valued meta entries are lists.
                        if isinstance(v, (list, tuple)):
                            print(' ' * 4, k)
                            for _ in v:
                                print(' ' * 8, _)
                        else:
                            print(' ' * 4, k, v)
        else:
            # exactly one equivalent: take it as the exact replacement
            node = nodes[0]
            replaced_by[curie] = node['id']
            exact[curie] = node['id']

        return nodes

    # drive the lookups sequentially, purely for their side effects on the
    # dicts above (async causes print issues :/)
    for c in matches:
        equiv(c['curie'], c['labels'][0])

    return g, matches, exact, internal_equivs, irbcs, replaced_by