Ejemplo n.º 1
0
def catalog_extras(fetch=False):
    """Splice the lines of ``catalog-extras`` into ``catalog-v001.xml``.

    Both files are located relative to the ``ttl`` directory of the path
    returned by ``auth.get_path('ontology-local-repo')``.  The extras are
    inserted just before the final line of the catalog.  The catalog is left
    untouched when its second to last line already equals the last extras
    line.

    Args:
        fetch: when true, also download every extras entry.  The url and the
            save location are taken from the 4th and 6th ``"`` separated
            fields of each extras line and the location is resolved relative
            to the ``ttl`` directory.
    """
    path = Path(auth.get_path('ontology-local-repo'), 'ttl')
    cat = (path / 'catalog-v001.xml').as_posix()
    with open((path / '../catalog-extras').as_posix(),
              'rt') as ce, open(cat, 'rt') as c:
        clines = c.readlines()
        celines = ce.readlines()

    if not celines:
        # an empty extras file used to die with IndexError on celines[-1]
        print(tc.blue('INFO:'), 'catalog-extras is empty doing nothing')
        return

    if not celines[-1].endswith('\n'):
        # Without a trailing newline the last extras line would be fused with
        # the closing line of the catalog, the check below would never match
        # again and the extras would be appended anew on every run.
        celines[-1] += '\n'

    if clines[-2] != celines[-1]:
        with open(cat, 'wt') as f:
            f.writelines(clines[:-1] + celines + clines[-1:])
    else:
        print(tc.blue('INFO:'),
              'extras already added to catalog doing nothing')

    if fetch:
        print(tc.blue('INFO:'), 'fetching extras')

        def fetch_and_save(url, loc):
            # download a single entry and store it at path / loc
            resp = requests.get(url)
            saveloc = (path / loc).as_posix()
            if resp.ok:
                with open(saveloc, 'wb') as f:
                    f.write(resp.content)

                print(tc.blue('INFO:'), f'{url:<60} written to {loc}')
            else:
                print(tc.red('WARNING:'), f'failed to fetch {url}')

        # the one element tuple only exists to unpack the split fields
        Async()(deferred(fetch_and_save)(url, loc) for line in celines
                for _, _, _, url, _, loc, _ in (line.split('"'), ))
Ejemplo n.º 2
0
def do_patch(patch_config, local_base):
    """Apply the patches listed in a yaml config to files under ``local_base``.

    The config maps a patchset name to a mapping of patch file (relative to
    the directory that holds the config) to a mapping with a ``target`` path
    (relative to ``local_base``) and optionally a ``remote`` url.  A target
    that does not exist yet is downloaded from ``remote`` before patching.

    Args:
        patch_config: path of the yaml config file.
        local_base: directory that ``patch -p1`` is run in.

    Yields:
        The posix path of every target for which ``patch`` exited with 0.

    Raises:
        FileNotFoundError: a patch file named in the config does not exist.
        requests.HTTPError: downloading a missing target failed.
        subprocess.CalledProcessError: ``patch`` exited with a status above 1.
    """
    repo_base = Path(local_base)
    config_path = Path(patch_config)
    with open(patch_config, 'rt') as f:
        # an empty document loads as None
        config = yaml.safe_load(f) or {}

    for patchset, patches in config.items():
        # a patchset without entries also loads as None
        for patch, target_remote in (patches or {}).items():
            patchfile = config_path.parent / patch
            if not patchfile.exists():
                raise FileNotFoundError(
                    f'Cannot find {patchfile} specified in {config_path}')
            target = target_remote['target']
            targetfile = repo_base / target
            if 'remote' in target_remote and not targetfile.exists():
                remote = target_remote['remote']
                resp = requests.get(remote)
                # Never save an error page as the target: patching it fails
                # with status 1, which is ignored below, and the bogus file
                # would prevent any later download attempt.
                resp.raise_for_status()
                with open(targetfile, 'wb') as f:
                    f.write(resp.content)

            print(tc.blue('INFO: patching'), patchset, patchfile, targetfile)
            try:
                out = subprocess.check_output(
                    ['patch', '-p1', '-N', '-i',
                     patchfile.as_posix()],
                    cwd=repo_base.as_posix(),
                    stderr=subprocess.STDOUT).decode().rstrip()
            except subprocess.CalledProcessError as e:
                # FIXME this is not failing on other types of patching errors!
                if e.returncode > 1:  # 1 means already applied
                    print(e.stdout.decode())
                    raise
            else:
                print(out)
                yield targetfile.as_posix()
Ejemplo n.º 3
0
    def _ontology_local_repo(self):
        """Return the path of the local ontology repository.

        Candidates are tried in order: the ``ontology_local_repo`` entry of
        ``self.config``, then ``self._maybe_repo``, then
        ``self.ontology_repo`` below each directory from three levels above
        this file up to, but not including, ``/``.  A path that does not
        exist is returned when none of the candidates exists.
        """
        nowhere = Path('/dev/null/does-not-exist')  # seems reasonable ...
        try:
            configured = Path(self.config['ontology_local_repo'])
        except (KeyError, TypeError, FileNotFoundError):
            configured = nowhere

        fallback = self._maybe_repo
        if configured.exists():
            return configured

        if fallback.exists():
            return fallback

        origin = Path(__file__).parent.parent.parent.absolute()
        root = Path('/')
        current = origin
        while current != root:
            candidate = current / self.ontology_repo
            if candidate.exists():
                log.info(
                    tc.blue('INFO:') +
                    f'Ontology repository found at {candidate}')
                return candidate

            current = current.parent

        # only reached when the walk ran out of parent directories
        log.warning(
            tc.red('WARNING:') +
            f'No repository found in any parent directory of {origin}'
        )
        return nowhere
                def test_file(self, module_path=module_path, stem=stem,
                              fname=fname):
                    """Import ``module_path`` and remember the module.

                    The module is stored in ``self._modules``.  When it
                    defines ``_CHECKOUT_OK`` the current value is printed and
                    then set to True.  ``post_load`` runs whether or not the
                    import succeeded.
                    """
                    # FIXME marking the dependent tests as xfail when the
                    # import fails does not work here because collected tests
                    # cannot be uncollected, so errors simply propagate
                    try:
                        print(tc.ltyellow('IMPORTING:'), module_path)
                        # this returns the submod
                        loaded = import_module(module_path)
                        self._modules[module_path] = loaded
                        if not hasattr(loaded, '_CHECKOUT_OK'):
                            return

                        print(tc.blue('MODULE CHECKOUT:'), loaded,
                              loaded._CHECKOUT_OK)
                        loaded._CHECKOUT_OK = True
                    finally:
                        post_load()
Ejemplo n.º 5
0
        def fetch_and_save(url, loc):
            """Download ``url`` and write the body to ``path / loc``.

            Prints an INFO line after writing, or a WARNING line (and writes
            nothing) when the response is not ok.
            """
            response = requests.get(url)
            destination = (path / loc).as_posix()
            if not response.ok:
                print(tc.red('WARNING:'), f'failed to fetch {url}')
                return

            with open(destination, 'wb') as sink:
                sink.write(response.content)

            print(tc.blue('INFO:'), f'{url:<60} written to {loc}')
Ejemplo n.º 6
0
 def test_file(self, module_path=module_path, stem=stem):
     """Import ``module_path`` and remember the module in ``self._modules``.

     When the module defines ``_CHECKOUT_OK`` the current value is printed
     and then set to True.
     """
     print(tc.ltyellow('IMPORTING:'), module_path)
     # this returns the submod
     loaded = import_module(module_path)
     self._modules[module_path] = loaded
     if not hasattr(loaded, '_CHECKOUT_OK'):
         return

     print(tc.blue('MODULE CHECKOUT:'), loaded, loaded._CHECKOUT_OK)
     loaded._CHECKOUT_OK = True
Ejemplo n.º 7
0
    def chain_to_typed_chain(chain, g, func):
        """Yield subject, edge symbol and object for consecutive chain nodes.

        For each adjacent pair the ``u`` attributes of both nodes are taken
        and every linker returned by ``get_linkers`` produces three
        consecutive items: subject, ``edge_to_symbol`` of the linker, object.
        When the forward lookup gives no (truthy) last linker the reversed
        lookup is used instead, with the symbol built with ``rev=True``.
        """
        for left, right in zip(chain, chain[1:]):
            # TODO deal with reversed case
            subj = left.u
            obj = right.u
            printq(subj, obj)

            forward = None
            for forward in get_linkers(subj, obj, g, func):
                yield from (subj, edge_to_symbol(forward), obj)

            if forward:
                continue

            for backward in get_linkers(obj, subj, g, func):
                print(tc.blue(backward))
                yield from (subj, edge_to_symbol(backward, rev=True), obj)
Ejemplo n.º 8
0
    def pprint_meta(meta, print_iri=True):
        """Print the entries of ``meta`` as indented ``key value`` lines.

        Args:
            meta: mapping of keys to a value or list of values.  The
                ``curie`` and ``iri`` keys are only used for the heading and
                entries whose value is None are skipped.
            print_iri: when true, start with ``meta['curie']`` if present,
                otherwise with a blank line followed by the (shortened) iri.
        """
        def abbreviate(text):
            # swap the first matching iri prefix for its short form
            for long_form, short_form in scigPrint.shorten.items():
                if long_form in text:
                    return text.replace(long_form, short_form + ':')

            return text

        if print_iri:
            if 'curie' in meta:
                print(meta['curie'])
            else:
                heading = qname(meta['iri'])
                if heading == meta['iri']:
                    # qname could not shorten it, fall back to the table
                    heading = abbreviate(heading)
                print()
                print(tc.blue(heading))

        for key, value in sorted(meta.items()):
            if key in ('curie', 'iri'):
                continue

            key = abbreviate(key)
            if value is None:
                continue

            # pad the key column to 10, 20 or 30 characters
            if len(key) <= 10:
                width = 10
            elif len(key) <= 20:
                width = 20
            else:
                width = 30

            prefix = ' ' * 4 + f'{key:<{width}}'
            if not isinstance(value, list):
                print(prefix,
                      scigPrint.sv(value, len(prefix) + 1, len(prefix) - 3))
            elif len(value) > 1:
                print(prefix, '[')
                for item in value:
                    print(' ' * 8 + scigPrint.sv(item, 8, 8))
                print(' ' * 4 + ']')
            elif len(value) == 1:
                only = value[0]
                print(prefix,
                      scigPrint.sv(only, len(prefix) + 1, len(prefix) - 3))
Ejemplo n.º 9
0
def printe(*args, **kwargs):
    """Print each positional argument passed through ``str`` and ``tc.blue``.

    Keyword arguments are handed to ``print`` unchanged.
    """
    colored = [tc.blue(str(arg)) for arg in args]
    print(*colored, **kwargs)
Ejemplo n.º 10
0
def would_you_like_to_know_more_question_mark():
    """Build text reports relating ``h_uris`` to the ilx label data.

    Relies on many names that are not defined in this function (``h_uris``,
    ``resolver_not_ilx_only``, ``sgg``, ``sgv``, ``hng``, ``datal``,
    ``gitf``, ``_ilx``, ``all_uris``, ``ilxb``, ...).

    The visible body has no return statement; the reports end up in the
    local names ``mmr_text``, ``mmr_1_to_1_text``, ``no_uniprot_text`` and
    ``arrr_n11_text``.
    """

    # resolving differences between classes
    # meta keys whose values are collected per uri in the first loop below
    more_ids = set((
        'http://uri.neuinfo.org/nif/nifstd/readable/ChEBIid',
        'http://uri.neuinfo.org/nif/nifstd/readable/GOid',
        'http://uri.neuinfo.org/nif/nifstd/readable/MeshUid',
        'http://uri.neuinfo.org/nif/nifstd/readable/PMID',
        'http://uri.neuinfo.org/nif/nifstd/readable/UmlsCui',
        'http://uri.neuinfo.org/nif/nifstd/readable/bamsID',
        'http://uri.neuinfo.org/nif/nifstd/readable/bonfireID',
        'http://uri.neuinfo.org/nif/nifstd/readable/cell_ontology_ID',
        'http://uri.neuinfo.org/nif/nifstd/readable/definingCitationID',
        'http://uri.neuinfo.org/nif/nifstd/readable/definingCitationURI',
        'http://uri.neuinfo.org/nif/nifstd/readable/emapMouseStageDataID',
        'http://uri.neuinfo.org/nif/nifstd/readable/emapMouseStageDiagramID',
        'http://uri.neuinfo.org/nif/nifstd/readable/externalSourceId',
        'http://uri.neuinfo.org/nif/nifstd/readable/externalSourceURI',
        'http://uri.neuinfo.org/nif/nifstd/readable/gbifID',
        'http://uri.neuinfo.org/nif/nifstd/readable/gbifTaxonKeyID',
        'http://uri.neuinfo.org/nif/nifstd/readable/gene_Ontology_ID',
        #'http://uri.neuinfo.org/nif/nifstd/readable/hasExternalSource',
        'http://uri.neuinfo.org/nif/nifstd/readable/hasGenbankAccessionNumber',
        'http://uri.neuinfo.org/nif/nifstd/readable/imsrStandardStrainName',
        'http://uri.neuinfo.org/nif/nifstd/readable/isReplacedByClass',
        'http://uri.neuinfo.org/nif/nifstd/readable/jaxMiceID',
        'http://uri.neuinfo.org/nif/nifstd/readable/ncbiTaxID',
        'http://uri.neuinfo.org/nif/nifstd/readable/neuronamesID',
        'http://uri.neuinfo.org/nif/nifstd/readable/nifID',
        'http://uri.neuinfo.org/nif/nifstd/readable/sao_ID',
        'http://uri.neuinfo.org/nif/nifstd/readable/umls_ID',
        'http://www.geneontology.org/formats/oboInOwl#id',
    ))

    outside = []  # every collected more_ids value, duplicates included
    eee = {}  # uri -> {qname of meta key: values} for uris that have any
    resolver_not_ilx_only_but_not_in_scigraph = set()  # resources.ttl
    _res = Graph().parse((gitf / 'NIF-Ontology/ttl/resources.ttl').as_posix(), format='turtle')
    reslookup = {uri:[l] for uri, l in _res.subject_objects(rdfs.label)}
    for uri in chain(h_uris, resolver_not_ilx_only):
        if 'uri.neuinfo.org' in uri:
            try:
                meta = sgg.getNode(uri.toPython())['nodes'][0]['meta']
                asdf = {hng.qname(k):v for k, v in meta.items() if k in more_ids}
            except TypeError:
                # presumably getNode returned None for an unknown uri -- TODO confirm
                resolver_not_ilx_only_but_not_in_scigraph.add(uri)  # resources.ttl ;)
                if uri in reslookup:  # no differentia
                    asdf = False
                else:
                    asdf = False
                    print('WTF', uri)
            if asdf:
                #print(uri, asdf)
                eee[uri] = asdf
                for l in asdf.values():
                    for e in l:
                        outside.append(e)

    # values that were collected more than once, and the uris that carry them
    outside_dupes = [v for v, c in Counter(outside).most_common() if c > 1]
    eee_dupes = {k:v for k, v in eee.items() if anyMembers(outside_dupes, *(e for l in v.values() for e in l))}

    #for uri, meta in sorted(eee_dupes.items(), key=lambda a:sorted(a[1].values())):
        #print(uri.toPython(), sorted((e.replace('PMID: ', 'PMID:'), k) for k, l in meta.items() for e in l))


    # attempt to deal with label mappings
    iexisting = defaultdict(set)  # ilx -> existing iris
    iiexisting = {}  # existing iri -> ilx
    for i, existing in zip(datal('ilx'), datal('iri')):
        #if 'uri.neuinfo.org' in existing:
        if 'interlex.org' not in existing and 'neurolex.org' not in existing:
            iexisting[i].add(URIRef(existing))
            iiexisting[URIRef(existing)] = i
    # plain dict copy, this drops the defaultdict behaviour
    iexisting = {**iexisting}

    _ilabs = {k:l for k, l in zip(datal('ilx'), datal('label'))}
    def inner(iri):
        # returns the iri together with its labels, or an empty list when
        # findById has no result
        resp = sgv.findById(iri)
        if resp is not None:
            l = resp['labels']
        else:
            l = [] #_ilabs[iiexisting[iri]] + '** already in ilx **']
            #print('trouble?', iri)  # ilx only
        return iri, l

    #labs = {k:v[0] if v else '<--NO-LABEL-->' for k, v in Async()(deferred(inner)(id_) for id_ in chain(h_uris, (e for s in iexisting.values() for e in s)))}
    labs = {k:v[0] if v else '<--NO-LABEL-->' for k, v in Async()(deferred(inner)(id_) for id_ in h_uris)}
    ilabs = {k:l.lower() for k, l in zip(datal('ilx'), datal('label'))}
    iilabs = {v:k for k, v in ilabs.items()}
    # fails when two ilx entries share the same lowercased label
    assert len(ilabs) == len(iilabs)
    missing_map = {k:iilabs[v.lower()] for k, v in labs.items() if v and v.lower() in iilabs}  # XXX this is not valid

    missing_existing = {i:[m, *iexisting[i]] for m, i in missing_map.items() if i in iexisting}

    missing_equivs = {next(iter(iexisting[i])):i for m, i in missing_map.items() if i in iexisting}

    eid = NIFRID.externalSourceId.toPython()
    ded = owl.deprecated.toPython()
    # SP: -> swissprot vs uniprot
    mmr = []  # one formatted report line per uri
    proto_mmr_1_to_1 = {}  # uri -> the same report line
    arrr = defaultdict(set)  # collected more_ids value -> uris that carry it
    uniprot_iuphar = set()  # uris with an 'SP:' or 'IUPHAR:' value
    # entries of missing_map win when a uri is in both mappings
    for uri, ilx_frag in {**missing_equivs, **missing_map}.items():
        uri = URIRef(uri)
        try:
            meta = sgg.getNode(uri.toPython())['nodes'][0]['meta']
        except TypeError:
            # just ignore these, they are ilx only :/
            meta = {}
        if eid in meta:
            src = meta[eid][0]
            if src.startswith('SP:'):
                src = tc.yellow(src.replace('SP:', 'http://www.uniprot.org/uniprot/'))
            #elif src.startswith('IUPHAR:'):
                #pass
            #else:
                #src = 'TODO'
        elif ded in meta and meta[ded]:
            src = tc.red('ded ')
        else:
            src = 'TODO'
        val = labs[uri] if uri in labs else _ilabs[ilx_frag] + ' **'
        if uri in eee:
            differentia = str(eee[uri])
            for v in eee[uri].values():
                for e in v:
                    arrr[e].add(uri)
                    if 'SP:' in e or 'IUPHAR:' in e:
                        uniprot_iuphar.add(uri)
        else:
            differentia = ''

        if uri in _ilx and uri in all_uris:
            ruri = SGG[hng.qname(uri)]
            ruri = tc.blue(f'{ruri:<60}')
        else:
            ruri = uri
            ruri = f'{ruri:<60}'

        v = ' '.join((f'{val:<60}',
                      src,
                      ruri,
                      ilxb[ilx_frag],
                      differentia))
        mmr.append(v)
        proto_mmr_1_to_1[uri] = v
        src = None

    arrr = {**arrr}
    # values that are shared by more than one uri
    arrr_not_1_to_1 = {k:v for k, v in arrr.items() if len(v) > 1}
    #arrr_n11_uris = set((u.toPython() for v in arrr_not_1_to_1.values() for u in v))
    # NOTE(review): set.union(*()) raises TypeError, so this line fails when
    # arrr_not_1_to_1 is empty -- confirm that can not happen
    arrr_n11_uris = set.union(*arrr_not_1_to_1.values())
    mmr_1_to_1 = {k:v for k, v in proto_mmr_1_to_1.items() if k not in arrr_n11_uris}
    no_uniprot = {k:v for k, v in proto_mmr_1_to_1.items() if k not in uniprot_iuphar}
    arrr_n11_text = '\n'.join(f'{k:<15} {sorted(_.toPython() for _ in v)}' for k, v in arrr_not_1_to_1.items())
    mmr.sort()
    mmr_text = '\n'.join(mmr)

    mmr_1_to_1_text = '\n'.join(sorted(mmr_1_to_1.values()))

    no_uniprot_text = '\n'.join(sorted(no_uniprot.values()))
Ejemplo n.º 11
0
def main():
    """Export parcellation label tables as pipe separated files in /tmp.

    For each of the hard coded parcellation files the turtle graph is parsed,
    its annotations are indexed, the triples produced by
    ``restriction.parse`` are added and one ``label|abbrev|curie|superPart``
    row is collected per ``owl:Class`` that has a ``skos:prefLabel``.  The
    rows are written to ``/tmp/<filename>-<commit>.psv`` where ``<commit>``
    is the first 8 characters of the last commit that touched the file.
    When any per edition rows were collected they are written to
    ``/tmp/<filename>-editions-<commit>.psv`` as well.
    """
    for filename in ('mbaslim', 'hbaslim', 'paxinos-rat-labels',
                     'waxholm-rat-labels'):
        filepath = gitf / 'NIF-Ontology/ttl/generated/parcellation' / (
            filename + '.ttl')
        dir_ = filepath.parent.as_posix()
        print(dir_)
        file_commit = subprocess.check_output(
            [
                'git', 'log', '-n', '1', '--pretty=format:%H', '--',
                filepath.name
            ],
            cwd=dir_,
            stderr=subprocess.DEVNULL).decode().rstrip()
        graph = rdflib.Graph().parse(filepath.as_posix(), format='ttl')
        g = makeGraph('', graph=graph)

        # index the annotations in both directions
        annos = defaultdict(set)  # (predicate, object) -> annotated triples
        anno_trips = defaultdict(set)  # triple -> its (predicate, object)s
        for triple, predicate_objects in annotation.parse(graph=graph):
            for a_p, a_o in predicate_objects:
                annos[a_p, a_o].add(triple)
                anno_trips[triple].add((a_p, a_o))

        # plain dict from here on: looking up a triple that has no
        # annotations raises KeyError instead of creating an entry
        anno_trips = {k: v for k, v in anno_trips.items()}

        for lifted_triple in restriction.parse(graph=graph):
            graph.add(lifted_triple)

        out_header = 'label|abbrev|curie|superPart curie\n'
        out = []
        editions_header = 'edition|label|abbrev|curie\n'
        editions = []
        for s in graph.subjects(rdf.type, owl.Class):
            rdfsLabel = next(graph.objects(s, rdfs.label))
            try:
                prefLabel = next(graph.objects(s, skos.prefLabel))
            except StopIteration:
                print(tc.red('WARNING:'),
                      f'skipping {s} {rdfsLabel} since it has no prefLabel')
                continue
            syns = sorted(
                graph.objects(s, NIFRID.synonym)
            )  # TODO are there cases where we need to recaptulate what we are doing for for abbrevs?
            abbrevs = sorted(graph.objects(
                s, NIFRID.abbrev))  # FIXME paxinos has more than one

            # Start from "no abbrev" for every class.  Without the reset a
            # class for which nothing is selected below would be written
            # with the abbreviation left over from the previous class.
            abbrev = ''
            if annos:
                if len(abbrevs) > 1:
                    print(tc.blue('INFO:'), g.qname(s),
                          repr(prefLabel.value), 'has multiple abbrevs',
                          [a.value for a in abbrevs])
                # prefer latest
                current_edition = ''
                for a in abbrevs:
                    for a_p, edition in anno_trips[s, NIFRID.abbrev, a]:
                        if a_p == ilxtr.literalUsedBy:
                            if current_edition < edition:
                                current_edition = edition
                                abbrev = a
            elif abbrevs:
                abbrev = abbrevs[0]

            try:
                superPart = next(graph.objects(s, ilxtr.labelPartOf))
            except StopIteration:
                superPart = ''

            out.append(
                f'{prefLabel}|{abbrev}|{g.qname(s)}|{g.qname(superPart)}')

            if annos:
                # edition -> {'label': ..., 'abbrev': ..., 'curie': ...}
                asdf = defaultdict(dict)
                triple = s, skos.prefLabel, prefLabel
                eds = anno_trips[triple]
                for a_p, a_o in eds:
                    asdf[a_o]['curie'] = g.qname(s)
                    asdf[a_o]['label'] = prefLabel
                for syn in graph.objects(s, NIFRID.synonym):
                    triple = s, NIFRID.synonym, syn
                    eds = anno_trips[triple]
                    for a_p, a_o in eds:
                        asdf[a_o]['curie'] = g.qname(s)
                        if 'label' in asdf[a_o]:
                            print(
                                tc.red('WARNING:'),
                                f'{a_o} already has a label "{asdf[a_o]["label"]}" for "{syn}"'
                            )
                        asdf[a_o]['label'] = syn
                # separate loop name so the abbrev chosen above is not rebound
                for ed_abbrev in graph.objects(s, NIFRID.abbrev):
                    triple = s, NIFRID.abbrev, ed_abbrev
                    eds = anno_trips[triple]
                    for a_p, a_o in eds:
                        asdf[a_o]['curie'] = g.qname(s)
                        if 'abbrev' in asdf[a_o]:
                            print(
                                tc.red('WARNING:'),
                                f'{a_o} already has a abbrev "{asdf[a_o]["abbrev"]}" for "{ed_abbrev}"'
                            )
                        asdf[a_o]['abbrev'] = ed_abbrev

                for ed, kwargs in sorted(asdf.items()):
                    if 'abbrev' not in kwargs:
                        print('Skipping', ed, 'for\n', kwargs)
                        continue
                    editions.append('{ed}|{label}|{abbrev}|{curie}'.format(
                        ed=g.qname(ed), **kwargs))

        with open('/tmp/' + filename + f'-{file_commit[:8]}.psv', 'wt') as f:
            f.write(out_header + '\n'.join(sorted(out, key=labelkey)))
        if editions:
            with open('/tmp/' + filename + f'-editions-{file_commit[:8]}.psv',
                      'wt') as f:
                f.write(editions_header +
                        '\n'.join(sorted(editions, key=edkey)))