Example 1
def npokb():
    index_graph = OntGraph(path=auth.get_path('ontology-local-repo') /
                           'ttl/generated/neurons/npokb-index.ttl')

    if index_graph.path.exists():
        index_graph.parse()

    # testing
    index_graph.bind('npokb', npokb)
    #[index_graph.add((npokb[str(i)], rdf.type, owl.Class)) for i in range(1, 11)]
    #[index_graph.add((npokb[str(i)], ilxtr.hasTemporaryId, TEMP[str(i)])) for i in range(1, 11)]

    ios = []
    for eff in ('common-usage-types', 'huang-2017', 'markram-2015',
                'allen-cell-types'):
        path = auth.get_path(
            'ontology-local-repo') / f'ttl/generated/neurons/{eff}.ttl'
        input_graph = OntGraph(path=path)
        input_graph.parse()
        output_graph = input_graph.mapTempToIndex(index_graph, npokb, TEMP)
        ios.append((input_graph, output_graph))

    input_graph, output_graph = ios[0]
    a, r, c = output_graph.subjectsChanged(input_graph)
    index_graph.write()
    # [o.write() for i, o, in ios]  # when ready
    #from sparcur.paths import Path
    #Path(index_graph.path).xopen()
    breakpoint()
Example 2
def main():
    olr = auth.get_path('ontology-local-repo')
    resources = auth.get_path('resources')
    if not olr.exists():
        raise FileNotFoundError(f'{olr} does not exist, cannot continue')
    if not resources.exists():
        raise FileNotFoundError(f'{resources} does not exist, cannot continue')

    from docopt import docopt
    args = docopt(__doc__, version='parcellation 0.0.1')
    # import all ye submodules we have it sorted! LabelBase will find everything for us. :D
    if not args['--local']:
        from nifstd_tools.parcellation.aba import Artifacts as abaArts
    from nifstd_tools.parcellation.fsl import FSL  # Artifacts is attached to the class
    from nifstd_tools.parcellation.whs import Artifacts as whsArts
    from nifstd_tools.parcellation.berman import Artifacts as bermArts
    from nifstd_tools.parcellation.paxinos import Artifacts as paxArts
    from nifstd_tools.parcellation.swanson import Artifacts as swArts
    from nifstd_tools.parcellation.freesurfer import Artifacts as fsArts
    onts = getOnts()
    _ = *(print(ont) for ont in onts),
    out = build(*onts,
                parcBridge,
                fail=args['--fail'],
                n_jobs=int(args['--jobs']))
    if args['--stats']:
        breakpoint()
Example 3
def _get_oauth_service(api='sheets', version='v4', readonly=True, SCOPES=None):
    """ Inner implementation for get oauth. If you see this function used directly
        anywhere other than in googapis it is almost certainly a mistake. """

    if readonly:  # FIXME the division isn't so clean for drive ...
        _auth_var = 'google-api-store-file-readonly'
    else:
        _auth_var = 'google-api-store-file'

    try:
        store_file = auth.get_path(_auth_var)
    except KeyError as e:
        _msg = (f'No value found for {_auth_var} in {auth._path}\n'
                'See the previous error for more details about the cause.')
        raise ValueError(_msg) from e

    if store_file is None:
        _p = 'RUNTIME_CONFIG' if auth._path is None else auth._path
        # FIXME bad error message, need to check whether the key is even in
        # the user config, and yes we need our way to update the user config
        # and warn about unexpected formats for orthauth configs
        msg = (f'No file exists at the path specified by {_auth_var} in {_p}')
        log.debug(auth._runtime_config)
        log.debug(auth.user_config._runtime_config)
        raise ValueError(msg)

    # TODO log which file it is writing to ...
    if store_file.exists():
        with open(store_file, 'rb') as f:
            try:
                creds = pickle.load(f)
            except pickle.UnpicklingError as e:
                # FIXME need better way to trace errors in a way
                # that won't leak secrets by default
                log.error(f'problem in file at path for {_auth_var}')
                raise e
    else:
        creds = None
        if SCOPES is None:
            raise TypeError('SCOPES has not been set, possibly because this is\n'
                            'being called by a function that expects the store file\n'
                            'to already exist. Please run `googapis auth` with the\n'
                            'appropriate scope.')

    if not creds or not creds.valid:
        # the first time you run this you will need to use the --noauth_local_webserver args
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            creds_file = auth.get_path('google-api-creds-file')
            flow = InstalledAppFlow.from_client_secrets_file((creds_file).as_posix(), SCOPES)
            creds = flow.run_console()

        with open(store_file, 'wb') as f:
            pickle.dump(creds, f)

    service = build(api, version, credentials=creds)
    return service
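For orientation, a minimal sketch of how the service object returned above is typically consumed with the Sheets v4 API; the spreadsheet id and range are hypothetical placeholders, and real callers in pyontutils go through the higher-level googapis helpers rather than using _get_oauth_service directly.

def read_range_example(spreadsheet_id, range_name):
    # example only: fetch raw cell values using a readonly Sheets service
    scopes = ['https://www.googleapis.com/auth/spreadsheets.readonly']
    service = _get_oauth_service(api='sheets', version='v4',
                                 readonly=True, SCOPES=scopes)
    result = (service.spreadsheets().values()
              .get(spreadsheetId=spreadsheet_id, range=range_name)
              .execute())
    return result.get('values', [])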
Example 4
def catalog_extras(fetch=False):
    path = Path(auth.get_path('ontology-local-repo'), 'ttl')
    cat = (path / 'catalog-v001.xml').as_posix()
    with open((path / '../catalog-extras').as_posix(),
              'rt') as ce, open(cat, 'rt') as c:
        clines = c.readlines()
        celines = ce.readlines()

    if clines[-2] != celines[-1]:
        with open(cat, 'wt') as f:
            f.writelines(clines[:-1] + celines + clines[-1:])
    else:
        print(tc.blue('INFO:'),
              'extras already added to catalog, doing nothing')

    if fetch:
        print(tc.blue('INFO:'), 'fetching extras')

        def fetch_and_save(url, loc):
            resp = requests.get(url)
            saveloc = (path / loc).as_posix()
            if resp.ok:
                with open(saveloc, 'wb') as f:
                    f.write(resp.content)

                print(tc.blue('INFO:'), f'{url:<60} written to {loc}')
            else:
                print(tc.red('WARNING:'), f'failed to fetch {url}')

        Async()(deferred(fetch_and_save)(url, loc) for line in celines
                for _, _, _, url, _, loc, _ in (line.split('"'), ))
Example 5
    def default(self):
        g = OntGraph().parse(
            auth.get_path('ontology-local-repo') / 'ttl/stimulation.ttl')
        preds = sorted(set(g.qname(p) for p in g.predicates()))

        header = [[
            'id', 'rdf:type', 'rdfs:domain', 'rdfs:range', 'rdfs:label',
            'NIFRID:synonym', 'NIFRID:abbrev', 'definition:', 'editorNote:',
            'rdfs:comment'
        ]]

        _rows = []
        for type_ in (
                owl.ObjectProperty,
                owl.Class,
        ):
            for s in sorted(g[:rdf.type:type_], key=natsort):
                if isinstance(s, rdflib.URIRef):
                    row = [g.qname(s), g.qname(type_)
                           ] + [fun(g, s) for fun in funs]
                    _rows.append(row)

        rows = header + _rows

        defs = Defs(readonly=not self.options.update)
        if self.options.update:
            defs.upsert(*rows)  # FIXME upsert broken on header reordering ?
            defs.commit()

        return rows
Example 6
def test():
    snchf = SnchFile.fromYaml('../test/sneech-file.yaml')
    snchf.writeTtl(aug.RepoPath('../test/rando-sneech-ttl.ttl').resolve())
    rp = aug.RepoPath(auth.get_path('ontology-local-repo'))
    wrangler = SneechWrangler(aug.RepoPath('~/git/sneechenator').expanduser())
    dir_snchn = wrangler.dir_process / 'test-sneechening'
    if not dir_snchn.exists():  # FIXME bad workflow
        dir_snchn.mkdir()

    path_index = wrangler.path_index(snchf.index)
    if not path_index.exists():
        path_index = wrangler.new_index(
            snchf.index)  # FIXME move inside Sneechenator? or no
        path_index.commit_from_working_tree(f'new index {snchf.index}')

    org_index = OntResGit(path_index)
    expanded = snchf.write(dir_snchn)  # TODO commit
    expanded.commit_from_working_tree(f'expanded snch file')
    sncher = Sneechenator(org_index, snchf.namespaces, snchf.orgs)
    #sncher.preSneechUpon(dir_snchn)
    rg, maybe_sneeches = sncher.sneechReviewGraph()
    # commit here I think ?
    # consider using ilxtr.maybeHasIlxId ?
    # TODO modified maybe_sneeches file + maybe list -> update list
    breakpoint()
Example 7
def main():
    with open(auth.get_path('curies'), 'rt') as f:
        curie_map = yaml.safe_load(f)

    curie_map['nlx_only'] = curie_map[
        '']  # map nlx_only to 'http://uri.neuinfo.org/nif/nifstd/'

    g = rdflib.Graph()
    g.parse('http://ontology.neuinfo.org/NIF/ttl/NIF-Cell.ttl',
            format='turtle')

    curiespaces = {k: rdflib.Namespace(v) for k, v in curie_map.items()}
    namespaces = {
        c_prefix: rdflib.Namespace(iri_prefix)
        for c_prefix, iri_prefix in g.namespaces()
    }

    subject = curiespaces['NIFCELL']['nifext_75']
    predicate = None
    object_ = None
    matches = [t for t in g.triples((subject, predicate, object_))]
    print(matches)
    if matches:
        predicate = matches[0][1].toPython()
        print(predicate)

    if __name__ == '__main__':
        breakpoint()
Example 8
class PaxSr_6(resSource):
    sourceFile = auth.get_path('resources') / 'paxinos09names.txt'
    artifact = Artifacts.PaxRat6

    @classmethod
    def loadData(cls):
        with open(cls.source, 'rt') as f:
            lines = [
                l.rsplit('#')[0].strip() for l in f.readlines()
                if not l.startswith('#')
            ]
        return [l.rsplit(' ', 1) for l in lines]

    @classmethod
    def processData(cls):
        structRecs = []
        out = {}
        for structure, abrv in cls.raw:
            structRecs.append((abrv, structure))
            if abrv in out:
                out[abrv][0].append(structure)
            else:
                out[abrv] = ([structure], ())
        return structRecs, out

    @classmethod
    def validate(cls, structRecs, out):
        print(Counter(_[0] for _ in structRecs).most_common()[:5])
        print(Counter(_[1] for _ in structRecs).most_common()[:5])
        assert len(structRecs) == len([
            s for sl, _ in out.values() for s in sl
        ]), 'There are non-unique abbreviations'
        errata = {}
        return out, errata
Example 9
def main():
    olr = auth.get_path('ontology-local-repo')
    ori = OntResIri('http://purl.obolibrary.org/obo/doid.owl')
    orp = OntResPath(olr / 'ttl/external/doid.owl')
    ort = ori
    g = ori.graph
    query = """
    SELECT DISTINCT ?s ?o ?l
    WHERE {
        ?s a owl:Class .
        ?s rdfs:subClassOf* <http://purl.obolibrary.org/obo/DOID_4> .
        ?s rdfs:subClassOf ?o .
        ?s rdfs:label ?l .
    }"""
    res = list(g.query(query))
    filt = [r for r in res if not isinstance(r[1], rdflib.BNode)]
    spath = 'ttl/generated/doidslim.ttl'
    go = OntGraph(path=olr / spath)
    # TODO prov record like the one we have for chebi
    go.bind('DOID', 'http://purl.obolibrary.org/obo/DOID_')
    s = rdflib.URIRef('http://ontology.neuinfo.org/NIF/' + spath)
    go.populate_from_triples(
        ((s, p, o) for p, o in
         ((rdf.type, owl.Ontology),
          (rdfs.label, rdflib.Literal("NIF DOID slim")),)))
    ds = rdflib.URIRef('http://purl.obolibrary.org/obo/DOID_4')
    go.add((ds, rdf.type, owl.Class))
    go.add((ds, rdfs.label, rdflib.Literal('disease')))
    go.populate_from_triples(
        (t for s, o, l in filt for t in
         ((s, rdf.type, owl.Class),
          (s, rdfs.subClassOf, o),
          (s, rdfs.label, l))))
    go.write()
Example 10
def main():
    pi = PhenotypeIndicators()
    trips = list(pi.triples)

    #yield from PhenotypeIndicators().triples
    g = pi.asGraph()
    g.write(auth.get_path('ontology-local-repo') / f'ttl/{pi.name}.ttl')
Example 11
def main():

    #InterLexSneechenator()
    test()

    return
    # testing
    index_graph.bind('ILX', ILX)
    #[index_graph.add((npokb[str(i)], rdf.type, owl.Class)) for i in range(1, 11)]
    #[index_graph.add((npokb[str(i)], ilxtr.hasTemporaryId, TEMP[str(i)])) for i in range(1, 11)]

    ios = []
    for eff in ('phenotype-core.ttl', 'phenotypes.ttl'):
        path = auth.get_path('ontology-local-repo') / eff
        input_graph = OntGraph(path=path)
        input_graph.parse()
        output_graph = input_graph.mapTempToIndex(index_graph, ILX, ilxtr)
        ios.append((input_graph, output_graph))

    input_graph, output_graph = ios[0]
    a, r, c = output_graph.subjectsChanged(input_graph)
    index_graph.write()
    # [o.write() for i, o, in ios]  # when ready
    #from sparcur.paths import Path
    #Path(index_graph.path).xopen()
    breakpoint()
Example 12
def _todo(utr):
    # real output
    glb = auth.get_path('git-local-base')
    uberon_edit = aug.RepoPath(
        glb) / 'NOFORK/uberon/src/ontology/uberon-edit.obo'
    of = oio.OboFile(path=uberon_edit, strict=False)
    utr.submit_to_obofile(of, 'UBERON', uberon_id_range)
    of.write(overwrite=True, version=oio.OBO_VER_ROBOT)
Example 13
def ncbigenemapping(may_need_ncbigene_added):
    #urlbase = 'https://www.ncbi.nlm.nih.gov/gene/?term=Mus+musculus+'
    urlbase = ('https://www.ncbi.nlm.nih.gov/gene?term='
               '({gene_name}[Gene%20Name])%20AND%20{taxon_suffix}[Taxonomy%20ID]&'
               'report=xml')
    urls = [urlbase.format(gene_name=n, taxon_suffix=10090) for n in may_need_ncbigene_added]
    done2 = {}
    for u in urls:
        if u not in done2:
            print(u)
            done2[u] = requests.get(u)

    base = auth.get_path('resources') / 'genesearch'
    if not base.exists():
        base.mkdir()

    for resp in done2.values():
        fn = OntId(resp.url).quoted
        with open(base / fn, 'wb') as f:
            f.write(resp.content)

    so_much_soup = [(resp.url, BeautifulSoup(resp.content, 'lxml')) for resp in done2.values()]

    trees = []
    for i, (url, soup) in enumerate(so_much_soup):
        pre = soup.find_all('pre')
        if pre:
            for p in pre[0].text.split('\n\n'):
                if p:
                    tree = etree.fromstring(p)
                    trees.append((url, tree))
        else:
            print('WAT', urls[i])

    dimension = 'ilxtr:hasExpressionPhenotype'
    errors = []
    to_add = []
    mapping = {}
    for url, tree in trees:
        taxon = tree.xpath('//Org-ref//Object-id_id/text()')[0]
        geneid = tree.xpath('//Gene-track_geneid/text()')[0]
        genename = tree.xpath('//Gene-ref_locus/text()')[0]
        if genename in may_need_ncbigene_added and taxon == '10090':
            print(f'{genename} = Phenotype(\'NCBIGene:{geneid}\', {dimension!r}, label={genename!r}, override=True)')
            to_add.append(geneid)
            mapping[genename] = f'NCBIGene:{geneid}'
        else:
            errors.append((geneid, genename, taxon, url))

    print(errors)
    _ = [print('NCBIGene:' + ta) for ta in to_add]

    #wat.find_all('div', **{'class':'rprt-header'})
    #wat.find_all('div', **{'class':'ncbi-docsum'})

    return mapping, to_add, errors
Example 14
def fix_file(path):
    with open(path, 'rt') as f:
        sin = f.read()

    sout = sin.replace(
        '~/git/NIF-Ontology',
        auth.get_path('ontology-local-repo').resolve().as_posix())
    with open(path, 'wt') as f:
        f.write(sout)

    return sin
Example 15
 def _mis_graph(self):
     """ for now easier to just get a fresh one, they are small """
     glb = pauth.get_path('git-local-base')
     olr = Path(glb / 'duplicates' / 'sparc-NIF-Ontology')
     graph = (rdflib.ConjunctiveGraph()
         .parse((olr / 'ttl/sparc-methods.ttl').as_posix(), format='turtle')
         #.parse((olr / 'ttl/methods-core.ttl').as_posix(), format='turtle')
         #.parse((olr / 'ttl/methods-helper.ttl').as_posix(), format='turtle')
         #.parse((olr / 'ttl/methods.ttl').as_posix(), format='turtle')
     )
     return graph
Example 16
def npokb_mapping():
    index_graph = OntGraph(path=auth.get_path('ontology-local-repo') /
                           'ttl/generated/neurons/npokb-index.ttl')

    if index_graph.path.exists():
        index_graph.parse()

    # testing
    index_graph.bind('npokb', npokb)
    #[index_graph.add((npokb[str(i)], rdf.type, owl.Class)) for i in range(1, 11)]
    #[index_graph.add((npokb[str(i)], ilxtr.hasTemporaryId, TEMP[str(i)])) for i in range(1, 11)]

    ios = []
    for eff in (
            'common-usage-types',
            'huang-2017',
            'markram-2015',
            'allen-cell-types',
    ):
        # FIXME if the index id is already being used it is still added as a temp id incorrectly
        path = auth.get_path(
            'ontology-local-repo') / f'ttl/generated/neurons/{eff}.ttl'
        org = OntResGit(
            path, ref='HEAD'
        )  # HEAD is default but just for clarity set it explicitly here
        prev_graph = org.graph
        input_graph = OntGraph(path=path)
        input_graph.parse()
        mapped_graph = input_graph.mapStableIdentifiers(
            prev_graph, ilxtr.origLabel)
        output_graph = mapped_graph.mapTempToIndex(index_graph, npokb, TEMP)
        ios.append((mapped_graph, output_graph))

    mapped_graph, output_graph = ios[0]
    a, r, c = output_graph.subjectsChanged(mapped_graph)
    index_graph.write()
    [o.write() for i, o, in ios]  # when ready
    #from sparcur.paths import Path
    #Path(index_graph.path).xopen()
    breakpoint()
Example 17
def scigraph_stress(rate,
                    timeout=5,
                    verbose=False,
                    debug=False,
                    scigraph=auth.get('scigraph-api')):
    # TODO use the api classes
    with open((auth.get_path('resources') / 'chebi-subset-ids.txt').as_posix(),
              'rt') as f:
        urls = [
            os.path.join(scigraph, f'vocabulary/id/{curie.strip()}')
            for curie in f.readlines()
        ]
    print(urls)
    url_blaster(urls, rate, timeout, verbose, debug)
Example 18
class ChebiIdsSrc(Source):
    source = auth.get_path('resources') / 'chebi-subset-ids.txt'
    source_original = True

    @classmethod
    def loadData(cls):
        ug = makeGraph('utilgraph', prefixes=uPREFIXES)
        with open(cls.source, 'rt') as f:
            ids_raw = set(_.strip() for _ in f.readlines())
            ids = set(ug.expand(_.strip()).toPython() for _ in ids_raw)
            return ids_raw, ids

    @classmethod
    def validate(cls, a):
        return a
Example 19
class HCPMMPSrc(resSource):
    sourceFile = auth.get_path(
        'resources') / 'human_connectome_project_2016.csv'
    source_original = True
    artifact = Artifacts.HCPMMP

    @classmethod
    def loadData(cls):
        with open(cls.source, 'rt') as f:
            return [r for r in csv.reader(f)][1:]  # skip header

    @classmethod
    def processData(cls):
        return cls.raw,

    @classmethod
    def validate(cls, d):
        return d
Example 20
def ncbigene_make():
    IDS_FILE = auth.get_path('resources') / 'gene-subset-ids.txt'
    with open(IDS_FILE.as_posix(), 'rt') as f:  # this came from neuroNER
        ids = [l.split(':')[1].strip() for l in f.readlines()]

    #url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?retmode=json&retmax=5000&db=gene&id='
    #for id_ in ids:
    #data = requests.get(url + id_).json()['result'][id_]
    url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
    data = {
        'db': 'gene',
        'retmode': 'json',
        'retmax': 5000,
        'id': None,
    }
    chunks = []
    for i, idset in enumerate(chunk_list(ids, 100)):
        print(i, len(idset))
        data['id'] = ','.join(idset)
        resp = requests.post(url, data=data).json()
        chunks.append(resp)

    base = chunks[0]['result']
    uids = base['uids']
    for more in chunks[1:]:
        data = more['result']
        uids.extend(data['uids'])
        base.update(data)
    #base['uids'] = uids  # i mean... its just the keys
    base.pop('uids')

    ng = createOntology(
        'ncbigeneslim',
        'NIF NCBI Gene subset',
        makePrefixes('ilxtr', 'NIFRID', 'NCBIGene', 'NCBITaxon', 'skos', 'owl',
                     'SO'),
        'ncbigeneslim',
        f'This subset is automatically generated from the NCBI Gene database on a subset of terms listed in {IDS_FILE}.',
        remote_base='http://ontology.neuinfo.org/NIF/')

    for k, v in base.items():
        #if k != 'uids':
        ncbi(v, ng)
    ng.write()
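The batching above relies on a chunk_list helper imported elsewhere in pyontutils; a minimal equivalent is sketched here for reference only, assuming it simply splits a sequence into fixed-size slices.

def chunk_list(seq, size):
    # sketch of the helper assumed by ncbigene_make: successive slices of length `size`
    return [seq[i:i + size] for i in range(0, len(seq), size)]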
Example 21
class WHSSDSrc(resSource):
    sourceFile = lambda v: auth.get_path('resources'
                                         ) / f'WHS_SD_rat_atlas_v{v}.label'
    source_original = True
    artifact = lambda v: getattr(Artifacts, f'WHSSD{v}')

    @classmethod
    def loadData(cls):
        with open(cls.source, 'rt') as f:
            lines = [l.strip() for l in f.readlines() if not l.startswith('#')]
        return [(l[:3].strip(), l.split('"', 1)[1].strip('"')) for l in lines]

    @classmethod
    def processData(cls):
        return cls.raw,

    @classmethod
    def validate(cls, d):
        return d
Example 22
class WHSSDilfSrc(resSource):
    sourceFile = lambda v: auth.get_path(
        'resources') / f'WHS_SD_rat_atlas_v{v}_labels.ilf'
    source_original = True
    artifact = lambda v: getattr(Artifacts, f'WHSSD{v}')
    predicates = lambda v: {
        ilxtr.labelPartOf: ilxtr[f'labelPartOf-whssd-{v}']
    }  # FIXME

    @classmethod
    def loadData(cls):
        tree = etree.parse(cls.source.as_posix())
        return tree

    @classmethod
    def processData(cls):
        tree = cls.raw

        def recurse(label_node, parent=None):
            name = label_node.get('name')
            abbrev = label_node.get('abbreviation')
            id = label_node.get('id')
            yield id, name, abbrev, parent
            for child in label_node.getchildren():
                if child.tag == 'label':
                    yield from recurse(child, parent=id)

        records = tuple()
        for structure in tree.xpath('//structure'):
            for lab in structure.getchildren():
                if lab.tag == 'label':
                    records += tuple(recurse(lab, None))

        return records,

    @classmethod
    def validate(cls, d):
        return d
Example 23
only = tuple()
skip = tuple()
ci_skip = tuple()
network_tests = (  # reminder that these only skip mains
    'closed_namespaces',
    'hierarchies',
    'make_catalog',
    'scig',
    'scigraph_codegen',
    ['ontload', 'graph'],
    ['ontutils', 'deadlinks'],
    ['ontutils', 'version-iri'],
)
#requests.exceptions.SSLError

if auth.get_path('scigraph-services') is None:
    skip += (
        'scigraph_deploy',
    )  # this will fail # FIXME this should really only skip main not both main and import?

working_dir = get_working_dir(__file__)
if working_dir is None:
    # python setup.py test will run from the module_parent folder
    # I'm pretty sure the split was only implemented because I was trying
    # to run all tests from the working_dir in one shot, but that has
    # a number of problems with references to local vs installed packages
    import inspect
    sf = inspect.getsourcefile(pyontutils)
    working_dir = Path(sf).parent.parent
else:
Example 24
def main():
    import rdflib
    from pyontutils.core import makeGraph, makePrefixes, log
    from pyontutils.config import auth

    ub = auth.get_path('ontology-local-repo') / 'ttl/bridge/uberon-bridge.ttl'
    ncrb = auth.get_path(
        'ontology-local-repo') / 'ttl/NIF-Neuron-Circuit-Role-Bridge.ttl'
    if not ub.exists() or not ncrb.exists():
        # just skip this if we can't find the files
        log.warning(f'missing file {ub} or {ncrb}')
        return

    graph = rdflib.Graph()
    graph.parse(ub.as_posix(), format='turtle')
    graph.parse(ncrb.as_posix(), format='ttl')

    ecgraph = rdflib.Graph()
    oec = EquivalentClass()
    test = tuple(oec.parse(graph=graph))

    ft = oc_.full_combinator(test[0][0], test[0][1])
    ftng = makeGraph('thing3', prefixes=makePrefixes('owl', 'TEMP'))
    *ft.serialize(ftng.g),
    ftng.write()

    _roundtrip = list(test[0][1](test[0][0]))
    roundtrip = oc_(test[0][0], test[0][1])  # FIXME not quite there yet...
    for t in roundtrip:
        ecgraph.add(t)
    ecng = makeGraph('thing2',
                     graph=ecgraph,
                     prefixes=makePrefixes('owl', 'TEMP'))
    ecng.write()
    if __name__ == '__main__':
        breakpoint()
        return
    r = Restriction(
        rdfs.subClassOf)  #, scope=owl.allValuesFrom)#NIFRID.has_proper_part)
    l = tuple(r.parse(graph=graph))
    for t in r.triples:
        graph.remove(t)
    ng = makeGraph('thing', graph=graph)
    ng.write()
    #print(l)
    restriction = Restriction(None)  #rdf.first)
    ll = List(lift_rules={owl.Restriction: restriction})
    trips = tuple(ll.parse(graph=graph))
    #subClassOf = PredicateCombinator(rdfs.subClassOf)  # TODO should be able to do POCombinator(rdfs.subClassOf, ObjectCombinator)
    subClassOf = POCombinator(rdfs.subClassOf, ObjectCombinator)
    superDuperClass = subClassOf(
        TEMP.superDuperClass)  # has to exist prior to triples
    ec = oec(
        TEMP.ec1,
        TEMP.ec2,
        restriction(TEMP.predicate0, TEMP.target1),
        restriction(TEMP.predicate1, TEMP.target2),
    )
    egraph = rdflib.Graph()
    acombinator = annotation((TEMP.testSubject, rdf.type, owl.Class),
                             (TEMP.hoh, 'FUN'))
    ft = flattenTriples((
        acombinator((TEMP.annotation, 'annotation value')),
        acombinator((TEMP.anotherAnnotation, 'annotation value again')),
        oc_(TEMP.c1, superDuperClass),
        oc_(TEMP.c2, superDuperClass),
        oc_(TEMP.c3, superDuperClass),
        oc_(TEMP.c4, superDuperClass),
        oc_(TEMP.c5, superDuperClass),
        oc_(TEMP.wat, subClassOf(TEMP.watParent)),
        oc_(TEMP.testSubject),
        ec(TEMP.testSubject),
        oc_(TEMP.more,
            oec(TEMP.ec3, restriction(TEMP.predicate10, TEMP.target10))),
    ), )
    [egraph.add(t) for t in ft]
    eng = makeGraph('thing1',
                    graph=egraph,
                    prefixes=makePrefixes('owl', 'TEMP'))
    eng.write()
    if __name__ == '__main__':
        breakpoint()
Example 25
class BermanSrc(resSource):
    run_ocr = False
    source_images = Path('~/files/cropped').expanduser()
    source = 'https://github.com/tgbugs/pyontutils.git'
    sourceFile = auth.get_path('resources') / 'berman'
    source_original = False
    artifact = Artifacts.BermanCat

    @classmethod
    def loadData(cls):
        """ Sigh, this was indeed a poorly conceived approach
        since it hard blocks when the files are not in the source
        so you can't easily bootstrap from another source and the
        cognitive overhead is way, way too high :/ 

        Adding dry_run/bootstrap to __new__ sort of helps? """
        """ Have to run this out here because resSource is handicapped """
        data = []
        if cls.source_images.exists():
            for folder in cls.source_images.glob('*'):
                plate_num = int(folder.stem)
                text_file = cls.source / f'{plate_num}.txt'
                if not text_file.exists() or cls.run_ocr:
                    legends = []
                    raw_text = ''
                    for img in folder.glob('*.png'):
                        print('num', plate_num, img.stem)
                        p = subprocess.Popen(
                            ('tesseract', img.as_posix(), 'stdout', '-l',
                             'eng', '--oem', '2', '--psm', '6'),
                            stdout=subprocess.PIPE)
                        bytes_text, err = p.communicate()
                        raw_text += bytes_text.decode() + '\n'

                    with open(text_file, 'wt') as f:
                        f.write(raw_text)
                else:
                    with open(text_file, 'rt') as f:
                        raw_text = f.read()

                legends = get_legends(raw_text)
                data.append((plate_num, legends))

        elif cls.source.exists():
            for text_file in cls.source.glob('*.txt'):
                plate_num = int(text_file.stem)
                with open(text_file, 'rt') as f:
                    raw_text = f.read()

                legends = get_legends(raw_text)
                data.append((plate_num, legends))

        return data

    @classmethod
    def processData(cls):
        data = cls.raw

        # ocr fixes
        # in theory could use the most frequent if > .75 are the same ...
        cor_l = {
            'abducens nerve': {
                'GN': '6N'
            },
            'alaminar spinal trigeminal nucleus, magnocellular division (14)':
            {
                '5SM': 'SSM'
            },
            'alaminar spinal trigeminal nucleus, parvocellular division (6)': {
                '5SP': 'SSP'
            },
            'central nucleus of the inferior colliculus (21)': {
                '1CC': 'ICC'
            },
            'cerebral cortex': {
                '¢': 'C'
            },
            'commissure of the inferior colliculi': {
                '1CO': 'ICO',
                'I1CO': 'ICO'
            },
            'corpus callosum': {
                '198': 'CC'
            },
            'inferior central nucleus (13)': {
                'C': 'CI',
                'Cl': 'CI'
            },
            'lateral tegmental field (3)': {
                'FIL': 'FTL'
            },
            'mesencephalic trigeminal nucleus (19)': {
                'SME': '5ME'
            },
            'motor trigeminal tract': {
                'SMT': '5MT'
            },
            'nucleus of the trapezoid body (15)': {
                'J': 'T'
            },
            'posterior interpeduncular nucleus, inner division': {
                'al': 'IPI'
            },  # wow ...
            'solitary tract': {
                '$': 'S'
            },
            'spinal trigeminal tract': {
                'SST': '5ST'
            },
            'statoacoustic nerve': {
                'BN': 'SN'
            },
            'superior central nucleus (22)': {
                's': 'CS'
            },
            'trigeminal nerve': {
                'SN': '5N'
            },
            'zona incerta': {
                'Z1': 'ZI'
            },
        }

        cor_a = {
            #'1': {'ependymal layer', 'superficial layer'},
            #'2': {'intermediate layer', 'molecular layer'},
            #'3': {'deep layer', 'oculomotor nucleus (27)', 'pyramidal layer'},
            #'4': {'polymorph layer', 'trochlear nucleus (23)'},
            'KF': {
                'KollikerFuse nucleus (17)': 'KéllikerFuse nucleus (17)'
            },
            'SCS': {
                'superior colliculus, supertficial layer (25)':
                'superior colliculus, superficial layer (25)'
            }
        }

        # close layer abbreviation issues
        # this of course means that abbrevs cannot be used as identifiers
        # but we already knew this
        abbrev_ok = {
            '1': {
                'superficial layer': 1,
                'ependymal layer': 1
            },
            '2': {
                'intermediate layer': 1,
                'molecular layer': 1
            },
            '3': {
                'oculomotor nucleus (27)': 4,
                'deep layer': 1,
                'pyramidal layer': 1
            },
            '4': {
                'polymorph layer': 1,
                'trochlear nucleus (23)': 1
            }
        }

        by_abbrev = collections.defaultdict(list)
        by_label = collections.defaultdict(list)
        abbrev_index = collections.defaultdict(list)
        label_index = collections.defaultdict(list)
        for n, legends in sorted(data):
            for abbrev, label in legends:
                if label in cor_l and abbrev in cor_l[label]:
                    abbrev = cor_l[label][abbrev]
                if abbrev in cor_a and label in cor_a[abbrev]:
                    label = cor_a[abbrev][label]
                by_abbrev[abbrev].append(label)
                by_label[label].append(abbrev)

                abbrev_index[abbrev].append(n)
                label_index[label].append(n)

        def dorder(thing, type=lambda v: v):
            return {
                k: type(v)
                for k, v in sorted(thing.items(), key=lambda kv: kv[0].lower())
            }

        by_abbrev = dorder(by_abbrev, collections.Counter)
        by_label = dorder(by_label, collections.Counter)

        prob_a = {k: v for k, v in by_abbrev.items() if len(v) > 1}
        prob_l = {k: v for k, v in by_label.items() if len(v) > 1}

        pnorma = {k: dict(v) for k, v in prob_a.items()}
        assert pnorma == abbrev_ok, f'problem in abbrevs\n{pnorma}\n{abbrev_ok}'
        assert not prob_l, f'problem in labels {prob_l}'

        index_abbrev = dorder(abbrev_index, tuple)
        index_label = dorder(label_index, tuple)

        ia = sorted(
            set([(tuple(l), a, index_label[list(l)[0]], index_abbrev[a])
                 for a, l in by_abbrev.items() if a not in abbrev_ok
                 and index_label[list(l)[0]] != index_abbrev[a]]))
        assert not ia, f'oops {ia}'

        il = sorted(
            set([(l, tuple(a), index_label[l], index_abbrev[list(a)[0]])
                 for l, a in by_label.items() if list(a)[0] not in abbrev_ok
                 and index_label[l] != index_abbrev[list(a)[0]]]))
        assert not il, f'oops {il}'

        def paren_thing(label):
            if '(' in label:
                label_ws, pthing_cp = label.split('(', 1)
                return label_ws.strip(), int(pthing_cp.rstrip(')'))
            else:
                return label, None

        data_out = tuple(
            (*paren_thing(label), list(abbrev)[0], index_label[label])
            for label, abbrev in by_label.items())
        return data_out,

    @classmethod
    def validate(cls, d):
        return d
Example 26
        if not line:
            continue
        try:
            abbrev, label = line.split(' ', 1)
        except ValueError as e:
            print(repr(line))
            print(repr(raw_text))
            raise e
            continue
        abbrev = clean(abbrev)
        label = clean(label)
        legends.append((abbrev, label))
    return legends


resources = auth.get_path('resources')
if resources is not None:
    # FIXME TODO this is a bad way to handle this ...
    with open(resources / 'brainmaps-cat-abbrevs.html', 'rt') as f:
        dat = f.read()

    asoup = BeautifulSoup(dat, 'lxml')


class BermanSrc(resSource):
    run_ocr = False
    source_images = Path('~/files/cropped').expanduser()
    source = 'https://github.com/tgbugs/pyontutils.git'
    sourceFile = auth.get_path('resources') / 'berman'
    source_original = False
    artifact = Artifacts.BermanCat
Example 27
    def default(self):
        out_path = self.options.out_path
        BUILD = self.options.BUILD

        glb = Path(auth.get_path('git-local-base'))
        theme_repo = glb / 'org-html-themes'
        theme = theme_repo / 'setup/theme-readtheorg-local.setup'
        prepare_paths(BUILD, out_path, theme_repo, theme)

        doc_config = self._doc_config
        names = tuple(doc_config['repos']) + tuple(
            self.options.repo)  # TODO fetch if missing ?
        repo_paths = [(glb / name).resolve() for name in names]
        repos = [p.repo for p in repo_paths]
        skip_folders = doc_config.get('skip-folders', tuple())
        rskip = doc_config.get('skip', {})

        # TODO move this into run_all
        docstring_kwargs = makeDocstrings(BUILD, repo_paths, skip_folders,
                                          rskip)
        wd_docs_kwargs = [docstring_kwargs]
        if self.options.docstring_only:
            [
                kwargs.update({'theme': theme})
                for _, _, kwargs in wd_docs_kwargs
            ]
            outname, rendered = render_docs(wd_docs_kwargs,
                                            out_path,
                                            titles=None,
                                            n_jobs=1,
                                            debug=self.options.debug)[0]
            if not outname.parent.exists():
                outname.parent.mkdir(parents=True)
            with open(outname.as_posix(), 'wt') as f:
                f.write(rendered)
            return

        et = tuple()
        wd_docs_kwargs += [
            (rp, rp / f, makeKwargs(rp, f)) for rp in repo_paths
            for f in rp.repo.git.ls_files().split('\n')
            if Path(f).suffix in suffixFuncs and only(rp, f) and noneMembers(
                f, *skip_folders) and f not in rskip.get(rp.name, et)
        ]

        [kwargs.update({'theme': theme}) for _, _, kwargs in wd_docs_kwargs]

        if self.options.spell:
            spell((f.as_posix() for _, f, _ in wd_docs_kwargs))
            return

        titles = doc_config['titles']

        outname_rendered = render_docs(wd_docs_kwargs,
                                       out_path,
                                       titles,
                                       self.options.jobs,
                                       debug=self.options.debug)

        index = [
            f'<b class="{heading}">{heading}</b>'
            for heading in doc_config['index']
        ]

        _NOTITLE = object()
        for outname, rendered in outname_rendered:
            apath = outname.relative_to(self.options.out_path)
            title = titles.get(apath.as_posix(), _NOTITLE)
            # TODO parse out/add titles
            if title is not None:
                value = (hfn.atag(apath) if title is _NOTITLE else hfn.atag(
                    apath, title))
                index.append(value)

            if not outname.parent.exists():
                outname.parent.mkdir(parents=True)

            with open(outname.as_posix(), 'wt') as f:
                f.write(rendered)

        lt = list(titles)

        def title_key(a):
            title = a.split('"')[1]
            if title not in lt:
                msg = (f'{title} missing from {self.options.config}')
                raise ValueError(msg)
            return lt.index(title)

        index_body = '<br>\n'.join(['<h1>Documentation Index</h1>'] +
                                   sorted(index, key=title_key))
        with open((out_path / 'index.html').as_posix(), 'wt') as f:
            f.write(hfn.htmldoc(index_body, title=doc_config['title']))
Example 28
#!/usr/bin/env python3.7
import tempfile
from pyontutils.config import auth
from augpathlib import RepoPath as Path
temp_path = Path(tempfile.tempdir)
_ddconf = auth.get_path('resources') / 'doc-config.yaml'
_ddpath = temp_path / 'build-ont-docs' / 'docs'
__doc__ = f"""Compile all documentation from git repos.

Usage:
    ont-docs [options] [--repo=<REPO>...]

Options:
    -h --help             show this
    -c --config=<PATH>    path to doc-config.yaml [default: {_ddconf}]
    -o --out-path=<PATH>  path inside which docs are built [default: {_ddpath}]
    -b --html-root=<REL>  relative path to the html root [default: ..]
    -s --spell            run hunspell on all docs
    -d --docstring-only   build docstrings only
    -j --jobs=NJOBS       number of jobs [default: 9]
    -r --repo=<REPO>      additional repos to crawl for docs
    --debug               redirect stderr to debug pipeline errors

"""
import os
import re
import ast
import shutil
import logging
import subprocess
from pathlib import PurePath
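The usage string above is parsed by docopt (as in Example 2); a short sketch of the resulting argument dictionary, run inside the module above with made-up argv values for illustration:

from docopt import docopt

args = docopt(__doc__, argv=['--jobs', '4', '--repo', 'pyontutils'])
# docopt returns a plain dict keyed by option name, e.g.
# args['--jobs']   -> '4'             (still a string; cast with int() where needed)
# args['--repo']   -> ['pyontutils']  (the repeatable option collects a list)
# args['--config'] -> the default path interpolated into __doc__ above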
Example 29
def swanson():
    """ not really a parcellation scheme
        NOTE: the defining information up here is now deprecated
        it is kept around to keep the code further down happy """

    source = auth.get_path('resources') / 'swanson_aligned.txt'
    ONT_PATH = 'http://ontology.neuinfo.org/NIF/ttl/generated/'
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = SwansonLabels.prefixes
    new_graph = makeGraph(filename, PREFIXES, writeloc='/tmp/')
    new_graph.add_ont(ontid,
                      'Swanson brain partomies',
                      'Swanson 2014 Partonomies',
                      'This file is automatically generated from ' + source.as_posix() + '.' + '**FIXME**',
                      'now')

    # FIXME citations should really go on the ... anatomy? scheme artifact
    definingCitation = 'Swanson, Larry W. Neuroanatomical Terminology: a lexicon of classical origins and historical foundations. Oxford University Press, USA, 2014.'
    definingCitationID = 'ISBN:9780195340624'
    new_graph.add_trip(ontid, 'NIFRID:definingCitation', definingCitation)
    new_graph.add_trip(ontid, 'NIFRID:definingCitationID', definingCitationID)

    with open(source, 'rt') as f:
        lines = [l.strip() for l in f.readlines()]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    #fix for capitalization since this header is reused
    fixed = ' or '.join([' ('.join([n.capitalize() for n in _.split(' (')]) for _ in lines[635].lower().split(' or ')]).replace('human','HUMAN')
    lines[635] = fixed

    data = []
    for l in lines:
        if not l.startswith('#'):
            level = l.count('.'*5)
            l = l.strip('.')
            if ' (' in l:
                if ') or' in l:
                    n1, l = l.split(') or')
                    area_name, citationP =  n1.strip().split(' (')
                    citation = citationP.rstrip(')')
                    d = (level, area_name, citation, 'NEXT SYN')
                    data.append(d)
                    #print(tc.red(tc.bold(repr(d))))

                area_name, citationP =  l.strip().split(' (')
                citation = citationP.rstrip(')')
            else:
                area_name = l
                citation = None

            d = (level, area_name, citation, None)
            #print(d)
            data.append(d)
    results = Async()(deferred(sgv.findByTerm)(d[1]) for d in data)
    #results = [None] * len(data)
    curies = [[r['curie'] for r in _ if 'curie' in r and 'UBERON' in r['curie']] if _ else [] for _ in results]
    output = [_[0] if _ else None for _ in curies]

    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
            [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    # TODO annotate the appendicies and the classes with these
    appendix_root_mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?

    class SP(rowParse):
        def __init__(self):
            self.nodes = defaultdict(dict)
            self._appendix = 0
            self.appendicies = {}
            self._last_at_level = {}
            self.names = defaultdict(set)
            self.children = defaultdict(set)
            self.parents = defaultdict(set)
            self.next_syn = False
            super().__init__(zoop)

        def Depth(self, value):
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    if self._appendix:
                        self.appendicies[self._appendix]['children'] = dict(self.children)
                        self.appendicies[self._appendix]['parents'] = dict(self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name':apname.capitalize(),
                        'type':self.citation.capitalize() if self.citation else None}
                    return
                else:
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix]['taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[self._appendix]['taxon'] = 'ANIMALIA'.capitalize()

                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(self._rowind)
                else:
                    self.name += str(self._appendix) + self.nodes[self._last_at_level[self.depth - 1]]['label']
                    #print(level, self.name)
                    # can't return here because they are their own level
                # replace with actually doing something...
                self.nodes[self._rowind]['label'] = self.name
                self.nodes[self._rowind]['citation'] = self.citation
                self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except:
                    breakpoint()
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            replace = {}
            for asdf in [sorted(n) for k,n in self.names.items() if len(n) > 1]:
                replace_with, to_replace = asdf[0], asdf[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r, rw in replace.items():
                #print(self.nodes[rw])
                o = self.nodes.pop(r)
                #print(o)

            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    tp = [_ for _ in sorted(['{: <50}'.format(n['label']) + n['uberon'] if n['uberon'] else n['label'] for n in sp.nodes.values()])]
    #print('\n'.join(tp))
    #print(sp.appendicies[1].keys())
    #print(sp.nodes[1].keys())
    nbase = PREFIXES['SWAN'] + '%s'
    json_ = {'nodes':[],'edges':[]}
    parent = ilxtr.swansonBrainRegionConcept

    og = OntGraph()
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, parent, label=anns['label'])
        new_graph.add_trip(nid, 'NIFRID:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl':anns['label'],'id':'SWA:' + str(node)})
        #if anns['uberon']:
            #new_graph.add_trip(nid, owl.equivalentClass, anns['uberon'])  # issues arrise here...
        [og.add(t) for t in map_term(rdflib.URIRef(nid), anns['label'], prefix='UBERON')]

    og.write(auth.get_path('ontology-local-repo') /
             'ttl/generated/swanson-uberon-mapping.ttl')
    #hrm = [(anns['label'], gn(anns['label'])) for node, anns in sp.nodes.items()]
    #ok = [(h, test, term_source(h, test)) for h, test in hrm if test]
    #notok = [h for h, test in hrm if not test]

    for appendix, data in sp.appendicies.items():
        aid = PREFIXES['SWAA'] + str(appendix)
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_trip(aid, 'ilxtr:hasTaxonRank', data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        ahp = 'swanr:hasPart' + str(appendix)
        apo = 'swanr:partOf' + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items():  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_restriction(pid, ahp, cid)  # note hierarchy inverts direction
                new_graph.add_restriction(cid, apo, pid)
                json_['edges'].append({'sub':'SWA:' + str(child),'pred':apo,'obj':'SWA:' + str(parent)})

    return new_graph
Example 30
class SwansonAppendix(resSource):
    sourceFile = auth.get_path('resources') / 'swanson_aligned.txt'
    artifact = Artifacts.SwansonAppendix