Code example #1
def cli(
    ctx,
    # config,
    out,
    interactive,
    debug,
    # input,
):
    context = ctx.obj = Context(script_path=SCRIPT_PATH)
    context.set('debug', debug)
    context.set('interactive', interactive)

    cfg = context.config
    cfg.out_dir = out

    context.log('Started')

    if ctx.invoked_subcommand is not None:
        context.logd(f'cli: I am about to invoke {ctx.invoked_subcommand}')
        return

    context.logd('cli: I was invoked without subcommand')
    ctx.invoke(subcommand)
    context.log('Finished')

    if context.get('interactive'):
        breakpoint()
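Note: the excerpt above shows only the command body; the click decorators that normally wire it up are not part of the snippet. A minimal sketch of how such a group-style entry point is typically declared with click follows; the option names and the default subcommand are illustrative assumptions, not taken from the original project.

import click

@click.group(invoke_without_command=True)  # run the group body even when no subcommand is given
@click.option('--out', default='.', help='output directory (illustrative flag)')
@click.option('--interactive/--no-interactive', default=False)
@click.option('--debug/--no-debug', default=False)
@click.pass_context
def cli(ctx, out, interactive, debug):
    ctx.obj = {'out': out, 'interactive': interactive, 'debug': debug}
    if ctx.invoked_subcommand is None:
        ctx.invoke(subcommand)  # fall back to a default subcommand, as in the example above
        if interactive:
            breakpoint()

@cli.command()
@click.pass_context
def subcommand(ctx):
    click.echo(f'running with {ctx.obj}')

if __name__ == '__main__':
    cli()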
Code example #2
File: hierarchies.py  Project: gsanou/pyontutils
def _main():
    rtco = 'http://purl.org/sig/ont/fma/constitutional_part_of'
    rtro = 'http://purl.org/sig/ont/fma/regional_part_of'
    #rtc = 'http://purl.org/sig/ont/fma/constitutional_part'.replace('/','%2F')  # FIXME the sub/pred relation is switched :/
    #rtr = 'http://purl.org/sig/ont/fma/regional_part'.replace('/','%2F')
    json_co = sgg_local.getEdges(rtco, limit=9999999999)
    json_ro = sgg_local.getEdges(rtro, limit=9999999999)
    #json_c = g.getEdges(rtc, limit=9999999999)
    #json_r = g.getEdges(rtr, limit=9999999999)
    #inv_edges(json_c)
    #inv_edges(json_r)

    json = json_ro
    #json['nodes'].extend(json_co['nodes'])
    #json['edges'].extend(json_co['edges'])

    #json['nodes'].extend(json_c['nodes'])
    #json['edges'].extend(json_c['edges'])
    #json['nodes'].extend(json_r['nodes'])
    #json['edges'].extend(json_r['edges'])
    #breakpoint()


    #fma = Query('FMA:50801', 'None', 'INCOMING', 20)
    fma = Query('FMA:61817', 'None', 'INCOMING', 20)  # Cerebral hemisphere
    fma_tree, fma_extra = creatTree(*fma, json=json)
    with open(f'{tempfile.tempdir}/rc_combo_tree', 'wt') as f: f.write(str(fma_tree))

    breakpoint()
Code example #3
def main():
    files = glob((gitf / 'methodsOntology-upstream/to_be_integrated_in_NIF/'
                  ).as_posix() + '*')
    rows = []
    got_header = False
    for file in files:
        with open(file, 'rt') as f:
            r = [r for r in csv.reader(f, delimiter='|')]
        if got_header:
            r = r[1:]
        else:
            got_header = True
        rows.extend(r)

    def async_func(row):
        resps = sgv.findByTerm(row[2])
        if resps:
            n = resps[0]
            c, l = n['curie'], n['labels'][0]
        else:
            c, l = None, None
        r = row + [c, l]
        return r

    matched = [rows[0] + ['e_curie', 'e_label']] + async_getter(
        async_func, [(r, ) for r in rows[1:]])

    breakpoint()
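async_getter here is a helper from the surrounding project that fans async_func out over the rows in parallel. A rough equivalent of the same fan-out using only the standard library would look like the sketch below; the sgv.findByTerm call is copied from the example, while the thread-pool approach is a substitution for illustration, not the project's actual helper.

from concurrent.futures import ThreadPoolExecutor

def lookup(row):
    # same per-row work as async_func above
    resps = sgv.findByTerm(row[2])
    if resps:
        n = resps[0]
        return row + [n['curie'], n['labels'][0]]
    return row + [None, None]

with ThreadPoolExecutor(max_workers=8) as pool:
    matched = [rows[0] + ['e_curie', 'e_label']] + list(pool.map(lookup, rows[1:]))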
Code example #4
File: nif_cell.py  Project: tgbugs/pyontutils
def main():
    with open(auth.get_path('curies'), 'rt') as f:
        curie_map = yaml.safe_load(f)

    curie_map['nlx_only'] = curie_map[
        '']  # map nlx_only to 'http://uri.neuinfo.org/nif/nifstd/'

    g = rdflib.Graph()
    g.parse('http://ontology.neuinfo.org/NIF/ttl/NIF-Cell.ttl',
            format='turtle')

    curiespaces = {k: rdflib.Namespace(v) for k, v in curie_map.items()}
    namespaces = {
        c_prefix: rdflib.Namespace(iri_prefix)
        for c_prefix, iri_prefix in g.namespaces()
    }

    subject = curiespaces['NIFCELL']['nifext_75']
    predicate = None
    object_ = None
    matches = [t for t in g.triples((subject, predicate, object_))]
    print(matches)
    if matches:
        predicate = matches[0][1].toPython()
        print(predicate)

    if __name__ == '__main__':
        breakpoint()
Code example #5
File: __init__.py  Project: tgbugs/pyontutils
def main():
    olr = auth.get_path('ontology-local-repo')
    resources = auth.get_path('resources')
    if not olr.exists():
        raise FileNotFoundError(f'{olr} does not exist cannot continue')
    if not resources.exists():
        raise FileNotFoundError(f'{resources} does not exist cannot continue')

    from docopt import docopt
    args = docopt(__doc__, version='parcellation 0.0.1')
    # import all ye submodules we have it sorted! LabelBase will find everything for us. :D
    if not args['--local']:
        from nifstd_tools.parcellation.aba import Artifacts as abaArts
    from nifstd_tools.parcellation.fsl import FSL  # Artifacts is attached to the class
    from nifstd_tools.parcellation.whs import Artifacts as whsArts
    from nifstd_tools.parcellation.berman import Artifacts as bermArts
    from nifstd_tools.parcellation.paxinos import Artifacts as paxArts
    from nifstd_tools.parcellation.swanson import Artifacts as swArts
    from nifstd_tools.parcellation.freesurfer import Artifacts as fsArts
    onts = getOnts()
    _ = *(print(ont) for ont in onts),
    out = build(*onts,
                parcBridge,
                fail=args['--fail'],
                n_jobs=int(args['--jobs']))
    if args['--stats']:
        breakpoint()
Code example #6
File: EKF.py  Project: leet4th/sim-quadcopter
    def update(self, z):

        H = self.calcMeasurmentModelJac()

        PHT = self.P.dot(H.T)
        self.S = H.dot(PHT) + self.R
        #print(self.S.diagonal())
        try:
            self.K = PHT.dot(np.linalg.inv(self.S))
        except:
            breakpoint()

        self.hx = self.calcMeasurmentModel()
        self.res = z - self.hx

        self.state = self.state + self.K.dot(self.res)

        # P = (I-KH)P(I-KH)' + KRK' is more numerically stable
        # and works for non-optimal K vs the equation
        # P = (I-KH)P usually seen in the literature.
        I_KH = self._I - self.K.dot(H)
        self.P = np.dot(I_KH, self.P).dot(I_KH.T) + np.dot(self.K, self.R).dot(
            self.K.T)
        #self.P = I_KH.dot(self.P)

        #self.P = self.P - self.K.dot(self.S).dot(self.K.T)

        # Normalize quaternion
        self.normalizeQuaternion()

        # save measurement and posterior state
        self.z = deepcopy(z)
        self.state_post = self.state.copy()
        self.P_post = self.P.copy()
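The comment in update() refers to the Joseph (stabilized) form of the covariance update. In the usual Kalman filter notation, the two forms mentioned are

    P^{+} = (I - K H) P (I - K H)^{T} + K R K^{T}    (Joseph form, valid for any gain K)
    P^{+} = (I - K H) P                              (simplified form, valid only for the optimal gain)

The Joseph form preserves symmetry and positive semidefiniteness of P under roundoff, which is why the code uses it and keeps the simpler update commented out.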
Code example #7
def main():
    from docopt import docopt
    args = docopt(__doc__, version='ont-catalog 0.0.1')
    dobig = args['--big']
    remote_base = 'http://ontology.neuinfo.org/NIF/ttl/'
    olr = Path(args['--ontology-local-repo'])
    local_base = (olr / 'ttl').as_posix() + '/'

    #list of all nif ontologies
    #onts = [f for f in fs if f.endswith('.ttl') or f.endswith('.owl') and 'NEMO_' not in f]

    repo = Repo(olr)
    repo_path = Path(olr)
    tracked_files = [
        (repo_path / f).as_posix()
        # FIXME missing scicrunch-registry.ttl
        for f in repo.git.ls_files('--', 'ttl/').split('\n')
        if f.endswith('.ttl') or f.endswith('.owl')
    ]

    #_ = [print(f) for f in fs]

    extra_files = []  # TODO pass in via cli?
    mapping = [(remote_base + fragment, fragment)
               for file in tracked_files + extra_files
               for _, fragment in (file.split('/ttl/', 1), )]

    # check for mismatched import and ontology iris
    itrips = local_imports(
        remote_base, local_base, tracked_files, readonly=True,
        dobig=dobig)  # XXX these files are big and slow, run at own peril
    sa = {os.path.basename(o): s for s, p, o in itrips if 'sameAs' in p}

    # FIXME should be able to do this by checking what is tracked by git...
    externals = ('CogPO.owl', 'NEMO_vv2.98.owl', 'cogat_v0.3.owl', 'doid.owl',
                 'ero.owl', 'pato.owl', 'pr.owl', 'ro_bfo1-1_bridge.owl',
                 'uberon.owl')

    for f in tracked_files + extra_files:
        if '/external/' in f and anyMembers(f, *externals):
            basename = os.path.basename(f)
            if basename in sa:
                target = sa[basename]
                if 'external' not in target:
                    mapping.append((target, 'external/' + basename))

    # make a protege catalog file to simplify life
    uriline = '    <uri id="User Entered Import Resolution" name="{ontid}" uri="{filename}"/>'

    xmllines = (['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
                 '<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">']
                + [uriline.format(ontid=ont, filename=file) for ont, file in sorted(mapping)]
                + ['</catalog>'])
    xml = '\n'.join(xmllines)
    with open(f'{tempfile.tempdir}/nif-catalog-v001.xml', 'wt') as f:
        f.write(xml)

    if args['--debug']:
        breakpoint()
Code example #8
File: overlaps.py  Project: tgbugs/pyontutils
def extract(files, graphs):
    fn_graphs = {sn(f): g for f, g in zip(files, graphs)}
    results = comb(fn_graphs)
    overlaps = {k: v for k, v in results.items() if v}
    no_bri_inf = {
        k: v
        for k, v in overlaps.items() if '-Infe' not in k and '-Bridge' not in k
    }
    breakpoint()
Code example #9
File: gen_nat_models.py  Project: tgbugs/pyontutils
def main():
    source = 'https://raw.githubusercontent.com/BlueBrain/nat/master/nat/data/modelingDictionary.csv'
    delimiter = ';'

    resp = requests.get(source)
    rows = [
        r for r in csv.reader(resp.text.split('\n'), delimiter=delimiter)
        if r and r[0][0] != '#'
    ]
    header = [
        'Record_ID', 'parent_category', 'name', 'description', 'required_tags'
    ]

    PREFIXES = makePrefixes('owl', 'skos', 'ILX', 'definition')
    graph = makeGraph('measures', prefixes=PREFIXES)

    class nat(rowParse):
        def Record_ID(self, value):
            print(value)
            self.old_id = value
            self._id = TEMP[value]

        def parent_category(self, value):
            self.super_old_id = value
            self.super_id = TEMP[value]

        def name(self, value):
            self.hidden = value
            self.label = value.replace('_', ' ')

        def description(self, value):
            self.definition = value

        def required_tags(self, value):
            pass

        def _row_post(self):
            graph.add_class(self._id, self.super_id, label=self.label)
            graph.add_trip(self._id, 'skos:hiddenLabel', self.hidden)
            graph.add_trip(self._id, 'definition:', self.definition)

    asdf = nat(rows, header)
    graph.write()
    if __name__ == '__main__':
        breakpoint()
Code example #10
File: EKF_old.py  Project: leet4th/sim-quadcopter
    def update(self, z):

        # Extract States from State vector
        rL = self.state[0:3]
        vL = self.state[3:6]
        q_toLfromB = self.state[6:10]
        gyroBias = self.state[10:13]
        accelBias = self.state[13:16]

        H = self.dhdx()

        PHT = self.P.dot(H.T)
        self.S = H.dot(PHT) + self.R
        #print(self.S.diagonal())
        try:
            self.K = PHT.dot( np.linalg.inv(self.S) )
        except:
            breakpoint()

        self.hx = self.h()
        self.res = z - self.hx

        self.state = self.state + self.K.dot(self.res)

        # P = (I-KH)P(I-KH)' + KRK' is more numerically stable
        # and works for non-optimal K vs the equation
        # P = (I-KH)P usually seen in the literature.
        I_KH = self._I - self.K.dot(H)
        self.P = np.dot(I_KH,self.P).dot(I_KH.T) + np.dot(self.K, self.R).dot(self.K.T)
        #self.P = I_KH.dot(self.P)

        #self.P = self.P - self.K.dot(self.S).dot(self.K.T)

        # Normalize quaternion
        self.normalizeQuaternion()

        # save measurement and posterior state
        self.z = deepcopy(z)
        self.state_post = self.state.copy()
        self.P_post = self.P.copy()
Code example #11
def main():
    #cv1 = CutsV1Lite()
    CutsV1.fetch_grid = False
    cv1 = CutsV1()
    hrm = [cv1.row_object(i) for i, r in enumerate(cv1.values)
           if cv1.row_object(i).exhasmolecularphenotype().value]
    to_sco = set(t for h in hrm for t in h.entailed_molecular_phenotypes())
    ros = [cv1.row_object(i + 1) for i, r in enumerate(cv1.values[1:])]
    to_fix = [r for r in ros if list(r.entailed_molecular_phenotypes())]
    #maybe_fixed = [t.neuron_cleaned() for t in to_fix]
    #assert maybe_fixed != [f.neuron_existing() for f in to_fix]
    config = Config('common-usage-types')
    _final = [r.neuron_cleaned() for r in ros if r.include()]
    final = [f for f in _final if f is not None]  # FIXME there are 16 neurons marked as yes that are missing
    #fixed = [f for f in final if [_ for _ in f.pes if isinstance(_, EntailedPhenotype)]]
    [f._sigh() for f in final]
    config.write()
    config.write_python()
    if __name__ == '__main__':
        breakpoint()
Code example #12
File: hierarchies.py  Project: gsanou/pyontutils
def creatTree(root, relationshipType, direction, depth, graph=None, json=None, filter_prefix=None, prefixes=uPREFIXES, html_head=tuple(), local=False, verbose=False, curie=None, entail=True):
    sgg = graph
    html_head = list(html_head)
    # TODO FIXME can probably switch over to the inverse of the automata I wrote for parsing trees in parc...
    if json is None:
        j, root_iri = queryTree(root, relationshipType, direction, depth, entail,
                                sgg, filter_prefix, curie)
        # FIXME stick this on sgg ...
        # FIXME some magic nonsense for passing the last query to sgg out
        # yet another reason to objectify this (heh)
        html_head.append('<link rel="http://www.w3.org/ns/prov#'
                         f'wasDerivedFrom" href="{sgg._last_url}">')  # FIXME WARNING leaking keys
    else:
        root_iri = None
        j = dict(json)
        if relationshipType is not None:
            j['edges'] = [e for e in j['edges'] if e['pred'] == relationshipType]
        #if 'meta' in j['nodes'][0]:  # check if we are safe to check meta
            #flag_dep(j)

    # filter out owl:Nothing
    j['edges'] = [e for e in j['edges'] if 'owl:Nothing' not in e.values()]

    if verbose:
        print(len(j['nodes']))

    (nodes, objects, subjects, names,
     pnames, edgerep, root, roots, leaves) = process_nodes(j, root, direction, verbose)

    if root is None:
        breakpoint()

    rootsl = '\n'.join(roots)
    tree_name = f'{rootsl}{relationshipType}{direction}{depth}'

    Tree, _ = newTree(tree_name, parent_dict=subjects)
    hierarchy, dupes = build_tree(Tree, root, objects, subjects, existing={}, flat_tree=set())
    _, nTreeNode = newTree('names' + tree_name, parent_dict=pnames)  # FIXME pnames is wrong...

    def rename(tree):
        dict_ = nTreeNode()
        for k in tree:
            dict_[nodes[k]] = rename(tree[k])
        return dict_

    htmlNodes = makeHtmlNodes(nodes, sgg, prefixes, local, root_iri, root)
    hpnames = {htmlNodes[k]:[htmlNodes[s] for s in v] for k, v in subjects.items()}
    _, hTreeNode = newTree('html' + tree_name, parent_dict=hpnames, html_head=html_head)

    def htmlTree(tree):
        dict_ = hTreeNode()
        for k in tree:
            dict_[htmlNodes[k]] = htmlTree(tree[k])
        return dict_

    try:
        named_hierarchy = rename(hierarchy)
        html_hierarchy = htmlTree(hierarchy)
    except KeyError as e:
        log.exception(e)
        breakpoint()
        raise e

    def sub_prefixes(h):
        if prefixes is not None:
            for n, p in prefixes.items():
                if type(p) != str:
                    p = str(p)
                h = h.replace('href="' + n + ':', 'href="' + p)
                h = h.replace('>' + p, '>' + n + ':')

        return h

    html_body = sub_prefixes(html_hierarchy.__html__())
    extras = Extras(hierarchy, html_hierarchy,
                    dupes, nodes, edgerep,
                    objects, subjects,
                    names, pnames, hpnames, j,
                    html_body, str(named_hierarchy))

    return named_hierarchy, extras
Code example #13
File: ontload.py  Project: tgbugs/pyontutils
def run(args):
    # modes
    graph = args['graph']
    scigraph = args['scigraph']
    config = args['config']
    imports = args['imports']
    chain = args['chain']
    extra = args['extra']

    # required
    repo_name = args['<repo>']
    remote_base = args['<remote_base>']
    ontologies = args['<ontologies>']

    # options
    git_remote = args['--git-remote']
    git_local = Path(args['--git-local']).resolve()
    zip_location = Path(args['--zip-location']).resolve()
    graphload_config = Path(args['--graphload-config']).resolve()
    graphload_config_template = graphload_config  # NOTE XXX
    if args['--graphload-ontologies'] is not None:
        graphload_ontologies = Path(args['--graphload-ontologies']).resolve()
    else:
        graphload_ontologies = None

    org = args['--org']
    branch = args['--branch']
    commit = args['--commit']
    scp = args['--scp-loc']
    sorg = args['--scigraph-org']
    sbranch = args['--scigraph-branch']
    scommit = args['--scigraph-commit']
    sscp = args['--scigraph-scp-loc']
    scigraph_quiet = args['--scigraph-quiet']
    patch_config = args['--patch-config']
    curies_location = args['--curies']
    patch = args['--patch']
    check_built = args['--check-built']
    debug = args['--debug']
    log = args['--logfile']  # TODO
    fix_imports_only = args['--fix-imports-only']

    load_base = 'scigraph-load -c {config_path}'  # now _this_ is easier

    if args['--view-defaults']:
        for k, v in defaults.items():
            print(f'{k:<22} {v}')
        return

    # post parse mods
    if remote_base == 'NIF':
        remote_base = 'http://ontology.neuinfo.org/NIF'

    itrips = None

    if repo_name is not None:
        local_base = jpth(git_local, repo_name)

    if graph:
        if args['--path-build-scigraph']:  # path-build-scigraph
            path_build_scigraph = Path(args['--path-build-scigraph'])
            (scigraph_commit, services_zip,
             scigraph_reset_state) = scigraph_build(path_build_scigraph,
                                                    git_remote,
                                                    sorg,
                                                    path_build_scigraph,
                                                    sbranch,
                                                    scommit,
                                                    check_built=check_built,
                                                    cleanup_later=True,
                                                    quiet=scigraph_quiet)
        else:
            scigraph_commit = 'dev-9999'
            services_zip = 'None'
            scigraph_reset_state = lambda: None

        with execute_regardless(scigraph_reset_state):
            rl = ReproLoader(
                zip_location,
                git_remote,
                org,
                git_local,
                repo_name,
                branch,
                commit,
                remote_base,
                load_base,
                graphload_config_template,
                graphload_ontologies,
                patch_config,
                patch,
                scigraph_commit,
                fix_imports_only=fix_imports_only,
                check_built=check_built,
            )

        if not fix_imports_only:
            FILE_NAME_ZIP = Path(rl.zip_path).name
            LATEST = Path(zip_location) / 'LATEST'
            if LATEST.exists() and LATEST.is_symlink():
                LATEST.unlink()

            LATEST.symlink_to(FILE_NAME_ZIP)

            itrips, config = rl.itrips, rl.config

            if not ontologies:
                ontologies = rl.ontologies

            print(services_zip)
            print(rl.zip_path)
            if '--local' in args:
                return

    elif scigraph:
        (scigraph_commit, services_zip,
         _) = scigraph_build(zip_location,
                             git_remote,
                             sorg,
                             git_local,
                             sbranch,
                             scommit,
                             check_built=check_built,
                             quiet=scigraph_quiet)
        print(services_zip)
        if '--local' in args:
            return

    elif config:
        #graph_path = Path(args['<graph_path>']).resolve()
        config_path = Path(args['--graph-config-out']).resolve()
        #local_base = Path(git_local, repo_name).resolve()
        date_today = TODAY()
        ReproLoader.make_graphload_config(graphload_config_template,
                                          graphload_ontologies, zip_location,
                                          date_today, config_path)

    elif imports:
        # TODO mismatch between import name and file name needs a better fix
        itrips = local_imports(remote_base, local_base, ontologies)
    elif chain:
        itrips = local_imports(remote_base,
                               local_base,
                               ontologies,
                               readonly=True)
    elif extra:
        from nifstd_tools.utils import memoryCheck
        curies = getCuries(curies_location)
        curie_prefixes = set(curies.values())
        memoryCheck(2665488384)
        graph = loadall(git_local, repo_name)
        new_graph = normalize_prefixes(graph, curies)
        for_burak(new_graph)
        debug = True
    elif patch:
        local_base = jpth(git_local, repo_name)
        local_versions = tuple(do_patch(patch_config, local_base))
    else:
        raise BaseException('How did we possibly get here docopt?')

    if itrips:
        import_graph = OntGraph()
        [import_graph.add(t) for t in itrips]
        for tree, extra in import_tree(import_graph, ontologies):
            name = Path(next(iter(tree.keys()))).name
            with open(jpth(zip_location, f'{name}-import-closure.html'),
                      'wt') as f:
                f.write(extra.html.replace('NIFTTL:',
                                           ''))  # much more readable

    if debug:
        breakpoint()
Code example #14
setLocalNames(phns.BBP)
setLocalContext(Phenotype('NCBITaxon:10090', ilxtr.hasInstanceInTaxon))
Neuron(Phenotype('UBERON:0001950', ilxtr.hasSomaLocatedIn, label='neocortex'))
Neuron(brain, Phenotype('PR:000013502'))
Neuron(Phenotype('UBERON:0000955'), Phenotype('CHEBI:18243'))
Neuron(Phenotype('UBERON:0001950', ilxtr.hasSomaLocatedIn))
Neuron(Phenotype('UBERON:0000955'), Phenotype('CHEBI:18243'), Phenotype('PR:000013502'))

def inner():
    Neuron(SOM, Phenotype('PR:000013502'))
inner()

#resetLocalNames()  # works as expected at the top level
#resetLocalNames(globals())  # works as expected
pv = Neuron(brain, Phenotype('PR:000013502'))

setLocalNames()
messup(pv)  # the localNames call inside here persists
print('testing printing pv after localNames is called inside messup')
print(repr(pv))

print(config.neurons())

if __name__ == '__main__':
    breakpoint()

# XXX these have to be called inside this module or the state persists in graphBase FIXME
resetLocalNames()
setLocalContext()
Code example #15
File: aba_uberon.py  Project: tgbugs/pyontutils
def main():
    abagraph = rdflib.Graph()
    abagraph.parse(
        (gitf /
         'NIF-Ontology/ttl/generated/parcellation/mbaslim.ttl').as_posix(),
        format='turtle')
    abagraph.parse(
        (gitf / 'NIF-Ontology/ttl/bridge/aba-bridge.ttl').as_posix(),
        format='turtle')
    nses = {k: rdflib.Namespace(v) for k, v in abagraph.namespaces()}
    #nses['ABA'] = nses['MBA']  # enable quick check against the old xrefs
    syn_iri = nses['NIFRID']['synonym']
    acro_iri = nses['NIFRID']['acronym']
    abasyns = {}
    abalabs = {}
    abaacro = {}
    ABA_PREFIX = 'MBA:'
    #ABA_PREFIX = 'ABA:'  # all bad
    for sub in abagraph.subjects(rdflib.RDF.type, rdflib.OWL.Class):
        if not sub.startswith(nses[ABA_PREFIX[:-1]]['']):
            continue
        subkey = ABA_PREFIX + sub.rsplit('/', 1)[1]
        sub = rdflib.URIRef(sub)
        abalabs[subkey] = [
            o for o in abagraph.objects(rdflib.URIRef(sub), rdflib.RDFS.label)
        ][0].toPython()
        syns = []
        for s in abagraph.objects(sub, syn_iri):
            syns.append(s.toPython())
        abasyns[subkey] = syns

        abaacro[subkey] = [
            a.toPython() for a in abagraph.objects(sub, acro_iri)
        ]

    url = 'http://api.brain-map.org/api/v2/tree_search/Structure/997.json?descendants=true'
    resp = requests.get(url).json()

    ids = set([ABA_PREFIX + str(r['id']) for r in resp['msg']])
    Query = namedtuple('Query',
                       ['id', 'relationshipType', 'direction', 'depth'])
    #uberon = Query('UBERON:0000955', 'http://purl.obolibrary.org/obo/BFO_0000050', 'INCOMING', 9)
    uberon = Query('UBERON:0001062', 'subClassOf', 'INCOMING',
                   10)  # anatomical entity
    output = g.getNeighbors(**uberon._asdict())

    # TODO figure out the superclass that can actually get all the brain parts

    meta_edge = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref'

    u_a_map = {}
    a_u_map = {}
    uberon_syns = {}
    uberon_labs = {}
    syn_types = {
        'http://www.geneontology.org/formats/oboInOwl#hasExactSynonym':
        'Exact',
        'http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym':
        'Narrow',
        'http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym':
        'Related',
        'http://www.geneontology.org/formats/oboInOwl#hasBroadSynonym':
        'Broad',
    }
    for node in output['nodes']:
        curie = node['id']
        uberon_labs[curie] = node['lbl']
        uberon_syns[curie] = {}
        if 'synonym' in node['meta']:
            for stype in syn_types:
                if stype in node['meta']:
                    uberon_syns[curie][stype] = node['meta'][stype]

        if meta_edge in node['meta']:
            xrefs = node['meta'][meta_edge]
            mba_ref = [r for r in xrefs if r.startswith(ABA_PREFIX)]
            u_a_map[curie] = mba_ref
            if mba_ref:
                for mba in mba_ref:
                    a_u_map[mba] = curie
        else:
            u_a_map[curie] = None

    def obo_output():  # oh man obo_io is a terrible interface for writing obofiles :/
        for aid in abalabs:  # set aids not in uberon to none
            if aid not in a_u_map:
                a_u_map[aid] = None

        e = OboFile()
        n = OboFile()
        r = OboFile()
        b = OboFile()
        name_order = 'Exact', 'Narrow', 'Related', 'Broad'
        rev = {v: k for k, v in syn_types.items()}  # silliness
        syn_order = [rev[n] for n in name_order]

        files_ = {
            rev['Broad']: b,
            rev['Exact']: e,
            rev['Narrow']: n,
            rev['Related']: r
        }
        for aid, uid in sorted(a_u_map.items()):
            id_line = 'id: ' + aid
            lines = []
            lines.append(id_line)
            lines.append('name: ' + abalabs[aid])
            if uid in uberon_syns:
                syns = uberon_syns[uid]
            else:
                syns = {}

            for syn_type in syn_order:
                f = files_[syn_type]
                if syn_types[syn_type] == 'Exact' and uid is not None:
                    syn_line = 'synonym: "' + uberon_labs[
                        uid] + '" ' + syn_types[syn_type].upper(
                        ) + ' [from label]'
                    lines.append(syn_line)
                if syn_type in syns:
                    for syn in sorted(syns[syn_type]):
                        syn_line = 'synonym: "' + syn + '" ' + syn_types[
                            syn_type].upper() + ' []'
                        lines.append(syn_line)
                block = '\n'.join(lines)
                term = Term(block, f)

        e.filename = 'e-syns.obo'
        n.filename = 'en-syns.obo'
        r.filename = 'enr-syns.obo'
        b.filename = 'enrb-syns.obo'
        for f in files_.values():
            h = Header('format-version: 1.2\nontology: %s\n' % f.filename)
            h.append_to_obofile(f)
            f.write(f.filename)
        #breakpoint()

    #obo_output()

    def make_record(uid, aid):  # edit this to change the format
        to_format = ('{uberon_id: <20}{uberon_label:}\n'
                     '{aba_id: <20}{aba_label}\n'
                     '------ABA  SYNS------\n'
                     '{aba_syns}\n'
                     '-----UBERON SYNS-----\n'
                     '{uberon_syns}\n')
        uberon_syn_rec = uberon_syns[uid]
        insert_uberon = []
        for edge, syns in sorted(uberon_syn_rec.items()):
            insert_uberon.append('--{abv}--\n{syns}'.format(
                abv=syn_types[edge], syns='\n'.join(sorted(syns))))

        kwargs = {
            'uberon_id': uid,
            'uberon_label': uberon_labs[uid],
            'aba_id': aid,
            'aba_label': abalabs[aid],
            'aba_syns': '\n'.join(sorted(abasyns[aid] + abaacro[aid])),
            'uberon_syns': '\n'.join(insert_uberon)
        }
        return to_format.format(**kwargs)

    #text = '\n\n'.join([make_record(uid, aid[0]) for uid, aid in sorted(u_a_map.items()) if aid])

    #with open('aba_uberon_syn_review.txt', 'wt') as f:
    #f.write(text)

    print('total uberon terms checked:', len(uberon_labs))
    print('total aba terms:           ', len(abalabs))
    print('total uberon with aba xref:',
          len([a for a in u_a_map.values() if a]))

    ubridge = createOntology('uberon-parcellation-mappings',
                             'Uberon Parcellation Mappings',
                             makePrefixes('owl', 'ilx', 'UBERON', 'MBA'))
    for u, arefs in u_a_map.items():
        if arefs:
            # TODO check for bad assumptions here
            ubridge.add_trip(u, 'ilx:delineatedBy', arefs[0])
            ubridge.add_trip(arefs[0], 'ilx:delineates', u)

    ubridge.write()
    if __name__ == '__main__':
        breakpoint()
Code example #16
def main():
    DB_URI = 'mysql+mysqlconnector://{user}:{password}@{host}:{port}/{db}'
    if socket.gethostname() != 'orpheus':
        config = mysql_conn_helper('localhost', 'nif_eelg', 'nif_eelg_secure', 33060)  # see .ssh/config
    else:
        config = mysql_conn_helper('nif-mysql.crbs.ucsd.edu', 'nif_eelg', 'nif_eelg_secure')
    engine = create_engine(DB_URI.format(**config), echo=True)
    config = None
    del(config)

    insp = inspect(engine)
    terms = [c['name'] for c in insp.get_columns('terms')]
    term_existing_ids = [c['name'] for c in insp.get_columns('term_existing_ids')]
    #breakpoint()
    #sys.exit()

    query = engine.execute('SELECT * FROM term_existing_ids as teid JOIN terms as t ON t.id = teid.tid WHERE t.type != "cde"')
    header = term_existing_ids + terms

    data = query.fetchall()
    cdata = list(zip(*data))

    def datal(head):
        return cdata[header.index(head)]

    ilx_labels = {ilxb[ilx_fragment]:label for ilx_fragment, label in zip(datal('ilx'), datal('label'))}

    mapping_no_sao = [p for p in zip(datal('iri'), datal('ilx')) if 'neuinfo' in p[0]]  # 9446
    mapping = [p for p in zip(datal('iri'), datal('ilx')) if 'neuinfo' in p[0] or '/sao' in p[0]]  # 9883
    done = [ilx for iri, ilx in mapping]
    obo_mapping = [p for p in zip(datal('iri'), datal('ilx')) if 'obolibrary' in p[0] and p[1] not in done]
    done = done + [ilx for iri, ilx in obo_mapping]
    db_mapping = [p for p in zip(datal('iri'), datal('ilx')) if 'drugbank' in p[0] and p[1] not in done]
    done = done + [ilx for iri, ilx in db_mapping]
    t3db_mapping = [p for p in zip(datal('iri'), datal('ilx')) if 't3db' in p[0] and p[1] not in done]
    done = done + [ilx for iri, ilx in t3db_mapping]

    wiki_mapping = [p for p in zip(datal('iri'), datal('ilx')) if 'neurolex' in p[0] and p[1] not in done]

    sao_mapping = {o.toPython():s for s, o in Graph().parse((gitf / 'nlxeol/sao-nlxwiki-fixes.ttl').as_posix(), format='ttl').subject_objects(oboInOwl.hasAlternativeId)}

    scr = Graph().parse((gitf / 'NIF-Ontology/scicrunch-registry.ttl').as_posix(), format='turtle')
    moved_to_scr = {}
    #PROBLEM = set()
    for s, o in scr.subject_objects(oboInOwl.hasDbXref):
        if 'SCR_' in o:
            print(f'WARNING Registry identifier listed as alt id! {s} hasDbXref {o}')
            continue
        uri = NIFSTD[o]
        #try:
        assert uri not in moved_to_scr, f'utoh {uri} was mapped to more than one registry entry! {s} {moved_to_scr[uri]}'
        #except AssertionError:
            #PROBLEM.add(uri)

        moved_to_scr[uri] = s

    to_scr = [(k, v) for k, v in moved_to_scr.items()
           if noneMembers(k, 'SciEx_', 'OMICS_', 'rid_', 'SciRes_',
                          'biodbcore-', 'C0085410', 'doi.org', 'C43960',
                          'doi:10.', 'GAZ:',
                          # 'birnlex_', 'nlx_', 'nif-'
                         )]

    replacement_graph = createOntology(filename='NIFSTD-ILX-mapping',
                        name='NLX* to ILX equivalents',
                        prefixes=makePrefixes('ILX'),)

    scr_rep_graph = createOntology(filename='NIFSTD-SCR-mapping',
                                   name='NLX* to SCR equivalents',
                                   prefixes=makePrefixes('SCR'),)

    _existing = {}
    def dupes(this, other, set_, dupes_):
        if this not in set_:
            set_.add(this)
            _existing[this] = other
        elif _existing[this] != other:
            dupes_[this].add(_existing[this])
            dupes_[this].add(other)

    iri_done = set()
    ilx_done = set()
    iri_dupes = defaultdict(set)
    ilx_dupes = defaultdict(set)
    def check_dupes(iri, ilx):
        dupes(iri, ilx, iri_done, iri_dupes)
        dupes(ilx, iri, ilx_done, ilx_dupes)

    BIRNLEX = Namespace(uPREFIXES['BIRNLEX'])
    trouble = [  # some are _2 issues :/
               # in interlex -- YES WE KNOW THEY DONT MATCH SOME IDIOT DID THIS IN THE PAST
               BIRNLEX['1006'],  # this one appears to be entirely novel despite a note that it was created in 2006...
               BIRNLEX['1152'],  # this was used in uberon ;_;
               BIRNLEX['2476'],  # can be owl:sameAs ed -> _2 version
               BIRNLEX['2477'],  # can be owl:sameAs ed -> _2 version
               BIRNLEX['2478'],  # can be owl:sameAs ed -> _2 version
               BIRNLEX['2479'],  # can be owl:sameAs ed -> _2 version
               BIRNLEX['2480'],  # can be owl:sameAs ed -> _2 version
               BIRNLEX['2533'],  # This is in interlex as a wiki id http://uri.interlex.org/base/ilx_0109349 since never used in the ontology, we could add it to the list of 'same as' for cosmetic purposes which will probably happen...
               BIRNLEX['3074'],  # -> CHEBI:26848  # add to slim and bridge...
               BIRNLEX['3076'],  # -> CHEBI:26195  # XXX when we go to load chebi make sure we don't dupe this...
    ]

    aaaaaaaaaaaaaaaaaaaaaaaaaaaaa = [t + '_2' for t in trouble]  # _never_ do this

    # TODO check for cases where there is an ilx and scr for the same id >_<

    sao_help = set()
    for iri, ilx_fragment in chain(mapping, to_scr):  # XXX core loop
        if iri in sao_mapping:
            uri = sao_mapping[iri]
            sao_help.add(uri)
        else:
            uri = URIRef(iri)

        if uri in trouble:
            #print('TROUBLE', iri, ilxb[ilx_fragment])
            print('TROUBLE', ilxb[ilx_fragment])

        if uri in moved_to_scr:  # TODO I think we need to have _all_ the SCR redirects here...
            s, p, o = uri, ilxtr.hasScrId, moved_to_scr[uri]
            scr_rep_graph.g.add((s, p, o))
        else:
            s, p, o = uri, ilxtr.hasIlxId, ilxb[ilx_fragment]
            #s, p, o = o, ilxtr.ilxIdFor, s
            replacement_graph.g.add((s, p, o))

        check_dupes(s, o)

    dupes = {k:v for k, v in iri_dupes.items()}
    idupes = {k:v for k, v in ilx_dupes.items()}
    assert not dupes, f'there are duplicate mappings for an external id {dupes}'
    #print(ilx_dupes)  # there are none yet

    ng = cull_prefixes(replacement_graph.g, prefixes=uPREFIXES)
    ng.filename = replacement_graph.filename

    sng = cull_prefixes(scr_rep_graph.g, prefixes=uPREFIXES)
    sng.filename = scr_rep_graph.filename


    _ = [print(k.toPython(), ' '.join(sorted(ng.qname(_.toPython()) for _ in v))) for k, v in idupes.items()]

    # run `resolver_uris = sorted(set(e for t in graph for e in t if 'uri.neuinfo.org' in e))` on a graph with everything loaded to get this file...
    resources = Path(__file__).resolve().absolute().parent / 'resources'
    with open((resources / 'all-uri.neuinfo.org-uris.pickle').as_posix(), 'rb') as f:
        all_uris = pickle.load(f)  # come in as URIRefs...
    with open((resources / 'all-uri.neuinfo.org-uris-old.pickle').as_posix(), 'rb') as f:
        all_uris_old = pickle.load(f)  # come in as URIRefs...
    with open((resources / 'all-uri.neuinfo.org-uris-old2.pickle').as_posix(), 'rb') as f:
        all_uris_old2 = pickle.load(f)  # come in as URIRefs...

    resolver_uris = set(e for t in chain(ng.g, sng.g) for e in t if 'uri.neuinfo.org' in e)
    ilx_only = resolver_uris - all_uris  # aka nlxonly
    resolver_not_ilx_only = resolver_uris - ilx_only
    problem_uris = all_uris - resolver_uris
    old_uris = all_uris_old - all_uris
    old_uris2 = all_uris_old2 - all_uris
    dold_uris = all_uris_old - all_uris_old2

    #idold_uris = all_uris_old2 - all_uris_old  # empty as expected
    #nxrefs = Graph().parse((gitf / 'NIF-Ontology/ttl/generated/nlx-xrefs.ttl').as_posix(), format='turtle')
    nxrefs = Graph().parse((gitf / 'nlxeol/nlx-xrefs.ttl').as_posix(), format='turtle')
    xrefs_uris = set(e for t in nxrefs for e in t if 'uri.neuinfo.org' in e)
    test_old_uris = old_uris2 - xrefs_uris

    diff_uris = test_old_uris - ilx_only
    #diff_uris.remove(URIRef('http://uri.neuinfo.org/nif/nifstd/nlx_149160'))  # ORNL was included in an old bad version of the xrefs file and was pulled in in the old all-uris  # now dealt with by the scr mapping
    diff_uris.remove(URIRef('http://uri.neuinfo.org/nif/nifstd/nlx_40280,birnlex_1731'))  # one of the doubled neurolex ids
    diff_uris.remove(URIRef('http://uri.neuinfo.org/nif/nifstd'))  # i have zero idea how this snuck in
    assert not diff_uris, 'old uris and problem uris should be identical'

    _ilx = set(e for t in ng.g for e in t)
    _scr = set(e for t in sng.g for e in t)
    for uri in ilx_only:
        if uri in _ilx and uri in _scr:
            raise BaseException('AAAAAAAAAAAAAAAAAAAAAAAAAAAAA')
        elif uri in _ilx:
            g = ng.g
        elif uri in _scr:
            g = sng.g
        else:
            raise BaseException('????????????')
        g.add((uri, ilxtr.isDefinedBy, URIRef('http://neurolex.org')))

    # XXX write the graphs
    ng.write()
    sng.write()

    nsuris = set(uri for uri, ilx in mapping_no_sao)
    auris = set(_.toPython() for _ in all_uris)
    iuris = set(_.toPython() for _ in resolver_uris)
    #sao_missing = iuris - nsuris  # now fixed and cannot run due to addition of scr ids to resolver_uris
    #assert not sao_missing, f'whoops {sao_missing}'
    ilx_missing = auris - iuris
    all_missing = iuris - auris
    #assert not all_missing, f'all is not all! {all_missing}'  # XXX have to deal with ilx_only separately as NLX-ILX or something

    # fixed
    #sao_add = {o.toPython():s.toPython() for s, p, o in ng.g if s.toPython() in sao_missing}
    #assert len(sao_add) == len(sao_missing), 'EEEEEEEEEEEEEEE'
    #with open('/tmp/please-add-these-sao-ids-as-existing-ids-to-the-listed-interlex-record.json', 'wt') as f:
        #json.dump(sao_add, f, indent=2)

    to_review = sorted(ilx_missing)

    # not relevant anymore
    #with open('thought-to-be-missing.json', 'rt') as f:
        #thought_to_be_missing = json.load(f)

    # from troy has issues
    #with open('nifext-duplicates-and-new.json', 'rt') as f:
        #nifext_data = json.load(f)

    #nifext_dupes = {v['current_nifext_id']:v['dropped_nifext_ids'][-1] if v['dropped_nifext_ids'] else None for v in nifext_data.values()}

    sgv = Vocabulary(cache=True)
    trts = [(v, (sgv.findById(v)['labels'][0]
                 if sgv.findById(v)['labels']
                 else '<--NO-LABEL-->')
             if sgv.findById(v)
             else '<------>')
            for v in to_review]

    sgg = sGraph(cache=True)
    SGG = Namespace(sgg._basePath.rstrip('/') + '/graph/')
    rg = Graph().parse((gitf / 'NIF-Ontology/ttl/unused/NIF-Retired.ttl').as_posix(), format='turtle')
    retired = set(e.toPython() for t in rg for e in t if 'uri.neuinfo.org' in e)
    retfile = '<ttl/unused/NIF-Retired.ttl>'
    help_graph = createOntology(filename='NIFSTD-BLACKHOLE-mapping',
                        name='HELPPPPPPPP!!!!',
                        prefixes=uPREFIXES,)
    def make_rt(to_review_tuples, retired=retired):
        def inner(u, l, retired=retired):
            ne = sgg.getNeighbors(u, relationshipType="isDefinedBy", depth=1)
            if ne:
                curie = help_graph.qname(u)
                help_graph.g.add((URIRef(u), ilxtr.SciGraphLookup, URIRef(f'http://scigraph.olympiangods.org/scigraph/graph/{curie}')))
            if ne and ne['edges']:
                src = ' '.join([f'<{e["obj"]}>' for e in ne["edges"]])
            elif u in retired:
                src = retfile
            else:
                src = '<>'
            return f'{u:<70} {l:<50} {src}'
        out = Async(rate=3000)(deferred(inner)(u, l) for u, l in sorted(to_review_tuples, key=lambda a:a[-1]))
        return '\n'.join(out)

    review_text = make_rt(trts)
    trts2 = [(u, l) for u, l in trts if 'nifext' not in u]
    not_nifext = make_rt(trts2)

    hng = cull_prefixes(help_graph.g, prefixes=uPREFIXES)
    hng.filename = help_graph.filename
    hng.write()

    ###
    #   Accounting of uri.neuinfo.org ids that do not resolve
    ###

    not_in_interlex = set(s for s, o in hng.g.subject_objects(ilxtr.SciGraphLookup))
    bh_deprecated = set(s for s in hng.g.subjects() if sgv.findById(s) and sgv.findById(s)['deprecated'])
    bh_not_deprecated = set(s for s in hng.g.subjects() if sgv.findById(s) and not sgv.findById(s)['deprecated'])
    bh_nifexts = set(s for s in bh_not_deprecated if 'nifext' in s)
    bh_readable = set(s for s in bh_not_deprecated if 'readable' in s)
    unaccounted = not_in_interlex - bh_readable - bh_nifexts - bh_deprecated
    namedinds = set(s for s in unaccounted
                    if sgv.findById(s) and
                    sgg.getNode(s)['nodes'][0]['meta']['types'] and
                    sgg.getNode(s)['nodes'][0]['meta']['types'][0] == 'NamedIndividual')
    unaccounted = unaccounted - namedinds
    ual = sorted(o for s in unaccounted for o in hng.g.objects(s, ilxtr.SciGraphLookup))
    report = (
        f'Total       {len(not_in_interlex)}\n'
        f'deprecated  {len(bh_deprecated)}\n'
        f'nd nifext   {len(bh_nifexts)}\n'
        f'nd readable {len(bh_readable)}\n'
        f'nd namedind {len(namedinds)}\n'
        f'unaccounted {len(unaccounted)}\n'
             )
    print(report)

    def reverse_report():
        ilx = Graph()
        ilx.parse('/tmp/interlex.ttl', format='turtle')
        not_in_ontology = set()
        annotations = set()
        relations = set()
        drugbank = set()
        t3db = set()
        for subject in ilx.subjects(rdf.type, owl.Class):
            ok = False
            for object in ilx.objects(subject, oboInOwl.hasDbXref):
                if anyMembers(object, 'uri.neuinfo.org', 'GO_', 'CHEBI_', 'PR_',
                              'PATO_', 'HP_', 'OBI_', 'DOID_', 'COGPO_', 'CAO_',
                              'UBERON_', 'NCBITaxon_', 'SO_', 'IAO_'):
                    # FIXME do we really import HP?
                    ok = True

                if (subject, rdf.type, owl.AnnotationProperty) in ilx:  # FIXME for troy these need to be cleared up
                    annotations.add(subject)
                elif (subject, rdf.type, owl.ObjectProperty) in ilx:
                    relations.add(subject)
                elif 'drugbank' in object:
                    drugbank.add(subject)
                elif 't3db.org' in object:
                    t3db.add(subject)

            if not ok:
                not_in_ontology.add(subject)


        drugbank = drugbank & not_in_ontology
        t3db = t3db & not_in_ontology
        annotations = annotations & not_in_ontology
        relations = relations & not_in_ontology
        unaccounted = not_in_ontology - drugbank - t3db - annotations - relations
        report = (
            f'Total       {len(not_in_ontology)}\n'
            f'annotations {len(annotations)}\n'
            f'relations   {len(relations)}\n'
            f'drugbank    {len(drugbank)}\n'
            f't3db        {len(t3db)}\n'
            f'unaccounted {len(unaccounted)}\n'
        )
        print(report)
        return (not_in_ontology, drugbank, unaccounted)

    _, _, un = reverse_report()

    h_uris = set(e for t in hng.g for e in t if 'uri.neuinfo.org' in e)
    real_problems = problem_uris - h_uris

    ###
    #   Missing neurons
    ###

    with open((gitf / 'nlxeol/neuron_data_curated.csv').as_posix()) as f:
        r = csv.reader(f)
        nheader = next(r)
        rows = list(r)

    ndata = list(zip(*rows))

    def datan(head):
        return ndata[nheader.index(head)]

    if __name__ == '__main__':
        breakpoint()
Code example #17
File: hbp_cells.py  Project: tgbugs/pyontutils
def clean_hbp_cell():
    #old graph
    g = rdflib.Graph()
    if __name__ == '__main__':
        breakpoint()
    path = (auth.get_path('git-local-base') /
            'methodsOntology/ttl/hbp_cell_ontology.ttl')
    if not path.exists():
        raise FileNotFoundError(f'repo for {path} does not exist')

    g.parse(path.as_posix(), format='turtle')
    g.remove((None, rdflib.OWL.imports, None))
    g.remove((None, rdflib.RDF.type, rdflib.OWL.Ontology))

    #new graph
    NAME = 'NIF-Neuron-HBP-cell-import'
    mg = makeGraph(NAME, prefixes=PREFIXES)
    ontid = 'http://ontology.neuinfo.org/NIF/ttl/generated/' + NAME + '.ttl'
    mg.add_trip(ontid, rdflib.RDF.type, rdflib.OWL.Ontology)
    mg.add_trip(ontid, rdflib.RDFS.label, 'NIF Neuron HBP cell import')
    mg.add_trip(ontid, rdflib.RDFS.comment, 'this file was automatically generated using pyontutils/hbp_cells.py')
    mg.add_trip(ontid, rdflib.OWL.versionInfo, date.isoformat(date.today()))
    newgraph = mg.g

    skip = {
        '0000000':'SAO:1813327414',  # cell
        #'0000001':NEURON,  # neuron  (equiv)
        #'0000002':'SAO:313023570',  # glia  (equiv)
        #'0000021':'NLXNEURNT:090804',  # glut  (equiv, but phen)
        #'0000022':'NLXNEURNT:090803',  # gaba  (equiv, but phen)

        '0000003':NEURON,
        '0000004':NEURON,
        '0000005':NEURON,
        '0000006':NEURON,
        '0000007':NEURON,
        '0000008':NEURON,
        '0000009':NEURON,
        '0000010':NEURON,
        '0000019':NEURON,
        '0000020':NEURON,
        '0000033':NEURON,
        '0000034':NEURON,
        '0000070':NEURON,
        '0000071':NEURON,
    }
    to_phenotype = {
        '0000021':('ilx:hasExpressionPhenotype', 'SAO:1744435799'),  # glut, all classes that might be here are equived out
        '0000022':('ilx:hasExpressionPhenotype', 'SAO:229636300'),  # gaba
    }
    lookup = {'NIFCELL', 'NIFNEURNT'}
    missing_supers = {
        'HBP_CELL:0000136',
        'HBP_CELL:0000137',
        'HBP_CELL:0000140',
    }

    replace = set()
    phen = set()
    equiv = {}
    for triple in sorted(g.triples((None, None, None))):
        id_suffix = newgraph.namespace_manager.compute_qname(triple[0].toPython())[2]
        try:
            obj_suffix = newgraph.namespace_manager.compute_qname(triple[2].toPython())[2]
        except:  # it wasn't a url
            pass
        # equiv insert for help
        if triple[1] == rdflib.OWL.equivalentClass and id_suffix not in skip and id_suffix not in to_phenotype:
            qnt = newgraph.namespace_manager.compute_qname(triple[2].toPython())
            #print(qnt)
            if qnt[0] in lookup:
                try:
                    lab = v.findById(qnt[0] + ':' + qnt[2])['labels'][0]
                    print('REMOTE', qnt[0] + ':' + qnt[2], lab)
                    #mg.add_trip(triple[2], rdflib.RDFS.label, lab)
                    #mg.add_trip(triple[0], PREFIXES['NIFRID'] + 'synonym', lab)  # so we can see it
                except TypeError:
                    if qnt[2].startswith('nlx'):
                        triple = (triple[0], triple[1], expand('NIFSTD:' + qnt[2]))
                    #print('bad identifier')

        #check for equiv
        if triple[0] not in equiv:
            eq = [o for o in g.objects(triple[0], rdflib.OWL.equivalentClass)]
            if eq and id_suffix not in skip and id_suffix not in to_phenotype:
                if len(eq) > 1:
                    print(eq)
                equiv[triple[0]] = eq[0]
                continue
        elif triple[0] in equiv:
            continue

        # edge replace
        if triple[1].toPython() == 'http://www.FIXME.org/nsupper#synonym':
            edge =  mg.expand('NIFRID:abbrev')
        elif triple[1].toPython() == 'http://www.FIXME.org/nsupper#definition':
            edge = rdflib.namespace.SKOS.definition
        else:
            edge = triple[1]

        # skip or to phenotype or equiv
        if id_suffix in skip:  # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars
            replace.add(triple[0])
            #print('MEEP MEEP')
        elif id_suffix in to_phenotype:  # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars
            phen.add(triple[0])
        elif triple[1] == rdflib.RDFS.label:  # fix labels
            if not triple[2].startswith('Hippocampus'):
                new_label = rdflib.Literal('Neocortex ' + triple[2], lang='en')
                newgraph.add((triple[0], edge, new_label))
            else:
                newgraph.add((triple[0], edge, triple[2]))
        elif triple[2] in replace:
            mg.add_trip(triple[0], edge, skip[obj_suffix])
        elif triple[2] in phen:
            edge_, rst_on = to_phenotype[obj_suffix]
            edge_ = expand(edge_)
            rst_on = expand(rst_on)

            this = triple[0]
            this = infixowl.Class(this, graph=newgraph)
            this.subClassOf = [expand(NEURON)] + [c for c in this.subClassOf]

            restriction = infixowl.Restriction(edge_, graph=newgraph, someValuesFrom=rst_on)
            this.subClassOf = [restriction] + [c for c in this.subClassOf]
        elif triple[2] in equiv:
            newgraph.add((triple[0], edge, equiv[triple[2]]))
        else:
            newgraph.add((triple[0], edge, triple[2]))

    # final cleanup for forward references (since we iterate through sorted)

    tt = rdflib.URIRef(expand('HBP_CELL:0000033'))
    tf = rdflib.URIRef(expand('HBP_CELL:0000034'))
    newgraph.remove((None, None, tt))
    newgraph.remove((None, None, tf))

    # add missing subClasses
    for nosub in missing_supers:
        mg.add_trip(nosub, rdflib.RDFS.subClassOf, NEURON)

    # cleanup for subClassOf
    for subject in sorted(newgraph.subjects(rdflib.RDFS.subClassOf, expand(NEURON))):
        sco = [a for a in newgraph.triples((subject, rdflib.RDFS.subClassOf, None))]
        #print('U WOT M8')
        if len(sco) > 1:
            #print('#############\n', sco)
            for s, p, o in sco:
                if 'hbp_cell_ontology' in o or 'NIF-Cell' in o and o != expand(NEURON): #or 'sao2128417084' in o:  # neocortex pyramidal cell
                    #print(sco)
                    newgraph.remove((subject, rdflib.RDFS.subClassOf, expand(NEURON)))
                    break

    # do ilx
    ilx_start = ilx_get_start()
    #ilx_conv_mem = memoize('hbp_cell_interlex.json')(ilx_conv)  # FIXME NOPE, also need to modify the graph :/
    ilx_labels, ilx_replace = ilx_conv(graph=newgraph, prefix='HBP_CELL', ilx_start=ilx_start)
    ilx_add_ids(ilx_labels)

    replace_map = ilx_replace
    for hbp, rep in skip.items():
        ori = 'HBP_CELL:'+hbp
        if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = rep
    for hbp, (e, rep) in to_phenotype.items():
        ori = 'HBP_CELL:'+hbp
        if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = e, rep
    for hbp_iri, rep_iri in equiv.items():
        hbp = newgraph.compute_qname(hbp_iri)[2]
        rep = newgraph.qname(rep_iri)
        ori = 'HBP_CELL:'+hbp
        if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = rep

    return mg, replace_map
Code example #18
File: hierarchies.py  Project: gsanou/pyontutils
def main():
    sgg = Graph(cache=True)
    sgg_local = Graph(cache=True)

    fma3_r = Query('FMA3:Brain', 'http://sig.biostr.washington.edu/fma3.0#regional_part_of', 'INCOMING', 9)
    fma3_c = Query('FMA3:Brain', 'http://sig.biostr.washington.edu/fma3.0#constitutional_part_of', 'INCOMING', 9)
    #fma3_tree, fma3_extra = creatTree(*fma3_r, graph=sgg_local)

    fma_r = Query('FMA:50801', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    fma_c = Query('FMA:50801', 'http://purl.org/sig/ont/fma/constitutional_part_of', 'INCOMING', 20)
    fma_rch_r = Query('FMA:61819', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    #fma_tree, fma_extra = creatTree(*fma_r, graph=sgg_local)
    #fma_tree, fma_extra = creatTree(*fma_rch_r, graph=sgg_local)

    fma_hip = Query('FMA:275020', 'http://purl.org/sig/ont/fma/regional_part_of', 'BOTH', 20)
    fma_hip = Query('FMA:275020', 'http://purl.org/sig/ont/fma/constitutional_part_of', 'BOTH', 20)
    #fma_tree, fma_extra = creatTree(*fma_hip, graph=sgg_local)

    fma_mfg = Query('FMA:273103', 'http://purl.org/sig/ont/fma/regional_part_of', 'BOTH', 20)
    #fma_tree, fma_extra = creatTree(*fma_mfg, graph=sgg_local)

    fma_tel = Query('FMA:62000', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    if False:
        fma_gsc_tree, fma_gsc_extra = creatTree(*fma_tel, graph=sgg_local)

        childs = list(fma_gsc_extra[2])  # get the curies for the left/right so we can get parents for all
        g = Graph(cache=True)
        parent_nodes = []
        for curie in childs:
            json = g.getNeighbors(curie, relationshipType='subClassOf')
            if json:
                for node in json['nodes']:
                    if node['id'] != curie:
                        parent_nodes.append(node)  # should have dupes


        breakpoint()
        return

    uberon = Query('UBERON:0000955', 'BFO:0000050', 'INCOMING', 40)
    uberon_tree, uberon_extra = creatTree(*uberon, graph=sgg)
    queries = uberon,

    uberon_flat = sorted(set(n for n in flatten(uberon_extra[0])))
    with open(f'{tempfile.tempdir}/uberon_partonomy_terms', 'wt') as f:
        f.writelines('\n'.join(uberon_flat))

    for query in queries:
        tree, extra = creatTree(*query, graph=sgg)
        dematerialize(list(tree.keys())[0], tree)
        print(tree)
        #print(extra[0])
        with open(f'{tempfile.tempdir}/' + query.root, 'wt') as f:
            f.writelines(tree.print_tree())

        level_sizes = [len(levels(tree, i)) for i in range(11)]
        print('level sizes', level_sizes)
        parent_counts = sorted(set(len(v) for v in extra[-4].values()))
        print('unique parent counts', parent_counts)
        print('num terms', len(extra[2]))

    return

    breakpoint()
Code example #19
def main():
    import rdflib
    from pyontutils.core import makeGraph, makePrefixes, log
    from pyontutils.config import auth

    ub = auth.get_path('ontology-local-repo') / 'ttl/bridge/uberon-bridge.ttl'
    ncrb = auth.get_path(
        'ontology-local-repo') / 'ttl/NIF-Neuron-Circuit-Role-Bridge.ttl'
    if not ub.exists() or not ncrb.exists():
        # just skip this if we can't find the files
        log.warning(f'missing file {ub} or {ncrb}')
        return

    graph = rdflib.Graph()
    graph.parse(ub.as_posix(), format='turtle')
    graph.parse(ncrb.as_posix(), format='ttl')

    ecgraph = rdflib.Graph()
    oec = EquivalentClass()
    test = tuple(oec.parse(graph=graph))

    ft = oc_.full_combinator(test[0][0], test[0][1])
    ftng = makeGraph('thing3', prefixes=makePrefixes('owl', 'TEMP'))
    *ft.serialize(ftng.g),
    ftng.write()

    _roundtrip = list(test[0][1](test[0][0]))
    roundtrip = oc_(test[0][0], test[0][1])  # FIXME not quite there yet...
    for t in roundtrip:
        ecgraph.add(t)
    ecng = makeGraph('thing2',
                     graph=ecgraph,
                     prefixes=makePrefixes('owl', 'TEMP'))
    ecng.write()
    if __name__ == '__main__':
        breakpoint()
        return
    r = Restriction(
        rdfs.subClassOf)  #, scope=owl.allValuesFrom)#NIFRID.has_proper_part)
    l = tuple(r.parse(graph=graph))
    for t in r.triples:
        graph.remove(t)
    ng = makeGraph('thing', graph=graph)
    ng.write()
    #print(l)
    restriction = Restriction(None)  #rdf.first)
    ll = List(lift_rules={owl.Restriction: restriction})
    trips = tuple(ll.parse(graph=graph))
    #subClassOf = PredicateCombinator(rdfs.subClassOf)  # TODO should be able to do POCombinator(rdfs.subClassOf, ObjectCombinator)
    subClassOf = POCombinator(rdfs.subClassOf, ObjectCombinator)
    superDuperClass = subClassOf(
        TEMP.superDuperClass)  # has to exist prior to triples
    ec = oec(
        TEMP.ec1,
        TEMP.ec2,
        restriction(TEMP.predicate0, TEMP.target1),
        restriction(TEMP.predicate1, TEMP.target2),
    )
    egraph = rdflib.Graph()
    acombinator = annotation((TEMP.testSubject, rdf.type, owl.Class),
                             (TEMP.hoh, 'FUN'))
    ft = flattenTriples((
        acombinator((TEMP.annotation, 'annotation value')),
        acombinator((TEMP.anotherAnnotation, 'annotation value again')),
        oc_(TEMP.c1, superDuperClass),
        oc_(TEMP.c2, superDuperClass),
        oc_(TEMP.c3, superDuperClass),
        oc_(TEMP.c4, superDuperClass),
        oc_(TEMP.c5, superDuperClass),
        oc_(TEMP.wat, subClassOf(TEMP.watParent)),
        oc_(TEMP.testSubject),
        ec(TEMP.testSubject),
        oc_(TEMP.more,
            oec(TEMP.ec3, restriction(TEMP.predicate10, TEMP.target10))),
    ), )
    [egraph.add(t) for t in ft]
    eng = makeGraph('thing1',
                    graph=egraph,
                    prefixes=makePrefixes('owl', 'TEMP'))
    eng.write()
    if __name__ == '__main__':
        breakpoint()
Code example #20
0
File: dumpnlx.py Project: tgbugs/pyontutils
def main():

    with open('nlx_properties', 'rt') as f:
        properties = [
            l.strip() for l in f.readlines() if not l.startswith('#')
        ]

    print(properties)

    def furl(url):
        url = url.replace('[', '-5B')
        url = url.replace(']', '-5D')
        url = url.replace('?', '-3F')
        url = url.replace('=', '%3D')
        return url
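
    # Illustrative sanity check only: furl escapes the Semantic MediaWiki
    # Ask-query syntax characters so they survive inside a request URL.
    assert furl('[[Category:Entity]]/?Label=x') == '-5B-5BCategory:Entity-5D-5D/-3FLabel%3Dx'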

    url_prefix = 'http://neurolex.org/wiki/Special:Ask/[[Category:Entity]]/'
    url_suffix = '/mainlabel=Categories/format=csv/sep=,/offset={}/limit={}'

    results = []
    result_step = 2500
    # see https://www.semantic-mediawiki.org/wiki/Help:Configuration#Query_settings
    for props in chunk_list(
            properties, 10
    ):  # 20 too long :/ may be able to fix via $smwgQMaxSize which defaults to 12
        all_rows = []
        for start in range(
                0, 30001, result_step
        ):  # offset limit is fixed via $smwgQMaxLimit in SMW_Settings.php
            url = url_prefix + '/?'.join(props) + url_suffix.format(
                start, result_step)  # crazy stuff when you leave out the ?
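            # Illustration (hypothetical property names):
            # '/?'.join(['Label', 'Definition']) -> 'Label/?Definition', so each
            # property after the first becomes another '?<name>' printout column.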
            try:
                data = requests.get(furl(url))
            except requests.RequestException:
                print('FAILED on URL =', furl(url))
                #breakpoint()
                # skip this offset: otherwise data would be undefined on the
                # first failure, and stale rows from the previous block would
                # be silently duplicated on later ones
                continue
            reader = csv.reader(data.text.splitlines())
            rows = [r for r in reader]
            all_rows.extend(rows)

        results.append(all_rows)

    with open(expanduser('~/files/nlx_dump_results.pickle'), 'wb') as f:
        pickle.dump(results, f)

    full_rows = []
    for rows in zip(*results):
        outrow = []
        for row in rows:
            if outrow:
                #assert outrow[0] == row[0], "ROW MISMATCH %s %s" % (outrow, row)
                if outrow[0] != row[0]:
                    print("ROW MISMATCH")
                    print(outrow)
                    print(row)
                    print()
                outrow.extend(row[1:])  # already got the category
            else:
                outrow.extend(row)
        full_rows.append(outrow)
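    # Illustration with toy data: zip(*results) pairs the i-th row of every
    # chunked query so their extra columns are stitched back onto the shared
    # Categories column, e.g.
    #   results = [[['A', 1], ['B', 2]], [['A', 'x'], ['B', 'y']]]
    #   -> full_rows == [['A', 1, 'x'], ['B', 2, 'y']]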

    with open('/tmp/neurolex_full.csv', 'wt', newline='\n') as f:
        writer = csv.writer(f)
        writer.writerows(full_rows)

    breakpoint()
Code example #21
0
def main():
    # TODO test parsing since the trie shifts a lot of the load there

    REPS = 1  # 10 is a good number

    # files to test
    fetch = (
        'http://ontology.neuinfo.org/NIF/ttl/NIF-Chemical.ttl',
        'http://ontology.neuinfo.org/NIF/ttl/NIF-Molecule.ttl',
        'https://raw.githubusercontent.com/tgbugs/pyontutils/master/test/nasty.ttl'
    )

    # functions to test
    functions = constructed,

    if 'TESTING' in os.environ:
        filenames = [
            f.strip("'").rstrip("'")
            for f in os.environ['FILENAMES'].split("' '")
        ]
        run(REPS, filenames=filenames, functions=functions)
    else:
        import shutil
        import requests
        from docopt import docopt

        args = docopt(__doc__)

        if args['--local']:
            filenames = list(filenames_from_fetch(fetch,
                                                  Path.cwd().parent))  # FIXME
            run(REPS, filenames=filenames, functions=functions)
            # check *.results
            breakpoint()
            return

        filenames = list(filenames_from_fetch(fetch, Path.cwd()))
        for name, fe in zip(filenames, fetch):
            if not Path(name).exists():
                print(f'fetching test file {fe}')
                resp = requests.get(fe)
                with open(name, 'wb') as f:
                    f.write(resp.content)

        thisfile = Path(__file__).resolve().absolute()
        thisfolder = thisfile.parent
        files = thisfile, thisfolder / '__init__.py'

        venvs = 'rdflib-4.2.2', 'rdflib-5.0.0'

        data = {}
        pipenv = args['--pipenv']
        for venv in venvs:
            p = Path.cwd() / venv
            po = p / 'pyontutils'
            if pipenv:
                if p.exists():
                    shutil.rmtree(venv)
                po.mkdir(parents=True)

                pkg, version = venv.split('-', 1)

                os.system(
                    f'cd {p.as_posix()} && unset PYTHONPATH && pipenv install {pkg}=={version}'
                )

            for f in files:
                shutil.copy(f.as_posix(), (po / f.name).as_posix())

            if args['--setup']:
                continue

            env = os.environ.copy()
            venv = os.path.expanduser(venv)
            env['PATH'] = venv + '/bin:' + env['PATH']
            env['TESTING'] = ''
            env['PYTHONPATH'] = p.as_posix()
            env['FILENAMES'] = ' '.join(repr(f) for f in filenames)
            sp = subprocess.Popen(
                ['pipenv', 'run', 'pyontutils/rdflib_profile.py'],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                env=env,
                cwd=p.as_posix())
            out, err = sp.communicate()
            print(out.decode())
            asdf = literal_eval(out.decode())
            data[os.path.basename(
                venv)] = asdf  # nclass, ncalls, tottime, cumtime

        if args['--setup']:
            return

        n_files_tested = len(fetch + functions)
        perf_result_index = 3
        avg_cumtime = [{
            k: sum([_[3] for _ in v[i][perf_result_index]]) / REPS
            for k, v in data.items()
        } for i in range(n_files_tested)]
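        # i.e. for each test input, sum the cumtime column (index 3 of each
        # profile row) and average it over the REPS runs, keyed by venv name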

        print(avg_cumtime)

        asdf = []  # alternate computation
        for i, name in enumerate(fetch + tuple(f.__name__ for f in functions)):
            z = {'name': name}
            for k, v in data.items():
                nv = 0
                for q in v[i][perf_result_index]:
                    nv += q[3]
                nv = nv / REPS
                z[k] = nv
            asdf.append(z)

        print(asdf)
        breakpoint()
Code example #22
0
        def loop_internal(j, header, cell):
            nonlocal id
            nonlocal current_neuron
            nonlocal do_release
            notes = list(process_note(get_note(i + 1, j, self.cells_index)))  # + 1 since the header row is removed
            if notes and not header.startswith('has'):
                _predicate = self.convert_other(header)
                if cell:
                    _object = rdflib.Literal(cell)  # FIXME curies etc.
                else:
                    _object = rdf.nil
                other_notes[_predicate, _object] = notes

            if header == 'curie':
                id = OntId(cell).u if cell else None
                return
            elif header == 'label':
                if id == OntId('NIFEXT:66').u:
                    breakpoint()
                label_neuron = cell
                if cell in self.existing:
                    current_neuron = self.existing[cell]
                elif cell:
                    # TODO
                    self.new.append(cell)
                else:
                    raise ValueError(cell)  # wat
                return
            elif header == 'Status':
                # TODO
                if cell == 'Yes':
                    do_release = True
                elif cell == 'Maybe':
                    pass
                elif cell == 'Not yet':
                    pass
                elif cell == 'Delete':
                    pass
                else:
                    pass

                return
            elif header == 'PMID':
                # TODO
                return
            elif header == 'Other reference':
                # TODO
                return
            elif header == 'Other label':
                # TODO
                return
            elif header == 'definition':
                return  # FIXME single space differences between the spreadsheet and the source

                if cell:
                    definition_neuron = rdflib.Literal(cell)

            elif header == 'synonyms':
                if cell:
                    synonyms_neuron = [rdflib.Literal(s.strip())
                                       # FIXME bare comma is extremely dangerous
                                       for s in cell.split(',')]
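                    # Note on the FIXME above: a synonym that itself contains
                    # a comma, e.g. 'large, spiny neuron', would be split here
                    # into two bogus synonyms.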

                return
            elif header in self.skip:
                return

            objects = []
            if cell:
                predicate = self.convert_header(header)
                if predicate is None:
                    log.debug(f'{(header, cell, notes)}')

                for object, label in self.convert_cell(cell):
                    if predicate in NeuronCUT._molecular_predicates:
                        if isinstance(object, tuple):
                            op, *rest = object
                            rest = [OntTerm(o).asIndicator().URIRef for o in rest]
                            object = op, *rest
                        elif object:
                            log.debug(f'{object!r}')
                            object = OntTerm(object).asIndicator().URIRef

                    if isinstance(label, tuple):  # LogicalPhenotype case
                        _err = []
                        for l in label:
                            if self.lower_check(l, cell):
                                _err.append((cell, label))
                        if _err:
                            self.errors.extend(_err)
                        else:
                            objects.append(object)
                    elif self.lower_check(label, cell):
                        self.errors.append((cell, label))
                    elif str(id) == object:
                        self.errors.append((header, cell, object, label))
                        object = None
                    else:
                        objects.append(object)

                if notes:
                    # FIXME this is a hack to only attach to the last value
                    # since we can't distinguish at the moment
                    wat[predicate, object] = notes
                    if object is not None:
                        # object aka iri can be none if we don't find anything
                        object_notes[object] = notes
                    else:
                        predicate_notes[predicate] = notes
                        # FIXME it might also be simpler in some cases
                        # to have this be object_notes[object] = notes
                        # because we are much less likely to have the same
                        # phenotype appear attached to the different dimensions

                        # FIXME comma sep is weak here because the
                        # reference is technically ambiguous
                        # might be an argument for the denormalized form ...
                        # or perhaps having another sheet for cases like that

            else:
                return

            if predicate and objects:
                for object in objects:  # FIXME has layer location phenotype
                    if isinstance(object, tuple):
                        op, *rest = object
                        pes = (Phenotype(r, predicate) for r in rest)  # FIXME nonhomogenous phenotypes
                        phenotypes.append(LogicalPhenotype(op, *pes))
                    elif object:
                        phenotypes.append(Phenotype(object, predicate))
                    else:
                        self.errors.append((object, predicate, cell))
            elif objects:
                self.errors.append((header, objects))
            else:
                self.errors.append((header, cell))
Code example #23
0
def main():
    branch = auth.get('neurons-branch')
    remote = OntId('NIFTTL:') if branch == 'master' else OntId(f'NIFRAW:{branch}/')

    ont_config = ontneurons(remote)
    ont_neurons = ont_config.neurons()

    bn_config = Config('basic-neurons',
                       # FIXME this should probably be pulled in automatically
                       # from the import statements, and it doesn't work even as is
                       # also a chicken and an egg problem here
                       imports=[remote.iri + 'ttl/generated/swanson.ttl'])

    #RDFL = oq.plugin.get('rdflib')  # FIXME ick
    #rdfl = RDFL(bn_config.core_graph, OntId)
    #OntTerm.query.ladd(rdfl)  # FIXME ick
    bn_config.load_existing()
    bn_neurons = bn_config.neurons()
    #OntTerm.query._services = OntTerm.query._services[:-1]  # FIXME ick

    ndl_config = Config('neuron_data_lifted')
    ndl_config.load_existing()  # FIXME this is extremely slow
    ndl_neurons = sorted(ndl_config.neurons())

    resources = auth.get_path('resources')
    cutcsv = resources / 'cut-development.csv'
    with open(cutcsv.as_posix(), 'rt') as f:
        rows = [l for l in csv.reader(f)]

    bc = byCol(rows)

    (_, *labels), *_ = zip(*bc)
    labels_set0 = set(labels)
    ns = []
    skipped = []
    bamscok = (NIFSTD.BAMSC1125,)
    for n in (ont_neurons + ndl_neurons):
        if n.id_ and 'BAMSC' in n.id_:
            if n.id_ not in bamscok:
                skipped.append(n)
                continue

        l = str(n.origLabel)
        if n.origLabel is not None:  # str() would have turned None into 'None'
            for replace, match in rename_rules.items():  # HEH
                l = l.replace(match, replace)

        if l in labels:
            n._origLabel = l
            ns.append(n)

    ns = sorted(ns)
    sns = set(n.origLabel for n in ns)

    labels_set1 = labels_set0 - sns

    agen = [c.label for c in bc if c.autogenerated]
    sagen = set(agen)
    added = [c.label for c in bc if c.added]
    sadded = set(added)
    ans = []
    sans = set()
    missed = set()
    _bl = []  # XXX NOTE THE CONTINUE BELOW
    for n in bn_neurons:
        continue  # we actually get all of these with uberon, will map between them later
        # can't use capitalize here because there are proper names that stay uppercase
        l = n.label.replace('(swannt) ',
                            '').replace('Intrinsic',
                                        'intrinsic').replace('Projection',
                                                             'projection')

        for replace, match in rename_rules.items():  # HEH
            l = l.replace(match, replace)

        if l in agen:
            n._origLabel = l
            ans.append(n)
            sans.add(l)

        else:
            missed.add(l)

        _bl.append(l)

    agen_missing = sagen - sans
    labels_set2 = labels_set1 - sans

    nlx_labels = [c.label for c in bc if c.neurolex]
    snlx_labels = set(nlx_labels)

    class SourceCUT(resSource):
        sourceFile = 'nifstd/resources/cut-development.csv'  # FIXME relative to git workingdir...
        source_original = True

    sources = SourceCUT(),
    swanr = rdflib.Namespace(interlex_namespace('swanson/uris/readable/'))
    SWAN = interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/')
    SWAA = interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/')
    config = Config('cut-development-raw', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    ins = [None if OntId(n.id_).prefix == 'TEMP' else n.id_ for n in ns]
    ians = [None] * len(ans)

    with NeuronCUT(CUT.Mammalia):
        mamns = [NeuronCUT(*zap(n.pes), id_=i, label=n._origLabel, override=bool(i)).adopt_meta(n)
                 for i, n in zip(ins + ians, ns + ans)]

    smatch, rem = get_smatch(labels_set2)

    labels_set3 = labels_set2 - smatch
    added_unmapped = sadded & labels_set3

    # TODO preserve the names from neuronlex on import ...
    Neuron.write()
    Neuron.write_python()
    raw_neurons = config.neurons()
    # do this before creating the new config
    # even though we are in theory tripling number of neurons in the current config graph
    # it won't show up in the next config (and this is why we need to reengineer)
    raw_neurons_ind_undep = [n.asUndeprecated().asIndicator() for n in raw_neurons]
    config = Config('cut-development', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # FIXME the call to asUndeprecated currently triggers addition
    # to the current config and output graph as a side effect (ick!)
    ids_updated_neurons = [n.asUndeprecated() for n in raw_neurons]
    assert len(ids_updated_neurons) == len(raw_neurons)
    Neuron.write()
    Neuron.write_python()
    progress = (len(labels_set0), len(sns), len(sans), len(smatch),
                len(labels_set1), len(labels_set2), len(labels_set3))
    prog_report = ('\nProgress:\n'
                   f'total:            {progress[0]}\n'
                   f'from nlx:         {progress[1]}\n'
                   f'from basic:       {progress[2]}\n'
                   f'from match:       {progress[3]}\n'
                   f'TODO after nlx:   {progress[4]}\n'
                   f'TODO after basic: {progress[5]}\n'
                   f'TODO after match: {progress[6]}\n')
    print(prog_report)
    assert progress[0] == progress[1] + progress[4], 'neurolex does not add up'
    assert progress[4] == progress[2] + progress[5], 'basic does not add up'

    lnlx = set(n.lower() for n in snlx_labels)
    sos = set(n.origLabel.lower() if n.origLabel else None for n in ndl_neurons)  # FIXME load origLabel
    nlx_review = lnlx - sos
    nlx_missing = sorted(nlx_review)
    print(f'\nNeuroLex listed as source but no mapping (n = {len(nlx_review)}):')
    _ = [print(l) for l in nlx_missing]

    partial = {k:v for k, v in rem.items() if v and v not in terminals}
    print(f'\nPartially mapped (n = {len(partial)}):')
    if partial:
        mk = max((len(k) for k in partial.keys())) + 2
        for k, v in sorted(partial.items()):
            print(f'{k:<{mk}} {v!r}')
            #print(f'{k!r:<{mk}}{v!r}')
        #pprint(partial, width=200)
    unmapped = sorted(labels_set3)
    print(f'\nUnmapped (n = {len(labels_set3)}):')
    _ = [print(l) for l in unmapped]

    no_location = [n for n in Neuron.neurons()
                   if noneMembers((ilxtr.hasSomaLocatedIn, ilxtr.hasSomaLocatedInLayer), *n.unique_predicates)]
    if __name__ == '__main__':
        review_rows = export_for_review(config, unmapped, partial, nlx_missing)
        breakpoint()

    return config, unmapped, partial, nlx_missing
Code example #24
0
def main():
    #from neurondm.models.cuts import main as cuts_main
    #cuts_config, *_ = cuts_main()

    from neurondm.compiled.common_usage_types import config as cuts_config
    cuts_neurons = cuts_config.neurons()
    expect_pes = {n.id_:n.pes for n in cuts_neurons}

    sheet = CutsV1()
    _neurons = list(sheet.neurons(expect_pes))
    config = sheet.config
    errors = sheet.errors
    new = sheet.new
    release = sheet.release

    #sheet.show_notes()
    config.write_python()
    config.write()
    #config = Config(config.name)
    #config.load_existing()  # FIXME this is a hack to get get a load_graph

    # FIXME we need this because _bagExisting doesn't deal with unionOf right now
    def trything(f):
        @wraps(f)
        def inner(*args, **kwargs):
            try:
                return f(*args, **kwargs)
            except Exception:  # deliberately swallow errors from malformed rows
                pass

        return inner
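    # Usage note: trything(NeuronCUT)(*pes, id_=id_) behaves like
    # NeuronCUT(*pes, id_=id_) except that any exception is swallowed and None
    # is returned, so a single malformed row cannot abort the whole export.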

    from neurondm import Config, NeuronCUT

    failed_config = Config('cut-failed')
    [trything(NeuronCUT)(*pes, id_=id_) for id_, pes in sheet.failed.items()]
    failed_config.write_python()
    failed_config.write()

    release_config = Config('cut-release')
    [NeuronCUT(*n, id_=n.id_, label=n.origLabel, override=True).adopt_meta(n) for n in release]
    release_config.write_python()
    release_config.write()

    from neurondm.models.cuts import export_for_review
    review_rows = export_for_review(config, [], [], [], filename='cut-rt-test.csv', with_curies=True)
    from pyontutils.utils import byCol
    valuesC = byCol(sheet.values[1:],
                    header=[v.replace(' ', '_') for v in sheet.values[0]],
                    to_index=['label'])
    reviewC = byCol(review_rows[1:], header=[v.replace(' ', '_') for v in review_rows[0]], to_index=['label'])
    def grow(r):
        log.debug(r)
        # TODO implement on the object to allow joining on an index?
        # man this would be easier with sql >_< probably pandas too
        # but so many dependencies ... also diffing issues etc
        if r.label is not None:
            return valuesC.searchIndex('label', r.label)

    def key(field_value):
        field, value = field_value
        try:
            return 0, valuesC.header._fields.index(field)  # TODO warn on field mismatch
        except ValueError as e:
            log.error(f'{field} {value}')
            return 1, 0

    def replace(r, *cols):
        """ replace and reorder """
        # FIXME _super_ inefficient
        vrow = grow(r)
        log.debug('\n'.join(r._fields))
        log.debug('\n'.join(str(_) for _ in r))
        for field, value in sorted(zip(r._fields, r), key=key):
            if field in cols:
                value = getattr(vrow, field)

            yield '' if value is None else value  # completely overwrite the sheet

    breakpoint()
    rows = [list(replace(r, 'Status', 'definition', 'synonyms', 'PMID')) for r in reviewC]
    #resp = update_sheet_values('neurons-cut', 'Roundtrip', rows)
    if __name__ == '__main__':
        breakpoint()