def test_get_model(bento_neo4j):
    """Pull the ICDC model from the database and cross-check it against raw Cypher queries.

    Compares node, relationship and property counts, then verifies that every
    edge triplet, every node property and every value-set term returned by the
    database is reachable through the in-memory model.
    """
    uri, _ = bento_neo4j
    the_mdb = MDB(uri=uri)
    assert the_mdb
    ObjectMap.clear_cache()
    model = Model(handle='ICDC', mdb=the_mdb)
    model.dget()

    with model.drv.session() as session:
        # entity counts must agree with the database
        node_count = session.run(
            'match (n:node) where n.model="ICDC" return count(n)')
        assert len(model.nodes) == node_count.single().value()
        edge_count = session.run(
            'match (n:relationship) where n.model="ICDC" return count(n)')
        assert len(model.edges) == edge_count.single().value()
        prop_count = session.run(
            'match (p:property)<--(n:node) where p.model="ICDC" and n.model="ICDC" return count(p)')
        assert len(model.props) == prop_count.single().value()

        # every relationship is indexed by (handle, src handle, dst handle)
        edge_rows = session.run(
            'match (s:node)<-[:has_src]-(e:relationship)-[:has_dst]->(d:node) where e.model="ICDC" return s,e,d')
        for row in edge_rows:
            src, rel, dst = row['s'], row['e'], row['d']
            triplet = (rel['handle'], src['handle'], dst['handle'])
            assert model.edges[triplet].handle == rel['handle']
            assert model.edges[triplet].src.handle == src['handle']
            assert model.edges[triplet].dst.handle == dst['handle']

        # every node property is reachable via model.props and via its node
        prop_rows = session.run(
            'match (n:node)-[:has_property]->(p:property) where (n.model="ICDC") return n, collect(p) as pp')
        for row in prop_rows:
            for db_prop in row['pp']:
                key = (row['n']['handle'], db_prop['handle'])
                assert model.props[key]
                assert model.props[key].neoid == db_prop.id
                assert model.nodes[row['n']['handle']].props[db_prop['handle']].neoid == db_prop.id

        # value-set terms in the model match the terms stored in the database
        term_rows = session.run(
            'match (t:term)<-[:has_term]-(v:value_set)<-[:has_value_set]-(p:property) where p.model="ICDC" return p, v, collect(t) as tt')
        for row in term_rows:
            db_prop, _db_value_set, db_terms = row['p'], row['v'], row['tt']
            # exactly one model property is expected to carry this handle
            (model_prop,) = tuple(
                candidate for candidate in model.props.values()
                if candidate.handle == db_prop['handle'])
            _value_set = model_prop.value_set
            assert model_prop
            assert set(model_prop.values) == {t['value'] for t in db_terms}
def test_create_model():
    """Build a small model by hand and check nodes, edges, properties and terms are registered."""
    model = Model('test')

    # a node that already carries a property when it is added
    case_node = Node({"handle": "case"})
    enrollment_prop = Property({"handle": 'days_to_enrollment'})
    case_node.props['days_to_enrollment'] = enrollment_prop
    model.add_node(case_node)
    assert isinstance(model.nodes['case'], Node)
    assert model.props[('case', 'days_to_enrollment')]

    # a node created from a plain init dict
    model.add_node({"handle": "sample"})
    assert model.nodes["sample"]
    assert isinstance(model.nodes["sample"], Node)
    assert model.nodes["sample"].model == 'test'

    # a property attached after the node is in the model
    case_id_prop = Property({"handle": "case_id", "value_domain": "string"})
    model.add_prop(case_node, case_id_prop)
    assert model.props[("case", "case_id")]
    assert model.props[("case", "case_id")].value_domain == 'string'
    assert 'case_id' in model.nodes['case'].props

    # an edge with its own property
    sample_node = model.nodes["sample"]
    of_case_edge = Edge({"handle": "of_case", "src": sample_node, "dst": case_node})
    of_case_edge.props['operator'] = Property({
        "handle": "operator",
        "value_domain": "boolean",
    })
    model.add_edge(of_case_edge)
    assert model.edges[('of_case', 'sample', 'case')]
    assert model.contains(of_case_edge.props['operator'])
    assert of_case_edge.props['operator'].model == 'test'
    assert model.props[('of_case', 'sample', 'case', 'operator')]
    assert model.props[('of_case', 'sample', 'case', 'operator')].value_domain == 'boolean'

    # terms may be given as Term objects or bare strings
    diagnosis_prop = Property({"handle": "diagnosis", "value_domain": "value_set"})
    crs_term = Term({"value": "CRS"})
    model.add_prop(case_node, diagnosis_prop)
    model.add_terms(diagnosis_prop, crs_term, 'rockin_pneumonia', 'fungusamongus')
    term_values = {t.value for t in diagnosis_prop.terms.values()}
    assert term_values == {'CRS', 'rockin_pneumonia', 'fungusamongus'}
def test_put_model(bento_neo4j):
    """Modify the ICDC model in memory, push with dput(), and verify the database after each push.

    Steps exercised in order: add a term to a value set; unlink a term from its
    concept; relink the concept with a changed id and clear a property's model
    attribute; remove an edge from the model.
    """
    uri, _ = bento_neo4j
    the_mdb = MDB(uri=uri)
    assert the_mdb
    ObjectMap.clear_cache()
    model = Model(handle='ICDC', mdb=the_mdb)
    model.dget()

    # --- add a new term to an existing property's value set ---
    prop = model.props[('sample', 'sample_type')]
    # the next two lookups are not used further; they fail fast if the entities are missing
    _sample = model.nodes['sample']
    _on_visit = model.edges[('on_visit', 'sample', 'visit')]
    new_term = Term({"value": "electric_boogaloo"})
    model.add_terms(prop, new_term)
    lab_exam = model.nodes['lab_exam']
    model.dput()
    with model.drv.session() as session:
        found = session.run(
            'match (v:value_set)-->(t:term {value:"electric_boogaloo"}) return v,t'
        ).single()
        assert found['v'].id == prop.value_set.neoid
        assert found['t'].id == new_term.neoid
        assert found['t']['value'] == new_term.value
        found = session.run('match (n:node {handle:"lab_exam"}) return n').single()
        assert found['n'].id == lab_exam.neoid

    # --- detach a term from its concept ---
    term = model.props[('demographic', 'sex')].terms['M']
    assert term.concept
    assert term.concept._id == "337c0e4f-506a-4f4e-95f6-07c3462b81ff"
    concept = term.concept
    assert term in concept.belongs.values()
    term.concept = None
    assert term not in concept.belongs.values()
    assert ('concept', concept) in term.removed_entities
    model.dput()
    with model.drv.session() as session:
        by_term_id = {"id": term.neoid}
        by_concept_id = {"id": concept.neoid}
        # the term node is still there
        assert session.run('match (t:term) where id(t)=$id return t', by_term_id).single()
        # the concept node is still there
        assert session.run('match (c:concept) where id(c)=$id return c', by_concept_id).single()
        # but the link between them is gone
        assert not session.run(
            'match (t:term)-->(c:concept) where id(t)=$id return t', by_term_id).single()

    # --- relink the concept under a new id, and blank the property's model ---
    concept._id = "heydude"
    term.concept = concept
    prop.model = None
    assert not prop.model
    model.dput()
    with model.drv.session() as session:
        linked = session.run(
            'match (t:term)--(c:concept) where id(t)=$id return c', {"id": term.neoid}
        ).single()
        assert linked
        assert linked['c'].id == concept.neoid
        assert linked['c']['id'] == "heydude"
        stored = session.run(
            'match (p:property) where id(p)=$id return p', {"id": prop.neoid}
        ).single()
        assert stored
        assert stored['p'].id == prop.neoid
        assert 'model' not in stored['p']

    # --- remove an edge from the model ---
    prop.model = 'ICDC'
    at_enrollment = model.edges[('at_enrollment', 'prior_surgery', 'enrollment')]
    prior_surgery = model.nodes['prior_surgery']
    with model.drv.session() as session:
        before = session.run(
            'match (n:node)<-[:has_src]-(r:relationship {handle:"at_enrollment"})-[:has_dst]->(:node {handle:"enrollment"}) where id(n)=$id return r',
            {"id": prior_surgery.neoid},
        ).single()
        assert before
    model.rm_edge(at_enrollment)
    assert not at_enrollment.src
    assert not at_enrollment.dst
    assert at_enrollment not in model.edges_out(prior_surgery)
    model.dput()
    with model.drv.session() as session:
        # the src/dst links are gone ...
        after = session.run(
            'match (n:node)<-[:has_src]-(r:relationship {handle:"at_enrollment"})-[:has_dst]->(:node {handle:"enrollment"}) where id(n)=$id return r',
            {"id": prior_surgery.neoid},
        ).single()
        assert not after
        # ... while the relationship node itself is still stored
        orphan = session.run(
            'match (e:relationship) where id(e)=$id return e', {"id": at_enrollment.neoid}
        ).single()
        assert orphan
def test_init_model():
    """Model() without a handle must raise ArgError; with a handle it stores it."""
    # constructing without the required handle is rejected
    with pytest.raises(ArgError, match=".*requires arg 'handle'"):
        Model()

    named = Model('test')
    assert named
    assert named.handle == 'test'
def create_model(self):
    """Create :class:`Model` instance from loaded YAML

    Note: This is brittle, since the syntax of MDF is hard-coded
    into this method.

    Reads the ``Nodes``, ``Relationships`` and ``PropDefinitions`` sections
    of ``self.schema`` (plus the optional ``UniversalNodeProperties`` and
    ``UniversalRelationshipProperties`` sections), then builds nodes, edges
    and properties in that order.

    :returns: the newly built model (also stored in ``self._model``)
    :raises ValueError: if ``self.schema`` has no keys
    :raises AttributeError: if the model holds an entity that is neither a
        Node nor an Edge
    """
    if not self.schema.keys():
        raise ValueError("attribute 'schema' not set - are yamls loaded?")
    self._model = Model(handle=self.handle)
    ynodes = self.schema["Nodes"]
    yedges = self.schema["Relationships"]
    ypropdefs = self.schema["PropDefinitions"]
    yunps = self.schema.get("UniversalNodeProperties")
    yurps = self.schema.get("UniversalRelationshipProperties")

    # create nodes
    for n in ynodes:
        yn = ynodes[n]
        init = {"handle": n, "model": self.handle}
        for a in ["category", "desc"]:
            if yn.get(a):
                init[a] = yn[a]
        node = self._model.add_node(init)
        if yn.get("Tags"):
            tags = CollValue({}, owner=node, owner_key="tags")
            for t in yn["Tags"]:
                tags[t] = Tag({"value": t})
            node["tags"] = tags

    # create edges (relationships): one Edge per entry in "Ends"
    for e in yedges:
        ye = yedges[e]
        for ends in ye["Ends"]:
            init = {
                "handle": e,
                "model": self.handle,
                "src": self._model.nodes[ends["Src"]],
                "dst": self._model.nodes[ends["Dst"]],
                # end-level settings override relationship-level ones
                "multiplicity": ends.get("Mul") or ye.get("Mul")
                or MDF.default_mult,
                "desc": ends.get("Desc") or ye.get("Desc"),
            }
            edge = self._model.add_edge(init)
            ytags = ye.get("Tags") or ends.get("Tags")
            if ytags:
                tags = CollValue({}, owner=edge, owner_key="tags")
                for t in ytags:
                    tags[t] = Tag({"value": t})
                edge["tags"] = tags

    # create properties
    for ent in ChainMap(self._model.nodes, self._model.edges).values():
        if isinstance(ent, Node):
            declared = ynodes[ent.handle].get("Props")
            universal = yunps
        elif isinstance(ent, Edge):
            # props elts appearing Ends hash take
            # precedence over Props elt in the
            # handle's hash
            (hdl, src, dst) = ent.triplet
            [end] = [
                e for e in yedges[hdl]["Ends"]
                if e["Src"] == src and e["Dst"] == dst
            ]
            declared = end.get("Props") or yedges[hdl].get("Props")
            universal = yurps
        else:
            raise AttributeError(
                "unhandled entity type {type} for properties".format(
                    type=type(ent).__name__))

        # Work on a copy: extending the schema's own list would leak the
        # universal props into the loaded schema (and into every other End
        # sharing a relationship-level Props list), and would fail outright
        # when the entity declares no Props at all.
        pnames = list(declared or [])
        if universal:
            pnames.extend(universal.get("mayHave") or [])
            pnames.extend(universal.get("mustHave") or [])

        for pname in pnames:
            ypdef = ypropdefs.get(pname)
            if not ypdef:
                warn(
                    "property '{pname}' does not have a corresponding propdef"
                    .format(pname=pname))
                # skip only this property; the rest are still created
                continue
            init = {"handle": pname, "model": self.handle}
            if ypdef.get("Type"):
                init.update(self.calc_value_domain(ypdef["Type"]))
            else:
                init["value_domain"] = MDF.default_type
            prop = self._model.add_prop(ent, init)
            ent.props[prop.handle] = prop
            if ypdef.get("Tags"):
                # the tag collection belongs to the property itself
                tags = CollValue({}, owner=prop, owner_key="tags")
                for t in ypdef["Tags"]:
                    tags[t] = Tag({"value": t})
                prop["tags"] = tags
    return self._model
class MDF(object):
    """Loader that merges MDF YAML documents and builds a :class:`Model` from them."""

    # multiplicity given to an edge when neither the End nor the Relationship sets "Mul"
    default_mult = "one_to_one"
    # value_domain given to a property whose type is absent or not recognized
    default_type = "TBD"

    def __init__(self, *yaml_files, handle=None):
        """Create a :class:`Model` from MDF YAML files.

        :param str|file|url *yaml_files: MDF filenames or file objects,
            in desired merge order
        :param str handle: Handle (name) for the resulting Model
        :attribute model: the :class:`bento_meta.model.Model` created
        :raises ArgError: if ``handle`` is missing or not a str
        """
        if not handle or not isinstance(handle, str):
            raise ArgError("arg handle= must be a str - name for model")
        self.handle = handle
        self.files = yaml_files
        self.schema = om.MergedOptions()
        self._model = None
        if self.files:
            self.load_yaml()
            self.create_model()
        else:
            warn("No MDF files provided to constructor")

    @property
    def model(self):
        """The :class:`bento_meta.model.Model` object created from the MDF input"""
        return self._model

    def load_yaml(self):
        """Load YAML files or open file handles specified in constructor

        Each entry of ``self.files`` is parsed and merged into
        ``self.schema`` in order. A str matching ``file://`` or
        ``http(s)://`` is fetched with ``requests``; any other str is
        opened as a local path; anything else is handed to the YAML parser
        as is.

        :raises ArgError: if fetching a URL returns a non-OK response
        :raises ConstructorError: re-raised after printing a diagnostic
        :raises ParserError: re-raised after printing a diagnostic
        """
        # NOTE(security): this is PyYAML's full Loader, which is not meant
        # for untrusted input, yet sources may be remote URLs. Consider
        # yaml.SafeLoader if MDF files never need custom tags -- confirm
        # before switching.
        yloader = yaml.loader.Loader
        for f in self.files:
            # label for diagnostics; must not rely on f.name, which is
            # absent once f has been replaced by downloaded text
            source_name = f if isinstance(f, str) else getattr(
                f, "name", repr(f))
            opened_here = None  # only files opened below are closed here
            if isinstance(f, str):
                if re.match("(?:file|https?)://", f):
                    # NOTE(review): requests has no built-in adapter for
                    # file:// URLs -- confirm that case is reachable.
                    response = requests.get(f)
                    if not response.ok:
                        raise ArgError(
                            "Fetching url {} returned code {}".format(
                                response.url, response.status_code))
                    response.encoding = "utf8"
                    f = response.text
                else:
                    opened_here = f = open(f, "r")
            try:
                yml = yaml.load(f, Loader=yloader)
                self.schema.update(yml)
            except ConstructorError as ce:
                print("YAML constructor failed in '{fn}':\n{e}".format(
                    fn=source_name, e=ce))
                raise
            except ParserError as pe:
                print("YAML parser failed in '{fn}':\n{e}".format(
                    fn=source_name, e=pe))
                raise
            finally:
                if opened_here is not None:
                    opened_here.close()

    def create_model(self):
        """Create :class:`Model` instance from loaded YAML

        Note: This is brittle, since the syntax of MDF is hard-coded
        into this method.

        Reads the ``Nodes``, ``Relationships`` and ``PropDefinitions``
        sections of ``self.schema`` (plus the optional
        ``UniversalNodeProperties`` and ``UniversalRelationshipProperties``
        sections), then builds nodes, edges and properties in that order.

        :returns: the newly built model (also stored in ``self._model``)
        :raises ValueError: if ``self.schema`` has no keys
        :raises AttributeError: if the model holds an entity that is
            neither a Node nor an Edge
        """
        if not self.schema.keys():
            raise ValueError("attribute 'schema' not set - are yamls loaded?")
        self._model = Model(handle=self.handle)
        ynodes = self.schema["Nodes"]
        yedges = self.schema["Relationships"]
        ypropdefs = self.schema["PropDefinitions"]
        yunps = self.schema.get("UniversalNodeProperties")
        yurps = self.schema.get("UniversalRelationshipProperties")

        # create nodes
        for n in ynodes:
            yn = ynodes[n]
            init = {"handle": n, "model": self.handle}
            for a in ["category", "desc"]:
                if yn.get(a):
                    init[a] = yn[a]
            node = self._model.add_node(init)
            if yn.get("Tags"):
                tags = CollValue({}, owner=node, owner_key="tags")
                for t in yn["Tags"]:
                    tags[t] = Tag({"value": t})
                node["tags"] = tags

        # create edges (relationships): one Edge per entry in "Ends"
        for e in yedges:
            ye = yedges[e]
            for ends in ye["Ends"]:
                init = {
                    "handle": e,
                    "model": self.handle,
                    "src": self._model.nodes[ends["Src"]],
                    "dst": self._model.nodes[ends["Dst"]],
                    # end-level settings override relationship-level ones
                    "multiplicity": ends.get("Mul") or ye.get("Mul")
                    or MDF.default_mult,
                    "desc": ends.get("Desc") or ye.get("Desc"),
                }
                edge = self._model.add_edge(init)
                ytags = ye.get("Tags") or ends.get("Tags")
                if ytags:
                    tags = CollValue({}, owner=edge, owner_key="tags")
                    for t in ytags:
                        tags[t] = Tag({"value": t})
                    edge["tags"] = tags

        # create properties
        for ent in ChainMap(self._model.nodes, self._model.edges).values():
            if isinstance(ent, Node):
                declared = ynodes[ent.handle].get("Props")
                universal = yunps
            elif isinstance(ent, Edge):
                # props elts appearing Ends hash take
                # precedence over Props elt in the
                # handle's hash
                (hdl, src, dst) = ent.triplet
                [end] = [
                    e for e in yedges[hdl]["Ends"]
                    if e["Src"] == src and e["Dst"] == dst
                ]
                declared = end.get("Props") or yedges[hdl].get("Props")
                universal = yurps
            else:
                raise AttributeError(
                    "unhandled entity type {type} for properties".format(
                        type=type(ent).__name__))

            # Work on a copy: extending the schema's own list would leak
            # the universal props into the loaded schema (and into every
            # other End sharing a relationship-level Props list), and would
            # fail outright when the entity declares no Props at all.
            pnames = list(declared or [])
            if universal:
                pnames.extend(universal.get("mayHave") or [])
                pnames.extend(universal.get("mustHave") or [])

            for pname in pnames:
                ypdef = ypropdefs.get(pname)
                if not ypdef:
                    warn(
                        "property '{pname}' does not have a corresponding propdef"
                        .format(pname=pname))
                    # skip only this property; the rest are still created
                    continue
                init = {"handle": pname, "model": self.handle}
                if ypdef.get("Type"):
                    init.update(self.calc_value_domain(ypdef["Type"]))
                else:
                    init["value_domain"] = MDF.default_type
                prop = self._model.add_prop(ent, init)
                ent.props[prop.handle] = prop
                if ypdef.get("Tags"):
                    # the tag collection belongs to the property itself
                    tags = CollValue({}, owner=prop, owner_key="tags")
                    for t in ypdef["Tags"]:
                        tags[t] = Tag({"value": t})
                    prop["tags"] = tags
        return self._model

    def calc_value_domain(self, typedef):
        """Translate an MDF ``Type`` descriptor into Property init attributes.

        :param typedef: the value of a propdef's ``Type`` key
        :returns: dict with at least a ``value_domain`` key:

            * dict with ``pattern`` -> ``regexp`` domain plus the pattern
            * dict with ``units`` -> domain taken from ``value_type``,
              units joined with ``;``
            * any other dict -> its JSON dump as the domain (with a warning)
            * list -> ``value_set`` domain with a new :class:`ValueSet`;
              a first element that looks like a URL becomes the set's
              ``url``, otherwise each element becomes a :class:`Term`
            * anything else -> :attr:`MDF.default_type`
        """
        if isinstance(typedef, dict):
            if typedef.get("pattern"):
                return {
                    "value_domain": "regexp",
                    "pattern": typedef["pattern"]
                }
            if typedef.get("units"):
                return {
                    "value_domain": typedef.get("value_type"),
                    "units": ";".join(typedef.get("units")),
                }
            # punt
            warn("MDF type descriptor unrecognized: json looks like {j}".
                 format(j=json.dumps(typedef)))
            return {"value_domain": json.dumps(typedef)}

        if isinstance(typedef, list):
            # a valueset: create value set and term objs
            vs = ValueSet({"_id": str(uuid4())})
            vs.handle = self.handle + vs._id[0:8]
            # guard the URL sniffing: an empty list or a non-string first
            # value (e.g. a numeric enum) must not raise here
            first = typedef[0] if typedef else None
            if isinstance(first, str) and re.match("^(?:https?|bolt)://",
                                                   first):
                # looks like url
                vs.url = first
            else:
                # an enum
                for t in typedef:
                    vs.terms[t] = Term({"value": t})
            return {"value_domain": "value_set", "value_set": vs}

        # NOTE(review): a plain string Type (e.g. "string") also lands here
        # and is reported as default_type -- confirm this is intended.
        return {"value_domain": MDF.default_type}