def create_from_assocs(self, assocs, **args):
    """
    Creates an AssociationSet from a list of association objects.

    Arguments
    ---------
    assocs: list of objects exposing ``to_hash_assoc()``
    args: additional keyword arguments passed through to the
        AssociationSet constructor

    Negated associations are excluded from the core association map,
    but all associations (negated included) are recorded in the
    secondary by-subject and by-(subject, object) indexes.
    """
    assocs = [a.to_hash_assoc() for a in assocs]
    # (removed a stray debug print of assocs[0], which also raised
    # IndexError when the input list was empty)
    amap = defaultdict(list)
    subject_label_map = {}
    for a in assocs:
        subj = a['subject']
        subj_id = subj['id']
        # remember a display label per subject id
        subject_label_map[subj_id] = subj['label']
        if not a['negated']:
            amap[subj_id].append(a['object']['id'])
    aset = AssociationSet(subject_label_map=subject_label_map,
                          association_map=amap,
                          **args)
    # secondary indexes over the full association list
    aset.associations_by_subj = defaultdict(list)
    aset.associations_by_subj_obj = defaultdict(list)
    for a in assocs:
        sub_id = a['subject']['id']
        obj_id = a['object']['id']
        aset.associations_by_subj[sub_id].append(a)
        aset.associations_by_subj_obj[(sub_id, obj_id)].append(a)
    return aset
def create(self, ontology=None, subject_category=None, object_category=None,
           evidence=None, taxon=None, relation=None, file=None, fmt=None,
           skim=True):
    """
    Creates an AssociationSet.

    Currently, this uses an eager binding to a `ontobio.golr` instance.
    All compact associations for the particular combination of parameters
    are fetched.

    Arguments
    ---------
    ontology: an `Ontology` object
    subject_category: string representing category of subjects (e.g. gene, disease, variant)
    object_category: string representing category of objects (e.g. function, phenotype, disease)
    taxon: string holding NCBITaxon:nnnn ID
    file: if given, the set is loaded from this file instead of the store
    fmt: format of `file`, forwarded to create_from_file
    skim: forwarded to create_from_file
    """
    meta = AssociationSetMetadata(subject_category=subject_category,
                                  object_category=object_category,
                                  taxon=taxon)
    if file is not None:
        return self.create_from_file(file=file, fmt=fmt, ontology=ontology,
                                     meta=meta, skim=skim)
    logger.info("Fetching assocs from store")
    assocs = bulk_fetch_cached(subject_category=subject_category,
                               object_category=object_category,
                               evidence=evidence,
                               taxon=taxon)
    logger.info("Creating map for {} subjects".format(len(assocs)))
    # NOTE(review): the `relation` parameter is accepted but not forwarded
    # to the fetch, and the per-assoc 'relation' field is not used here
    # (an unused local assignment of it was removed) — confirm intent.
    subject_label_map = {a['subject']: a['subject_label'] for a in assocs}
    amap = {a['subject']: a['objects'] for a in assocs}
    aset = AssociationSet(ontology=ontology,
                          meta=meta,
                          subject_label_map=subject_label_map,
                          association_map=amap)
    return aset
def split_assocs(self, root_target : TargetClass, ontology : Ontology = None):
    """
    Partition the current association set into feature and target sets.

    Any annotation term that has `root_target` among its reflexive
    ancestors is routed to the target set; every other term stays in the
    feature set. `self.assocs` is replaced by the feature-only set and
    `self.target_assocs` receives the target-only set.
    """
    logging.info('Splitting assocs on: {} // {}'.format(root_target, ontology))
    aset = self.assocs
    if ontology is None:
        ontology = aset.ontology
    feature_map = {}
    target_map = {}
    for subject in aset.subjects:
        target_terms = set()
        feature_terms = set()
        # single pass: route each term into exactly one bucket
        for term in aset.annotations(subject):
            if root_target in ontology.ancestors(term, reflexive=True):
                target_terms.add(term)
            else:
                feature_terms.add(term)
        feature_map[subject] = feature_terms
        target_map[subject] = target_terms
    self.assocs = AssociationSet(ontology=ontology, association_map=feature_map)
    self.target_assocs = AssociationSet(ontology=ontology, association_map=target_map)
    logging.info('Split; f={} t={}'.format(self.assocs, self.target_assocs))
def create_from_tuples(self, tuples, **args):
    """
    Creates an AssociationSet from a list of (subj, subj_name, obj) tuples.

    Arguments
    ---------
    tuples: iterable of 3-tuples (subject id, subject label, object id)
    args: additional keyword arguments passed through to the
        AssociationSet constructor
    """
    # defaultdict replaces the manual "if subj not in amap" dance;
    # it is a dict subclass, so AssociationSet sees a plain mapping
    amap = defaultdict(list)
    subject_label_map = {}
    for t in tuples:
        subj = t[0]
        subject_label_map[subj] = t[1]
        amap[subj].append(t[2])
    aset = AssociationSet(subject_label_map=subject_label_map,
                          association_map=amap,
                          **args)
    return aset
def test_enrichment():
    """
    Enrichment test: gene set 'a' is synthesized with a bias toward
    ploidy terms, gene set 'b' toward shape terms; the enrichment test
    on each set should surface the corresponding terms.
    """
    factory = OntologyFactory()
    ont = factory.create('pato')
    # Seed the RNG so the synthetic annotations — and therefore the test
    # outcome — are reproducible run-to-run (previously unseeded).
    random.seed(42)
    # Each tuple is (term, P(annotated | gene in set a), P(annotated | gene in set b)).
    termprobs = [(QUALITY, 0.8, 0.8),
                 (PLOIDY, 0.8, 0.2),
                 (EUPLOID, 0.7, 0.01),
                 (SHAPE, 0.2, 0.75),
                 (Y_SHAPED, 0.01, 0.5)]
    amap = {}
    geneset_a = []
    geneset_b = []
    for x in range(1, 100):
        for y in ['a', 'b']:
            dts = []
            for (t, p1, p2) in termprobs:
                p = p1 if y == 'a' else p2
                if random.random() < p:
                    dts.append(t)
            g = y + str(x)
            if y == 'a':
                geneset_a.append(g)
            else:
                geneset_b.append(g)
            amap[g] = dts
    logging.info(str(amap))
    aset = AssociationSet(ontology=ont, association_map=amap)
    logging.info(str(aset))
    print(str(geneset_a))
    results = aset.enrichment_test(geneset_a, labels=True)
    print(str(results))
    print("EXPECTED: {} {}".format(PLOIDY, EUPLOID))
    results = aset.enrichment_test(geneset_b, labels=True)
    print(str(results))
    print("EXPECTED: {} {}".format(SHAPE, Y_SHAPED))
def test_assoc_query():
    """
    Reconstitution test: boolean queries, pairwise jaccard similarity and
    intersection queries over a small hand-built association set.
    """
    print("Making ont factory")
    factory = OntologyFactory()
    # default method is sparql
    print("Creating ont")
    ont = factory.create('pato')
    print("Creating assoc set")
    aset = AssociationSet(ontology=ont, association_map={
        'a': [],
        'b': [EUPLOID],
        'c': [Y_SHAPED],
        'd': [EUPLOID, Y_SHAPED],
    })
    # Each case: (positive terms, negated terms, expected hit count,
    #             subjects that must appear among the hits)
    cases = [
        ([], [], 4, []),
        ([EUPLOID], [], 2, ['b', 'd']),
        ([EUPLOID, Y_SHAPED], [], 1, ['d']),
        ([PLOIDY, SHAPE], [], 1, ['d']),
        ([], [PLOIDY, SHAPE], 1, ['a']),
        ([PLOIDY], [SHAPE], 1, ['b']),
        ([EUPLOID], [Y_SHAPED], 1, ['b']),
        ([EUPLOID], [PLOIDY], 0, []),
        ([PLOIDY], [EUPLOID], 0, []),
        ([QUALITY], [PLOIDY], 1, ['c']),
        ([SHAPE], [QUALITY], 0, []),
        ([QUALITY], [QUALITY], 0, []),
    ]
    for pos_terms, neg_terms, expected_n, required_members in cases:
        hits = aset.query(pos_terms, neg_terms)
        assert len(hits) == expected_n
        for member in required_members:
            assert member in hits
    # 'a' has no annotations, so it shares nothing with anyone;
    # self-similarity is 1.0; similarity must be symmetric.
    for s1 in aset.subjects:
        for s2 in aset.subjects:
            sim = aset.jaccard_similarity(s1, s2)
            print("{} vs {} = {}".format(s1, s2, sim))
            if s1 == 'a' or s2 == 'a':
                assert sim == 0.0
            elif s1 == s2:
                assert sim == 1.0
            else:
                assert sim == aset.jaccard_similarity(s2, s1)
    terms1 = [QUALITY, PLOIDY, SHAPE]
    terms2 = [QUALITY, EUPLOID, Y_SHAPED]
    ilist = aset.query_intersections(terms1, terms2)
    print(str(ilist))