Code example #1
    def create_from_assocs(self, assocs, **args):
        """
        Creates an AssociationSet from a list of association objects.

        Arguments
        ---------
        assocs:
            list of association objects; each is converted to a dict via
            its `to_hash_assoc()` method. Each dict is expected to have
            'subject' (with 'id' and 'label'), 'object' (with 'id'),
            and 'negated' keys.
        **args:
            additional keyword arguments passed through to AssociationSet

        Returns
        -------
        AssociationSet
            with `associations_by_subj` and `associations_by_subj_obj`
            lookup indexes attached.
        """
        assocs = [a.to_hash_assoc() for a in assocs]
        # Show a sample association for debugging; guard against an empty
        # input list (previously this raised IndexError on []).
        if assocs:
            print(json.dumps(assocs[0], indent=4))

        amap = defaultdict(list)
        subject_label_map = {}
        for a in assocs:
            subj = a['subject']
            subj_id = subj['id']
            subj_label = subj['label']
            subject_label_map[subj_id] = subj_label
            # negated associations are excluded from the positive map
            if not a['negated']:
                amap[subj_id].append(a['object']['id'])

        aset = AssociationSet(subject_label_map=subject_label_map,
                              association_map=amap,
                              **args)
        # Secondary indexes over the raw association dicts for fast lookup
        # by subject and by (subject, object) pair.
        aset.associations_by_subj = defaultdict(list)
        aset.associations_by_subj_obj = defaultdict(list)
        for a in assocs:
            sub_id = a['subject']['id']
            obj_id = a['object']['id']
            aset.associations_by_subj[sub_id].append(a)
            aset.associations_by_subj_obj[(sub_id, obj_id)].append(a)

        return aset
Code example #2
    def create(self,
               ontology=None,
               subject_category=None,
               object_category=None,
               evidence=None,
               taxon=None,
               relation=None,
               file=None,
               fmt=None,
               skim=True):
        """
        creates an AssociationSet

        Currently, this uses an eager binding to a `ontobio.golr` instance. All compact associations for the particular combination
        of parameters are fetched.

        Arguments
        ---------

        ontology:         an `Ontology` object
        subject_category: string representing category of subjects (e.g. gene, disease, variant)
        object_category:  string representing category of objects (e.g. function, phenotype, disease)
        evidence:         optional evidence filter passed to the store
        taxon:            string holding NCBITaxon:nnnn ID
        relation:         currently unused; reserved for relation filtering
        file:             if provided, load from this file instead of the store
        fmt:              format of `file` (only used together with `file`)
        skim:             passed through to `create_from_file`

        Returns
        -------
        AssociationSet
        """
        meta = AssociationSetMetadata(subject_category=subject_category,
                                      object_category=object_category,
                                      taxon=taxon)

        # A file source takes precedence over the remote store.
        if file is not None:
            return self.create_from_file(file=file,
                                         fmt=fmt,
                                         ontology=ontology,
                                         meta=meta,
                                         skim=skim)

        logger.info("Fetching assocs from store")
        assocs = bulk_fetch_cached(subject_category=subject_category,
                                   object_category=object_category,
                                   evidence=evidence,
                                   taxon=taxon)

        logger.info("Creating map for {} subjects".format(len(assocs)))

        # Build subject->objects and subject->label maps in one pass.
        # (The unused `rel = a['relation']` local has been removed.)
        amap = {}
        subject_label_map = {}
        for a in assocs:
            subj = a['subject']
            subject_label_map[subj] = a['subject_label']
            amap[subj] = a['objects']

        aset = AssociationSet(ontology=ontology,
                              meta=meta,
                              subject_label_map=subject_label_map,
                              association_map=amap)
        return aset
Code example #3
 def split_assocs(self, root_target : TargetClass, ontology : Ontology = None):
     """
     Partition self.assocs into feature vs target association sets.

     A term counts as a "target" when `root_target` appears among its
     reflexive ancestors in the ontology; all other terms are
     "features". The feature associations replace `self.assocs`; the
     target associations are stored in `self.target_assocs`.
     """
     logging.info('Splitting assocs on: {} // {}'.format(root_target, ontology))
     aset = self.assocs
     # Fall back to the association set's own ontology when none given.
     if ontology is None:
         ontology = aset.ontology
     feature_map = {}
     target_map = {}
     for subj in aset.subjects:
         target_terms = set()
         feature_terms = set()
         for term in aset.annotations(subj):
             # Route each annotation into exactly one of the two buckets.
             if root_target in ontology.ancestors(term, reflexive=True):
                 bucket = target_terms
             else:
                 bucket = feature_terms
             bucket.add(term)
         feature_map[subj] = feature_terms
         target_map[subj] = target_terms
     self.assocs = AssociationSet(ontology=ontology, association_map=feature_map)
     self.target_assocs = AssociationSet(ontology=ontology, association_map=target_map)
     logging.info('Split; f={} t={}'.format(self.assocs, self.target_assocs))
Code example #4
File: test_assocmodel.py  Project: valearna/ontobio
def test_enrichment():
    """
    enrichment
    """
    factory = OntologyFactory()
    ont = factory.create('pato')

    # Sampling probabilities: (term, P(term | gene set a), P(term | gene set b)).
    # Gene set 'a' is biased toward ploidy terms, 'b' toward shape terms.
    termprobs = [(QUALITY, 0.8, 0.8), (PLOIDY, 0.8, 0.2), (EUPLOID, 0.7, 0.01),
                 (SHAPE, 0.2, 0.75), (Y_SHAPED, 0.01, 0.5)]
    amap = {}
    geneset_a = []
    geneset_b = []
    for idx in range(1, 100):
        for prefix in ['a', 'b']:
            sampled_terms = []
            for (term, prob_a, prob_b) in termprobs:
                # Pick the probability column for this gene set.
                prob = prob_a if prefix == 'a' else prob_b
                if random.random() < prob:
                    sampled_terms.append(term)
            gene = prefix + str(idx)
            target_set = geneset_a if prefix == 'a' else geneset_b
            target_set.append(gene)
            amap[gene] = sampled_terms
    logging.info(str(amap))
    aset = AssociationSet(ontology=ont, association_map=amap)
    logging.info(str(aset))
    print(str(geneset_a))
    results = aset.enrichment_test(geneset_a, labels=True)
    print(str(results))
    print("EXPECTED: {} {}".format(PLOIDY, EUPLOID))
    results = aset.enrichment_test(geneset_b, labels=True)
    print(str(results))
    print("EXPECTED: {} {}".format(SHAPE, Y_SHAPED))
Code example #5
File: assoc_factory.py  Project: lpalbou/ontobio
    def create_from_tuples(self, tuples, **args):
        """
        Creates an AssociationSet from a list of (subj, subj_name, obj) tuples.

        Arguments
        ---------
        tuples:
            iterable of tuples whose first three elements are
            (subject id, subject label, object id)
        **args:
            additional keyword arguments passed through to AssociationSet

        Returns
        -------
        AssociationSet
        """
        amap = {}
        subject_label_map = {}
        for a in tuples:
            subj = a[0]
            subject_label_map[subj] = a[1]
            # setdefault replaces the explicit membership check + append.
            amap.setdefault(subj, []).append(a[2])

        aset = AssociationSet(subject_label_map=subject_label_map, association_map=amap, **args)
        return aset
Code example #6
File: semsim.py  Project: valearna/ontobio
def jaccard_similarity(aset: AssociationSet, s1: str, s2: str) -> float:
    """
    Compute the Jaccard index over the inferred association closures of
    two subjects: |intersection| / |union| of their inferred type sets.

    Returns 0.0 when both closures are empty (avoids division by zero).
    """
    closure1 = aset.inferred_types(s1)
    closure2 = aset.inferred_types(s2)
    union = closure1 | closure2
    if not union:
        return 0.0
    return len(closure1 & closure2) / len(union)
Code example #7
    def jaccard_similarity(aset: AssociationSet, s1: str, s2: str) -> Tuple[float, list]:
        """
        Compute the Jaccard index over the inferred association closures
        of two subjects.

        Returns a (score, shared_terms) pair: the score is
        |intersection| / |union| of the two inferred type sets, and
        shared_terms is the intersection as a list (a list rather than
        a set to avoid later JSON serialization problems). When both
        closures are empty the result is (0.0, []).
        """
        closure1 = aset.inferred_types(s1)
        closure2 = aset.inferred_types(s2)
        union_size = len(closure1 | closure2)
        if union_size == 0:
            return 0.0, list()

        common = closure1 & closure2
        return len(common) / union_size, list(common)
Code example #8
File: test_assocmodel.py  Project: valearna/ontobio
def test_assoc_query():
    """
    reconstitution test
    """
    print("Making ont factory")
    factory = OntologyFactory()
    # default method is sparql
    print("Creating ont")
    ont = factory.create('pato')
    print("Creating assoc set")
    aset = AssociationSet(ontology=ont,
                          association_map={
                              'a': [],
                              'b': [EUPLOID],
                              'c': [Y_SHAPED],
                              'd': [EUPLOID, Y_SHAPED],
                          })

    # Query cases: (positive terms, negative terms,
    #               expected result count, subjects that must be present).
    query_cases = [
        ([], [], 4, []),
        ([EUPLOID], [], 2, ['b', 'd']),
        ([EUPLOID, Y_SHAPED], [], 1, ['d']),
        ([PLOIDY, SHAPE], [], 1, ['d']),
        ([], [PLOIDY, SHAPE], 1, ['a']),
        ([PLOIDY], [SHAPE], 1, ['b']),
        ([EUPLOID], [Y_SHAPED], 1, ['b']),
        ([EUPLOID], [PLOIDY], 0, []),
        ([PLOIDY], [EUPLOID], 0, []),
        ([QUALITY], [PLOIDY], 1, ['c']),
        ([SHAPE], [QUALITY], 0, []),
        ([QUALITY], [QUALITY], 0, []),
    ]
    for pos_terms, neg_terms, expected_n, expected_members in query_cases:
        rs = aset.query(pos_terms, neg_terms)
        assert len(rs) == expected_n
        for member in expected_members:
            assert member in rs

    # Jaccard similarity: 0 against the empty subject 'a',
    # 1 against itself, and symmetric for all other pairs.
    for s1 in aset.subjects:
        for s2 in aset.subjects:
            sim = aset.jaccard_similarity(s1, s2)
            print("{} vs {} = {}".format(s1, s2, sim))
            if s1 == 'a' or s2 == 'a':
                assert sim == 0.0
            elif s1 == s2:
                assert sim == 1.0
            else:
                assert sim == aset.jaccard_similarity(s2, s1)

    ilist = aset.query_intersections([QUALITY, PLOIDY, SHAPE],
                                     [QUALITY, EUPLOID, Y_SHAPED])
    print(str(ilist))