Example #1
0
File: ddr.py Project: bio2bel/ddr
def _make_graph(
    df: pd.DataFrame,
    use_tqdm: bool = False,
    min_network_separation: float = 0,
) -> BELGraph:
    """Build a BEL graph of disease-disease associations from a separation table.

    A row becomes an association edge only when both disease cells are truthy
    and its observed separation is not greater than ``min_network_separation``.

    :param df: Table providing the columns ``disease_A``, ``disease_B`` and
        ``s_AB (observed)``.
    :param use_tqdm: If true, wrap the row iterator in a tqdm progress bar.
    :param min_network_separation: Rows whose observed separation exceeds this
        value are skipped.
    :return: The populated graph.
    """
    graph = BELGraph(
        name="Disease-disease relationships",
        version="1.0.0",
    )

    # From lit: In summary, we conclude that the proposed separation measure
    # sAB offers a robust quantification of the network-based relationship
    # between diseases. As expected, we find that most disease pairs are
    # clearly separated (sAB > 0), however, we also identified a considerable
    # number of disease pairs with statistically significant overlap (sAB < 0).
    columns = ['disease_A', 'disease_B', 's_AB (observed)']
    rows = df[columns].iterrows()
    if use_tqdm:
        rows = tqdm(rows, total=len(df.index), desc='generating BEL')

    for _, (disease_a, disease_b, network_separation) in rows:
        # Both endpoints are required to build an edge.
        if not (disease_a and disease_b):
            continue
        # Keep only pairs at or below the separation threshold.
        if network_separation > min_network_separation:
            continue

        source = Pathology("mesh", disease_a)
        target = Pathology("mesh", disease_b)
        graph.add_association(
            source,
            target,
            citation="25700523",
            evidence="from ddr",
            annotations={
                'bio2bel': MODULE_NAME,
                's_AB': network_separation,
            },
        )

    return graph
Example #2
0
    def test_isA(self):
        """Test parsing an isA relation between two pathologies.

        See also: 3.4.5 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_isa
        """
        statement = 'pathology(MESH:Psoriasis) isA pathology(MESH:"Skin Diseases")'
        parsed = self.parser.relation.parseString(statement)

        # The parse result is compared as nested lists: subject, relation, object.
        self.assertEqual(
            [
                [PATHOLOGY, 'MESH', 'Psoriasis'],
                'isA',
                [PATHOLOGY, 'MESH', 'Skin Diseases'],
            ],
            parsed.asList(),
        )

        psoriasis = Pathology('MESH', 'Psoriasis')
        self.assert_has_node(psoriasis)

        skin_diseases = Pathology('MESH', 'Skin Diseases')
        self.assert_has_node(skin_diseases)

        self.assert_has_edge(psoriasis, skin_diseases, relation=IS_A)
Example #3
0
    def test_cnc_with_subject_variant(self):
        """Test a causesNoChange relationship with a variant in the subject.

        See also: 3.1.6 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#Xcnc
        """
        statement = 'g(HGNC:APP,sub(G,275341,C)) cnc path(MESH:"Alzheimer Disease")'
        parsed = self.parser.relation.parseString(statement)

        # Expected subject: the APP gene carrying one HGVS variant.
        expected_source = {
            FUNCTION: GENE,
            CONCEPT: {
                NAMESPACE: 'HGNC',
                NAME: 'APP',
            },
            VARIANTS: [
                {
                    KIND: HGVS,
                    HGVS: 'c.275341G>C'
                },
            ],
        }
        # Expected object: the pathology named in the statement.
        expected_target = {
            FUNCTION: PATHOLOGY,
            CONCEPT: {
                NAMESPACE: 'MESH',
                NAME: 'Alzheimer Disease',
            },
        }
        expected_dict = {
            SOURCE: expected_source,
            RELATION: CAUSES_NO_CHANGE,
            TARGET: expected_target,
        }
        self.assertEqual(expected_dict, parsed.asDict())

        # Both the reference gene and its variant form must be in the graph.
        reference_gene = gene(namespace='HGNC', name='APP')
        self.assert_has_node(reference_gene)
        variant_gene = reference_gene.with_variants(hgvs('c.275341G>C'))
        self.assert_has_node(variant_gene)

        alzheimer = Pathology('MESH', 'Alzheimer Disease')
        self.assert_has_node(alzheimer)

        self.assert_has_edge(variant_gene, alzheimer, relation=expected_dict[RELATION])
Example #4
0
    def bel_isolated_reconstituted(self, graph: BELGraph):
        """Check the nodes and complex-membership edges of the isolated node graph.

        :param graph: The graph under test.
        """
        self.assertIsNotNone(graph)
        self.assertIsInstance(graph, BELGraph)

        first_member = Protein(namespace='HGNC', name='ADGRB1')
        second_member = Protein(namespace='HGNC', name='ADGRB2')
        members = (first_member, second_member)
        complex_node = ComplexAbundance(list(members))
        pathology_node = Pathology(namespace='MESHD', name='Achlorhydria')

        # Every node stored in the graph must be a DSL entity.
        for node in graph:
            self.assertIsInstance(node, BaseEntity)

        for expected_node in (*members, complex_node, pathology_node):
            self.assertIn(expected_node, graph)

        # Each member protein is linked to the complex by a partOf edge.
        for member in members:
            assert_has_edge(self, member, complex_node, graph, relation=PART_OF)
 def test_pathology(self):
     """Check the string form of a pathology whose name contains a space."""
     expected = 'path(DO:"Alzheimer disease")'
     pathology = Pathology(namespace='DO', name='Alzheimer disease')
     self.assertEqual(expected, str(pathology))
Example #6
0
# Fixture nodes; variables sharing a numeric suffix use the same name string.
# Every constructor receives its namespace by keyword for consistency.
r1 = Rna(namespace=HGNC, name='1')
p1 = Protein(namespace=HGNC, name='1')

g2 = Gene(namespace=HGNC, name='2')
r2 = Rna(namespace=HGNC, name='2')
p2 = Protein(namespace=HGNC, name='2')

g3 = Gene(namespace=HGNC, name='3')
r3 = Rna(namespace=HGNC, name='3')
p3 = Protein(namespace=HGNC, name='3')

g4 = Gene(namespace=HGNC, name='4')
m4 = MicroRna(namespace=HGNC, name='4')

# Non-gene-product fixtures: a chemical abundance and a pathology.
a5 = Abundance(namespace=CHEBI, name='5')
p5 = Pathology(namespace=GO, name='5')


class TestCollapseProteinInteractions(unittest.TestCase):
    """Tests that start from a small graph of protein, abundance and pathology nodes."""

    def test_protein_interaction_1(self):
        """Assemble a graph with four nodes and three qualified edges."""
        graph = BELGraph()

        for node in (p1, p2, a5, p5):
            graph.add_node_from_data(node)

        # Two parallel p1 -> p2 edges with different relations, then a5 -> p5.
        edges = (
            (p1, p2, POSITIVE_CORRELATION),
            (p1, p2, INCREASES),
            (a5, p5, DIRECTLY_INCREASES),
        )
        for source, target, relation in edges:
            graph.add_qualified_edge(source, target, relation=relation, citation=n(), evidence=n())
Example #7
0
# Evidence strings reused by the test fixtures.
evidence_1 = "Evidence 1"
dummy_evidence = 'These are mostly made up'

# Nodes created through the ``hgnc`` helper.
akt1 = hgnc(name='AKT1')
egfr = hgnc(name='EGFR')
fadd = hgnc(name='FADD')
casp8 = hgnc(name='CASP8')
mia = hgnc(name='MIA')

il6 = Protein(namespace='HGNC', name='IL6')

# Two proteins, the complex containing both, and a pathology.
adgrb1 = Protein(namespace='HGNC', name='ADGRB1')
adgrb2 = Protein(namespace='HGNC', name='ADGRB2')
adgrb_complex = ComplexAbundance([adgrb1, adgrb2])
achlorhydria = Pathology(namespace='MESHD', name='Achlorhydria')

# RNA and gene derived from AKT1, plus two variant forms of the gene.
akt1_rna = akt1.get_rna()
akt1_gene = akt1_rna.get_gene()
akt_methylated = akt1_gene.with_variants(GeneModification('Me'))
akt1_phe_508_del = akt1_gene.with_variants(Hgvs('p.Phe508del'))

# CFTR with an unspecified variant and with a specific HGVS variant.
cftr = hgnc('CFTR')
cftr_protein_unspecified_variant = cftr.with_variants(HgvsUnspecified())
cftr_protein_phe_508_del = cftr.with_variants(Hgvs('p.Phe508del'))

adenocarcinoma = Pathology(namespace='MESHD', name='Adenocarcinoma')
interleukin_23_complex = NamedComplexAbundance('GO', 'interleukin-23 complex')

# Chemical abundances.
oxygen_atom = Abundance(namespace='CHEBI', name='oxygen atom')
hydrogen_peroxide = Abundance(namespace='CHEBI', name='hydrogen peroxide')
Example #8
0
 Protein('HGNC', 'ZAP70'),
 NamedComplexAbundance('SCOMP', 'T Cell Receptor Complex'),
 BiologicalProcess('GO', 'T cell activation'),
 Protein('HGNC', 'CCL3'),
 Protein('HGNC', 'PLCG1'),
 Protein('HGNC', 'FASLG'),
 Protein('HGNC', 'IDO1'),
 Protein('HGNC', 'IL2'),
 Protein('HGNC', 'CD8A'),
 Protein('HGNC', 'CD8B'),
 Protein('HGNC', 'PLCG1'),
 Protein('HGNC', 'BCL2'),
 Protein('HGNC', 'CCR3'),
 Protein('HGNC', 'IL2RB'),
 Protein('HGNC', 'CD28'),
 Pathology('SDIS', 'Cytotoxic T-cell activation'),
 Protein('HGNC', 'FYN'),
 Protein('HGNC', 'CXCL16'),
 Protein('HGNC', 'CCR5'),
 Protein('HGNC', 'LCK'),
 Protein('SFAM', 'Chemokine Receptor Family'),
 Protein('HGNC', 'CXCL9'),
 Pathology('SDIS', 'T-cell migration'),
 Protein('HGNC', 'CXCR3'),
 Abundance('CHEBI', 'acrolein'),
 Protein('HGNC', 'IDO2'),
 Pathology('MESHD', 'Pulmonary Disease, Chronic Obstructive'),
 Protein('HGNC', 'IFNG'),
 Protein('HGNC', 'TNFRSF4'),
 Protein('HGNC', 'CTLA4'),
 Protein('HGNC', 'GZMA'),
Example #9
0
def _rel(x):
    """Return a one-entry dictionary mapping ``RELATION`` to ``x``."""
    data = {}
    data[RELATION] = x
    return data


def _rela(x, y=None):
    """Return a dictionary with ``x`` as the relation and ``activity(y)`` as the object entry.

    :param x: Value stored under ``RELATION``.
    :param y: Argument forwarded to ``activity``; defaults to ``None``.
    """
    data = {RELATION: x}
    data[OBJECT] = activity(y)
    return data


def _assoc(y):
    """Return an ``ASSOCIATION`` relation dictionary tagged with association type ``y``."""
    data = {RELATION: ASSOCIATION}
    data['association_type'] = y
    return data


# Fixture nodes, one or two per entity type; each is built from a namespace
# and a short numeric name.
a1 = Abundance('CHEBI', '1')
p1 = Protein('HGNC', '1')
pf1 = Protein('INTERPRO', '1')
d1 = Pathology('MESH', '1')
b1 = BiologicalProcess('GO', '1')
b2 = BiologicalProcess('GO', '2')
m1 = MicroRna('MIRBASE', '1')
r1 = Rna('HGNC', '1')
r2 = Rna('HGNC', '2')
nca1 = NamedComplexAbundance('FPLX', '1')
pop1 = Population('taxonomy', '1')

# Proteins given by identifier instead of name, and the RNA and gene
# derived from the second one.
p2 = Protein('HGNC', identifier='9236')
p3 = Protein('HGNC', identifier='9212')
r3 = p3.get_rna()
g3 = r3.get_gene()

# Complexes: one mixing a protein with a gene, one made of two proteins.
c1 = ComplexAbundance([p2, g3])
c2 = ComplexAbundance([p1, p2])
Example #10
0
from tests.constants import (
    BelReconstitutionMixin,
    akt1,
    casp8,
    egfr,
    expected_test_simple_metadata,
    fadd,
    test_citation_dict,
    test_evidence_text,
)

# FOS and JUN nodes, two miRNA nodes and a pathology whose names come from
# ``n()`` (presumably a generated placeholder - TODO confirm), and the complex
# of FOS and JUN.
fos = hgnc(name='FOS')
jun = hgnc(name='JUN')
mirna_1 = mirbase(name=n())
mirna_2 = mirbase(name=n())
pathology_1 = Pathology('DO', n())
ap1_complex = ComplexAbundance([fos, jun])

# Complex whose two members are the same EGFR node.
egfr_dimer = ComplexAbundance([egfr, egfr])

# A complex nested inside another complex together with E2F4.
yfg_data = hgnc(name='YFG')
e2f4_data = hgnc(name='E2F4')
bound_ap1_e2f4 = ComplexAbundance([ap1_complex, e2f4_data])

# Reaction with superoxide as the reactant and hydrogen peroxide plus oxygen
# as the products.
superoxide = chebi(name='superoxide')
hydrogen_peroxide = chebi(name='hydrogen peroxide')
oxygen = chebi(name='oxygen')
superoxide_decomposition = Reaction(reactants=[superoxide],
                                    products=[hydrogen_peroxide, oxygen])

Example #11
0
def get_graph() -> BELGraph:
    """Build a BEL graph from the table returned by ``df_getter``.

    Every row with a mapped trait URI yields an association between the
    ``dbsnp`` node of the row and its ``efo`` pathology node. When the row's
    ``intergenic`` flag is zero, each mapped gene symbol that is found in
    ``hgnc_name_to_id`` is additionally linked to the ``dbsnp`` node as having
    it as a variant, and is associated with the pathology.

    Rows without a mapped trait URI are skipped entirely. Rows whose mapped
    gene cell is missing still get the variant-trait association but no gene
    edges.

    :return: The populated graph.
    """
    df = df_getter()
    graph = BELGraph(
        name='GWAS Catalog',
        version='1.0.2',
    )
    graph.namespace_pattern.update(
        dict(
            dbsnp=r'^rs\d+$',
            efo=r'^\d{7}$',
            hgnc=r'^((HGNC|hgnc):)?\d{1,5}$',
        ))

    it = tqdm(df.values, desc='Mapping GWAS Catalog to BEL')
    for (
            pmid,
            mapped_gene,
            dbsnp_id,
            context,
            intergenic,
            minus_log_p_value,
            risk_allele_frequency,
            or_or_beta,
            confidence_interval,
            mapped_trait,
            mapped_trait_uri,
    ) in it:
        if pd.isna(mapped_trait_uri):
            continue

        annotations = dict(
            minus_log_p_value=minus_log_p_value,
            risk_allele_frequency=risk_allele_frequency,
            odds_ratio_or_beta=or_or_beta,
            confidence_interval=confidence_interval,
        )

        if pd.notna(context):
            annotations['gwascatalog_context'] = {
                c.strip()
                for c in context.split(';')
            }

        citation = str(pmid)

        dbsnp_node = Gene(
            namespace='dbsnp',
            identifier=dbsnp_id,
        )
        # NOTE(review): the ``[4:]`` slice drops the first four characters of
        # the last URI segment, which presumably is an ``EFO_`` prefix - confirm
        # that no other ontology prefixes or multi-URI cells occur.
        pathology_node = Pathology(
            namespace='efo',
            name=mapped_trait,
            identifier=mapped_trait_uri.split('/')[-1][4:],
        )

        graph.add_association(
            dbsnp_node,
            pathology_node,
            citation=citation,
            evidence=MODULE_NAME,
            annotations=annotations,
        )

        # Gene edges are only added for rows whose intergenic flag is zero;
        # the flag may arrive as a string or as a number.
        if intergenic not in {'0', '0.0', 0, 0.0}:
            continue

        # A missing gene cell is not a string and has no ``split``; skip it
        # the same way missing context and trait URIs are handled above.
        if pd.isna(mapped_gene):
            continue

        for raw_symbol in mapped_gene.split(','):
            gene_symbol = raw_symbol.strip()
            hgnc_id = hgnc_name_to_id.get(gene_symbol)
            if hgnc_id is None:
                # TODO lookup for ensembl identifiers
                continue

            gene_node = Gene(
                namespace='hgnc',
                identifier=hgnc_id,
                name=gene_symbol,
            )
            graph.add_has_variant(gene_node, dbsnp_node)
            graph.add_association(
                gene_node,
                pathology_node,
                citation=citation,
                evidence=MODULE_NAME,
                annotations=annotations,
            )

    return graph
Example #12
0
# -*- coding: utf-8 -*-

from bio2bel_hmdb.enrich import *
from pybel import BELGraph
from pybel.dsl import Abundance, Pathology, Protein
from tests.constants import DatabaseMixin

# Fixture nodes for the enrichment tests. Despite the ``_tuple`` suffix these
# are DSL node objects, not tuples.
hmdb_tuple1 = Abundance('HMDB', 'HMDB00008')
protein_tuple = Protein('UP', 'P50440')

# test enriching with tissues
# NOTE(review): the node below is a pathology and the metabolite is used by
# the disease enrichment test, so "tissues" above may be stale - confirm.
hmdb_tuple2 = Abundance('HMDB', 'HMDB00064')
disease_tuple = Pathology('HMDB_D', 'Lung Cancer')


class TestEnrich(DatabaseMixin):
    """Tests for the graph enrichment functions."""

    def test_enrich_metabolites_proteins(self):
        """Enrich a single-metabolite graph with its proteins and check the result size."""
        graph = BELGraph()
        graph.add_node_from_data(hmdb_tuple1)

        # Before enrichment: only the seed metabolite, no edges.
        self.assertEqual(1, graph.number_of_nodes())
        self.assertEqual(0, graph.number_of_edges())

        enrich_metabolites_proteins(graph, self.manager)

        # After enrichment: three more nodes, three edges, one from the known protein.
        self.assertEqual(4, graph.number_of_nodes())
        self.assertEqual(3, graph.number_of_edges())
        self.assertTrue(graph.has_edge(protein_tuple, hmdb_tuple1))

    def test_enrich_metabolites_diseases(self):
        """Start from a graph holding the second metabolite fixture."""
        graph = BELGraph()
        graph.add_node_from_data(hmdb_tuple2)
Example #13
0
# Evidence strings reused by the test fixtures.
evidence_1 = "Evidence 1"
dummy_evidence = 'These are mostly made up'

# Nodes created through the ``hgnc`` helper.
akt1 = hgnc(name='AKT1')
egfr = hgnc(name='EGFR')
fadd = hgnc(name='FADD')
casp8 = hgnc(name='CASP8')
mia = hgnc(name='MIA')

il6 = Protein(namespace='HGNC', name='IL6')

# Two proteins, the complex containing both, and a pathology.
adgrb1 = Protein(namespace='HGNC', name='ADGRB1')
adgrb2 = Protein(namespace='HGNC', name='ADGRB2')
adgrb_complex = ComplexAbundance([adgrb1, adgrb2])
achlorhydria = Pathology(namespace='MESHD', name='Achlorhydria')

# RNA and gene derived from AKT1, plus two variant forms of the gene.
akt1_rna = akt1.get_rna()
akt1_gene = akt1_rna.get_gene()
akt_methylated = akt1_gene.with_variants(GeneModification('Me'))
akt1_phe_508_del = akt1_gene.with_variants(Hgvs('p.Phe508del'))

# CFTR with an unspecified variant and with a specific HGVS variant.
cftr = hgnc(name='CFTR')
cftr_protein_unspecified_variant = cftr.with_variants(HgvsUnspecified())
cftr_protein_phe_508_del = cftr.with_variants(Hgvs('p.Phe508del'))

adenocarcinoma = Pathology(namespace='MESHD', name='Adenocarcinoma')
interleukin_23_complex = NamedComplexAbundance('GO', 'interleukin-23 complex')

# Chemical abundances.
oxygen_atom = Abundance(namespace='CHEBI', name='oxygen atom')
hydrogen_peroxide = Abundance(namespace='CHEBI', name='hydrogen peroxide')