Example #1
0
    def test_get_upstream_causal_subgraph(self):
        """Check which nodes and edges get_upstream_causal_subgraph keeps for seeds a and b.

        The universe holds a -> b, b -> c, d -- a, e -> a and f -| b. The result is
        expected to contain a, b, e and f with the three edges e -> a, a -> b and
        f -| b, and to carry over the universe's namespace pattern.
        """
        nodes = [protein(namespace='test', name=n()) for _ in range(6)]
        a, b, c, d, e, f = nodes

        universe = BELGraph()
        universe.namespace_pattern['test'] = 'test-url'
        for add_edge, source, target in (
            (universe.add_increases, a, b),
            (universe.add_increases, b, c),
            (universe.add_association, d, a),
            (universe.add_increases, e, a),
            (universe.add_decreases, f, b),
        ):
            add_edge(source, target, citation=n(), evidence=n())

        result = get_upstream_causal_subgraph(universe, [a, b])

        self.assertIsInstance(result, BELGraph)
        self.assert_all_nodes_are_base_entities(result)

        # Graph-level metadata must survive the extraction
        self.assertIn('test', result.namespace_pattern)
        self.assertEqual('test-url', result.namespace_pattern['test'])

        for seed in (a, b):
            self.assertIn(seed, result)
        for excluded in (c, d):
            self.assertNotIn(excluded, result)
        for upstream in (e, f):
            self.assertIn(upstream, result)
        self.assertEqual(4, result.number_of_nodes())

        for source, target in ((e, a), (a, b), (f, b)):
            self.assert_in_edge(source, target, result)
        self.assertEqual(3, result.number_of_edges())
Example #2
0
File: ddr.py Project: bio2bel/ddr
def _make_graph(
    df: pd.DataFrame,
    use_tqdm: bool = False,
    min_network_separation: float = 0,
) -> BELGraph:
    """Convert disease-disease network separation rows into a BEL graph.

    An association edge between two MeSH pathologies is added for every row whose
    observed separation does not exceed ``min_network_separation``.

    :param df: table with the columns ``disease_A``, ``disease_B`` and ``s_AB (observed)``
    :param use_tqdm: if true, wrap the row iterator in a tqdm progress bar
    :param min_network_separation: rows with a separation above this value are skipped
    :return: the populated graph
    """
    graph = BELGraph(
        name="Disease-disease relationships",
        version="1.0.0",
    )
    # From lit: In summary, we conclude that the proposed separation measure sAB offers a robust
    # quantification of the network-based relationship between diseases. As expected, we find that
    # most disease pairs are clearly separated (sAB > 0), however, we also identified a considerable
    # number of disease pairs with statistically significant overlap (sAB < 0).
    it = df[['disease_A', 'disease_B', 's_AB (observed)']].iterrows()
    if use_tqdm:
        it = tqdm(it, total=len(df.index), desc='generating BEL')
    for _, (disease_a, disease_b, network_separation) in it:
        # NaN is truthy and compares False against any threshold, so missing values
        # would slip through the checks below; drop them explicitly.
        if pd.isna(disease_a) or pd.isna(disease_b) or pd.isna(network_separation):
            continue
        if not disease_a or not disease_b or network_separation > min_network_separation:
            continue
        graph.add_association(
            Pathology("mesh", disease_a),
            Pathology("mesh", disease_b),
            citation="25700523",
            evidence="from ddr",
            annotations={
                'bio2bel': MODULE_NAME,
                's_AB': network_separation,
            },
        )

    return graph
Example #3
0
def make_graph_4() -> BELGraph:
    """Make an example graph in which B is connected to every other node.

    Edges, in insertion order::

        A -> B
        B -| C
        B -| D
        B -| E
        B -| F
        B -> G
        B -| H
        B -> H
        B -> I
        B -- J
    """
    example = BELGraph(
        name='Lab course example',
        version='1.1.0',
        description='',
        authors='LSI',
        contact='*****@*****.**',
    )

    edge_plan = (
        (example.add_increases, protein_a, protein_b),
        (example.add_decreases, protein_b, gene_c),
        (example.add_decreases, protein_b, rna_d),
        (example.add_decreases, protein_b, protein_e),
        (example.add_decreases, protein_b, gene_f),
        (example.add_increases, protein_b, protein_g),
        (example.add_decreases, protein_b, protein_h),
        (example.add_increases, protein_b, protein_h),
        (example.add_increases, protein_b, protein_i),
        (example.add_association, protein_b, protein_j),
    )
    # Each edge gets its own pair of n() values, passed positionally as in every other builder here
    for add_edge, source, target in edge_plan:
        add_edge(source, target, n(), n())

    return example
Example #4
0
    def test_has_polarity(self):
        """Check has_polarity on an increases edge (true) and an association edge (false)."""
        graph = BELGraph()
        first, second, third = [protein(n(), n()) for _ in range(3)]

        graph.add_increases(first, second, n(), n(), key=0)
        increases_found = has_polarity(graph, first.as_tuple(), second.as_tuple(), 0)
        self.assertTrue(increases_found)

        graph.add_association(second, third, n(), n(), key=0)
        association_found = has_polarity(graph, second.as_tuple(), third.as_tuple(), 0)
        self.assertFalse(association_found)
def _add_my_row(
    graph: BELGraph,
    relation: str,
    source_ncbigene_id: str,
    target_ncbigene_id: str,
    pubmed_id: str,
    int_detection_method: str,
    source_database: str,
    confidence: str,
) -> None:  # noqa:C901
    """Add one edge to the graph for a single BioGRID row.

    The relation decides both the node type (gene or protein) and the edge type
    (association or binding).

    :param graph: graph to add edges to
    :param relation: row value of column relation; also stored as the ``psi-mi`` annotation
    :param source_ncbigene_id: row value of column source
    :param target_ncbigene_id: row value of column target
    :param pubmed_id: row value of column pubmed_id, used as the citation
    :param int_detection_method: row value of column interaction detection method
    :param source_database: stored as the ``biogrid-source`` annotation
    :param confidence: stored as the ``biogrid-confidence`` annotation
    :raises ValueError: if the relation is in none of the known BioGRID relation groups
    """
    # Pick the edge-adding method and the node class from the relation group
    if relation in BIOGRID_GENE_ASSOCIATION:
        add_edge, node_cls = graph.add_association, pybel.dsl.Gene
    elif relation in BIOGRID_ASSOCIATION_ACTIONS:
        add_edge, node_cls = graph.add_association, pybel.dsl.Protein
    elif relation in BIOGRID_BINDS_ACTIONS:
        add_edge, node_cls = graph.add_binds, pybel.dsl.Protein
    else:
        raise ValueError(f'Unhandled BioGrid relation: {relation}')

    source_node = node_cls(namespace='ncbigene', identifier=source_ncbigene_id)
    target_node = node_cls(namespace='ncbigene', identifier=target_ncbigene_id)
    add_edge(
        source_node,
        target_node,
        citation=pubmed_id,
        evidence=EVIDENCE,
        annotations={
            'psi-mi': relation,
            'biogrid-detection': int_detection_method,
            'biogrid-source': source_database,
            'biogrid-confidence': confidence,
        },
    )
Example #6
0
def _enrich_graph_with_df(graph: pybel.BELGraph, df: pd.DataFrame) -> None:
    """Add a protein-to-GO-process association to the graph for every row of the table.

    :param graph: graph that receives the edges
    :param df: table with the columns ``ncbigene_id``, ``source_name`` and ``target_id``
    """
    rows = df[['ncbigene_id', 'source_name', 'target_id']].values
    for gene_id, gene_name, process_id in rows:
        protein_node = pybel.dsl.Protein('ncbigene', identifier=gene_id, name=gene_name)
        process_name = pyobo.get_name('go', process_id)
        process_node = pybel.dsl.BiologicalProcess('go', identifier=process_id, name=process_name)
        # Citation and evidence are passed as empty strings
        graph.add_association(protein_node, process_node, citation='', evidence='')
Example #7
0
def make_graph(
    df: Optional[pd.DataFrame] = None,
    use_tqdm: bool = True,
) -> BELGraph:
    """Convert the data to a BEL graph.

    Every usable row yields a SNP-to-phenotype association. When the gene symbol
    resolves to an HGNC identifier, a gene-to-phenotype association is added as well.

    :param df: table with the columns ``snp``, ``gene_name``, ``phewas phenotype`` and
        ``odds-ratio``; loaded with ``get_df()`` when omitted
    :param use_tqdm: if true, wrap the row iterator in a tqdm progress bar
    :return: the populated graph
    """
    if df is None:
        df = get_df()

    graph = BELGraph(
        name="PheWAS gene-phenotype relationships",
        version="1.0.0",
    )

    it = df[["snp", 'gene_name', 'phewas phenotype', 'odds-ratio']].iterrows()
    if use_tqdm:
        it = tqdm(
            it,
            total=len(df.index),
            desc='PheWAS Catalog - generating BEL',
        )
    for i, (snp, gene_symbol, phenotype, odds_ratio) in it:
        if not snp or not gene_symbol or not phenotype or pd.isna(phenotype):
            # Logging interpolates lazily, so the message needs one placeholder per argument
            logger.debug(
                'Skipping row %s: snp=%s gene=%s phenotype=%s odds-ratio=%s',
                i, snp, gene_symbol, phenotype, odds_ratio,
            )
            continue

        graph.add_association(
            pybel.dsl.Gene("dbsnp", identifier=snp),
            pybel.dsl.Pathology("mesh", name=phenotype),
            citation="24270849",
            evidence="from PheWAS database",
            annotations={
                'bio2bel': MODULE_NAME,
                'OR': odds_ratio,
            },
        )

        # A NaN gene symbol is truthy and therefore survives the check above
        if pd.isna(gene_symbol):
            continue

        hgnc_id = hgnc_name_to_id.get(gene_symbol)
        if hgnc_id is None:
            # tqdm.write is callable on the class, so this also works when
            # use_tqdm is false and ``it`` is a plain generator without .write()
            tqdm.write(f'Missing identifier for {gene_symbol}')
            continue

        graph.add_association(
            pybel.dsl.Gene(
                namespace="hgnc",
                name=gene_symbol,
                identifier=hgnc_id,
            ),
            pybel.dsl.Pathology(
                namespace="mesh",
                name=phenotype,
            ),
            citation="24270849",
            evidence="from PheWAS database",
            annotations={
                'bio2bel': MODULE_NAME,
                'OR': odds_ratio,
            },
        )

    return graph
Example #8
0
    def test_get_top_hubs(self):
        """Check that get_top_hubs with n=1 returns the node that has two edges, with degree 2."""
        graph = BELGraph()
        hub = protein(n(), n())
        protein_neighbor = protein(n(), n())
        pathology_neighbor = pathology(n(), n())

        for neighbor in (protein_neighbor, pathology_neighbor):
            graph.add_association(hub, neighbor, citation=n(), evidence=n())

        top_hubs = get_top_hubs(graph, n=1)
        self.assertEqual(1, len(top_hubs))

        top_node, top_degree = top_hubs[0]
        self.assertEqual(hub, top_node)
        self.assertEqual(2, top_degree)
Example #9
0
    def test_has_polarity(self):
        """Check has_polarity on an increases edge (true) and an association edge (false)."""
        graph = BELGraph()
        first, second, third = [protein(n(), n()) for _ in range(3)]

        # The add_* methods hand back the key of the new edge
        increases_key = graph.add_increases(first, second, n(), n())
        self.assertTrue(has_polarity(graph, first, second, increases_key))

        association_key = graph.add_association(second, third, n(), n())
        self.assertFalse(has_polarity(graph, second, third, association_key))
Example #10
0
    def test_import_causal(self):
        """Check the mixed graph built from a BEL graph, with and without association edges.

        Directly increases/decreases edges are expected as directed edges and the
        negative correlation as an undirected edge. The association is expected as
        an undirected edge only when ``include_associations`` is true.
        """
        bel_graph = BELGraph()
        provenance = dict(citation=TEST_CITATION, evidence=TEST_EVIDENCE)
        bel_graph.add_directly_increases(A, B, **provenance)
        bel_graph.add_directly_decreases(B, C, **provenance)
        bel_graph.add_negative_correlation(A, C, **provenance)
        bel_graph.add_association(A, D, **provenance)

        scenarios = (
            (False, [('A', 'C')]),
            (True, [('A', 'C'), ('A', 'D')]),
        )
        for include_associations, undirected_edges in scenarios:
            expected = NxMixedGraph.from_edges(
                directed=[
                    ('A', 'B'),
                    ('B', 'C'),
                ],
                undirected=undirected_edges,
            )
            actual = bel_to_nxmg(bel_graph, include_associations=include_associations)
            self.nxmg_equal(expected, actual)
Example #11
0
    def test_count_pathologies(self):
        """Check pathology counts and the top pathology for a graph of three associations."""
        graph = BELGraph()
        protein_1 = protein(n(), n())
        protein_2 = protein(n(), n())
        disease_1 = pathology(n(), n())
        disease_2 = pathology(n(), n())

        # disease_1 gets one edge, disease_2 gets two
        for source, target in (
            (protein_1, disease_1),
            (protein_1, disease_2),
            (protein_2, disease_2),
        ):
            graph.add_association(source, target, n(), n())

        counts = count_pathologies(graph)
        for disease in (disease_1, disease_2):
            self.assertIn(disease, counts)
        self.assertEqual(1, counts[disease_1])
        self.assertEqual(2, counts[disease_2])

        top = get_top_pathologies(graph, count=1)
        self.assertEqual(1, len(top))
        top_node, top_count = top[0]
        self.assertEqual(disease_2, top_node)
        self.assertEqual(2, top_count)
Example #12
0
    def test_count_pathologies(self):
        """Check pathology counts and the top pathology for a graph of three associations."""
        graph = BELGraph()
        protein_1, protein_2 = [protein(namespace='HGNC', name=n()) for _ in range(2)]
        disease_1, disease_2 = [pathology(namespace='DOID', name=n()) for _ in range(2)]

        # disease_1 gets one edge, disease_2 gets two
        for source, target in (
            (protein_1, disease_1),
            (protein_1, disease_2),
            (protein_2, disease_2),
        ):
            graph.add_association(source, target, citation=n(), evidence=n())

        counts = count_pathologies(graph)
        for disease in (disease_1, disease_2):
            self.assertIn(disease, counts)
        self.assertEqual(1, counts[disease_1])
        self.assertEqual(2, counts[disease_2])

        top = get_top_pathologies(graph, n=1)
        self.assertEqual(1, len(top))
        top_node, top_count = top[0]
        self.assertEqual(disease_2, top_node)
        self.assertEqual(2, top_count)
Example #13
0
def make_graph_3() -> BELGraph:
    """Make an example graph.

    Edges, in insertion order::

        A -> B
        B -| C
        D -| F
        E -> F
        F -> C
        C -- G
    """
    example = BELGraph(
        name='Lab course example',
        version='1.1.0',
        description='',
        authors='LSI',
        contact='*****@*****.**',
    )

    edge_plan = (
        (example.add_increases, protein_a, protein_b),
        (example.add_decreases, protein_b, gene_c),
        (example.add_decreases, rna_d, gene_f),
        (example.add_increases, protein_e, gene_f),
        (example.add_increases, gene_f, gene_c),
        (example.add_association, gene_c, protein_g),
    )
    # Each edge gets its own pair of n() values, passed positionally
    for add_edge, source, target in edge_plan:
        add_edge(source, target, n(), n())

    return example
Example #14
0
    def test_expand_upstream_causal_subgraph(self):
        """Check in-place expansion of a subgraph with upstream causal edges from the universe.

        The subgraph starts with its own a -> b edge. After expansion it is expected
        to hold a, b, e and f, two parallel a -> b edges, and four edges in total.
        """
        a, b, c, d, e, f = [
            protein(namespace='test', name=letter)
            for letter in string.ascii_lowercase[:6]
        ]

        universe = BELGraph()
        for add_edge, source, target in (
            (universe.add_increases, a, b),
            (universe.add_increases, b, c),
            (universe.add_association, d, a),
            (universe.add_increases, e, a),
            (universe.add_decreases, f, b),
        ):
            add_edge(source, target, citation=n(), evidence=n())

        expanded = BELGraph()
        expanded.add_increases(a, b, citation=n(), evidence=n())

        expand_upstream_causal(universe, expanded)

        self.assertIsInstance(expanded, BELGraph)
        self.assert_all_nodes_are_base_entities(expanded)

        for seed in (a, b):
            self.assertIn(seed, expanded)
        for excluded in (c, d):
            self.assertNotIn(excluded, expanded)
        for upstream in (e, f):
            self.assertIn(upstream, expanded)
        self.assertEqual(4, expanded.number_of_nodes())

        for source, target in ((e, a), (a, b), (f, b)):
            self.assert_in_edge(source, target, expanded)
        self.assertEqual(2, len(expanded[a][b]))

        edge_listing = '\n'.join(map(str, expanded.edges()))
        self.assertEqual(4, expanded.number_of_edges(), msg=edge_listing)
Example #15
0
    def test_remove_pathologies(self):
        """Test removal of association edges, then of pathology nodes.

        The graph starts with three proteins, two pathologies and seven edges, one of
        which is an association. Removing associations is expected to drop exactly
        that edge. Removing pathologies is expected to leave only the three proteins
        and the two edges between them.
        """
        g = BELGraph()

        p1, p2, p3 = (Protein(namespace='HGNC', name=n()) for _ in range(3))
        d1, d2 = (pathology(namespace='MESH', name=n()) for _ in range(2))

        g.add_increases(p1, p2, citation=n(), evidence=n())
        g.add_increases(p2, p3, citation=n(), evidence=n())
        g.add_positive_correlation(p1, d1, citation=n(), evidence=n())
        g.add_positive_correlation(p2, d1, citation=n(), evidence=n())
        g.add_association(p2, d1, citation=n(), evidence=n())
        g.add_positive_correlation(d1, d2, citation=n(), evidence=n())
        g.add_positive_correlation(d1, d2, citation=n(), evidence=n())

        self.assertEqual(5, g.number_of_nodes())
        self.assertEqual(7, g.number_of_edges())
        self.assertEqual(2, len(g[p2][d1]))

        remove_associations(g)

        # Only the positive correlation between p2 and d1 should be left
        relations = list(g[p2][d1].values())
        self.assertEqual(1, len(relations))
        self.assertEqual(POSITIVE_CORRELATION, relations[0][RELATION])

        self.assertEqual(5, g.number_of_nodes())
        self.assertEqual(6, g.number_of_edges())

        remove_pathologies(g)

        # assertIn, not assertTrue: assertTrue(p1, g) would use g as the failure
        # message and pass for any truthy p1, checking nothing about the graph.
        self.assertIn(p1, g)
        self.assertIn(p2, g)
        self.assertIn(p3, g)
        self.assertNotIn(d1, g)
        self.assertNotIn(d2, g)
        self.assertEqual(3, g.number_of_nodes())
        self.assertEqual(2, g.number_of_edges())
Example #16
0
    def test_remove_pathologies(self):
        """Check removal of association edges, then of pathology nodes.

        The graph starts with three proteins, two pathologies and seven edges, one of
        which is an association.
        """
        graph = BELGraph()

        p1, p2, p3 = [protein(namespace='HGNC', name=n()) for _ in range(3)]
        d1, d2 = [pathology(namespace='MESH', name=n()) for _ in range(2)]

        graph.add_increases(p1, p2, n(), n())
        graph.add_increases(p2, p3, n(), n())
        for source in (p1, p2):
            graph.add_qualified_edge(source, d1, POSITIVE_CORRELATION, n(), n())
        graph.add_association(p2, d1, n(), n())
        for _ in range(2):
            graph.add_qualified_edge(d1, d2, POSITIVE_CORRELATION, n(), n())

        self.assertEqual(5, graph.number_of_nodes())
        self.assertEqual(7, graph.number_of_edges())
        self.assertEqual(2, len(graph[p2.as_tuple()][d1.as_tuple()]))

        remove_associations(graph)

        # Only the positive correlation between p2 and d1 should be left
        remaining = list(graph[p2.as_tuple()][d1.as_tuple()].values())
        self.assertEqual(1, len(remaining))
        self.assertEqual(POSITIVE_CORRELATION, remaining[0][RELATION])

        self.assertEqual(5, graph.number_of_nodes())
        self.assertEqual(6, graph.number_of_edges())
        self.assertEqual(5, graph.number_of_nodes())

        remove_pathologies(graph)

        for survivor in (p1, p2, p3):
            self.assertTrue(graph.has_node_with_data(survivor))
        self.assertEqual(3, graph.number_of_nodes())
        self.assertEqual(2, graph.number_of_edges())
Example #17
0
    def add_to_graph(self, graph: BELGraph) -> Optional[str]:
        """Add this annotation to the BEL graph.

        :param graph: the graph that receives the association edge
        :return: the value returned by ``graph.add_association``, or None when either
            the term or this annotation has no (truthy) BEL representation
        """
        sub = self.term.as_bel()
        obj = self.as_bel()

        if not sub or not obj:
            return None

        # Reuse the nodes built above rather than calling as_bel() a second time
        return graph.add_association(
            sub,
            obj,
            evidence=self.evidence_code,
            citation=self._get_citation(),
            annotations={
                'Species': self.tax_id,
            },
        )
Example #18
0
    def test_has_pathology(self):
        """Check has_pathology_causal on four edges between a protein and two pathologies.

        Only the increases edge whose source is a pathology is expected to match.
        """
        graph = BELGraph()

        protein_node = protein(n(), n())
        disease_1 = pathology(n(), n())
        disease_2 = pathology(n(), n())

        cases = (
            (graph.add_increases, protein_node, disease_1, self.assertFalse),
            (graph.add_increases, disease_1, protein_node, self.assertTrue),
            (graph.add_association, disease_1, protein_node, self.assertFalse),
            (graph.add_increases, protein_node, disease_2, self.assertFalse),
        )
        for add_edge, source, target, check in cases:
            key = add_edge(source, target, n(), n())
            check(has_pathology_causal(graph, source, target, key))
Example #19
0
def get_graph() -> BELGraph:
    """Build a BEL graph from the GWAS Catalog table returned by ``df_getter()``.

    Rows without a mapped trait URI are skipped. Every remaining row yields a
    SNP-to-trait association. Rows whose intergenic flag is zero additionally
    yield, for each mapped gene symbol with a known HGNC identifier, a
    gene-has-variant edge and a gene-to-trait association.

    :return: the populated graph
    """
    df = df_getter()
    graph = BELGraph(
        name='GWAS Catalog',
        version='1.0.2',
    )
    graph.namespace_pattern.update(
        dict(
            dbsnp=r'^rs\d+$',
            efo=r'^\d{7}$',
            hgnc=r'^((HGNC|hgnc):)?\d{1,5}$',
        ))

    it = tqdm(df.values, desc='Mapping GWAS Catalog to BEL')
    for (
            pmid,
            mapped_gene,
            dbsnp_id,
            context,
            intergenic,
            minus_log_p_value,
            risk_allele_frequency,
            or_or_beta,
            confidence_interval,
            mapped_trait,
            mapped_trait_uri,
    ) in it:
        if pd.isna(mapped_trait_uri):
            continue

        annotations = dict(
            minus_log_p_value=minus_log_p_value,
            risk_allele_frequency=risk_allele_frequency,
            odds_ratio_or_beta=or_or_beta,
            confidence_interval=confidence_interval,
        )

        if pd.notna(context):
            # The context column holds ';'-separated values
            annotations['gwascatalog_context'] = {
                c.strip()
                for c in context.split(';')
            }

        dbsnp_node = Gene(
            namespace='dbsnp',
            identifier=dbsnp_id,
        )
        pathology_node = Pathology(
            namespace='efo',
            name=mapped_trait,
            # Last URI path segment minus its first four characters
            # (presumably an "EFO_" prefix - TODO confirm against the data)
            identifier=mapped_trait_uri.split('/')[-1][4:],
        )

        graph.add_association(
            dbsnp_node,
            pathology_node,
            citation=str(pmid),
            evidence=MODULE_NAME,
            annotations=annotations,
        )

        # Gene-level edges only apply to rows flagged as not intergenic
        if intergenic not in {'0', '0.0', 0, 0.0}:
            continue
        # A missing mapped gene would otherwise crash on .split()
        if pd.isna(mapped_gene):
            continue

        for gene_symbol in mapped_gene.split(','):
            gene_symbol = gene_symbol.strip()
            hgnc_id = hgnc_name_to_id.get(gene_symbol)
            if hgnc_id is None:
                # TODO lookup for ensembl identifiers
                continue

            gene_node = Gene(
                namespace='hgnc',
                identifier=hgnc_id,
                name=gene_symbol,
            )
            graph.add_has_variant(gene_node, dbsnp_node)
            graph.add_association(
                gene_node,
                pathology_node,
                citation=str(pmid),
                evidence=MODULE_NAME,
                annotations=annotations,
            )

    return graph
Example #20
0
def get_neurommsig_bel(
    df: pd.DataFrame,
    disease: str,
    nift_values: Mapping[str, str],
) -> BELGraph:
    """Generate the NeuroMMSig BEL graph.

    For every gene row, associations are added from the gene to each of its SNPs
    and to each clinical feature known to NIFT, and from each clinical SNP to each
    of those clinical features. Clinical features missing from ``nift_values`` are
    skipped and reported through the logger, as are capitalization fixes.

    :param df: table grouped by ``PATHWAY_COLUMN_NAME`` and sorted by ``GENE_COLUMN_NAME``;
        the columns listed in the module-level ``columns`` are read from it
    :param disease: disease name, used in the graph metadata and as the
        ``MeSHDisease`` annotation on every edge
    :param nift_values: a dictionary of lower-cased to normal names in NIFT
    :return: the populated graph
    """
    missing_features = set()
    fixed_caps = set()
    nift_value_originals = set(nift_values.values())

    graph = BELGraph(
        name=f'NeuroMMSigDB for {disease}',
        description=f'SNP and Clinical Features for Subgraphs in {disease}',
        authors=
        'Daniel Domingo-Fernández, Charles Tapley Hoyt, Mufassra Naz, Aybuge Altay, Anandhi Iyappan',
        contact='*****@*****.**',
        version=time.strftime('%Y%m%d'),
    )

    for _pathway, pathway_df in df.groupby(PATHWAY_COLUMN_NAME):
        sorted_pathway_df = pathway_df.sort_values(GENE_COLUMN_NAME)
        sliced_df = sorted_pathway_df[columns].itertuples()

        for _, gene, _pubmeds, lit_snps, gwas_snps, ld_block_snps, clinical_features, clinical_snps in sliced_df:
            gene = ensure_quotes(gene)

            all_snps = itt.chain(
                lit_snps or [],
                gwas_snps or [],
                ld_block_snps or [],
                clinical_snps or [],
            )
            for snp in all_snps:
                if not snp.strip():
                    continue
                graph.add_association(
                    Gene('HGNC', gene),
                    Gene('DBSNP', snp),
                    evidence=CANNED_EVIDENCE,
                    citation=CANNED_CITATION,
                    annotations={
                        'MeSHDisease': disease,
                    },
                )

            for clinical_feature in clinical_features or []:
                if not clinical_feature.strip():
                    continue

                lowered = clinical_feature.lower()
                if lowered not in nift_values:
                    missing_features.add(clinical_feature)
                    continue

                if clinical_feature not in nift_value_originals:
                    canonical = nift_values[lowered]
                    fixed_caps.add((clinical_feature, canonical))
                    clinical_feature = canonical  # fix capitalization

                graph.add_association(
                    Gene('HGNC', gene),
                    Abundance('NIFT', clinical_feature),
                    evidence=CANNED_EVIDENCE,
                    citation=CANNED_CITATION,
                    annotations={
                        'MeSHDisease': disease,
                    },
                )

                for clinical_snp in clinical_snps or []:
                    # Same blank filter as for the gene-SNP edges above, so that no
                    # empty-named SNP node is created here either
                    if not clinical_snp.strip():
                        continue
                    graph.add_association(
                        Gene('DBSNP', clinical_snp),
                        Abundance('NIFT', clinical_feature),
                        evidence=CANNED_EVIDENCE,
                        citation=CANNED_CITATION,
                        annotations={
                            'MeSHDisease': disease,
                        },
                    )

    if missing_features:
        logger.warning('Missing Features in %s', disease)
        for feature in missing_features:
            logger.warning(feature)

    if fixed_caps:
        logger.warning('Fixed capitalization')
        for broken, fixed in fixed_caps:
            logger.warning('%s -> %s', broken, fixed)

    return graph
Example #21
0
class TestAnnotation(unittest.TestCase):
    """Tests for getting sub-graphs by annotation."""

    def setUp(self):
        """Build the shared graph of annotated edges between the module-level nodes a to e."""
        self.graph = graph = BELGraph()

        graph.namespace_url['test'] = test_namespace_url
        graph.annotation_url['subgraph'] = test_annotation_url

        provenance = dict(citation=citation, evidence=evidence)

        # a -> b and a -| b
        graph.add_increases(a, b, annotations={'subgraph': {'1', '2'}}, **provenance)
        graph.add_decreases(a, b, annotations={'subgraph': {'1'}}, **provenance)

        # b -> c and b -- c
        graph.add_increases(b, c, annotations={'subgraph': {'1', '2'}}, **provenance)
        graph.add_association(b, c, annotations={'subgraph': {'2'}}, **provenance)

        # c -> d, the only edge without annotations
        graph.add_increases(c, d, **provenance)

        # d -> e and d causes no change in e
        graph.add_increases(d, e, annotations={'subgraph': {'1', '2'}}, **provenance)
        graph.add_qualified_edge(
            d,
            e,
            relation=CAUSES_NO_CHANGE,
            annotations={'subgraph': {'1', '2'}},
            **provenance,
        )

    def test_contradictions_finder(self):
        """Check that every reported contradiction is one of the two expected ones."""
        reported = get_contradiction_summary(self.graph)

        expected = [
            (
                protein(namespace='test', name='0'),
                protein(namespace='test', name='1'),
                {'decreases', 'increases'},
            ),
            (
                protein(namespace='test', name='3'),
                protein(namespace='test', name='4'),
                {'causesNoChange', 'increases'},
            ),
        ]

        for contradiction in reported:
            self.assertIn(contradiction, expected)
Example #22
0
class TestReconstituteEdges(TemporaryCacheMixin):
    """Test that edges with varying modifiers can be inserted into the database.

    Each test adds one qualified edge to ``self.graph``, inserts the graph with
    ``self.manager`` (presumably provided by the mixin's ``setUp``) and checks the
    number of nodes, edges and edge properties that were stored.
    """

    def setUp(self):
        """Run the mixin's set up and create a fresh graph named with ``n()``."""
        super().setUp()
        self.graph = BELGraph(name=n(), version=n())

    def _insert_graph(self, with_annotations=False):
        """Create the dummy resources for ``self.graph`` and insert it.

        :param with_annotations: if true, also call ``make_dummy_annotations``
         after ``make_dummy_namespaces``
        :return: the value returned by ``self.manager.insert_graph``
        """
        make_dummy_namespaces(self.manager, self.graph)
        if with_annotations:
            make_dummy_annotations(self.manager, self.graph)
        return self.manager.insert_graph(self.graph)

    def _assert_single_activity_effect(self, name):
        """Assert one namespace entry called ``name`` exists and one property uses it as effect.

        :param name: the expected value of ``NamespaceEntry.name``
        """
        entries = self.manager.session.query(NamespaceEntry).filter(
            NamespaceEntry.name == name).all()
        self.assertEqual(1, len(entries), msg='number of NamespaceEntrys with the activity name')

        entry = entries[0]
        self.assertEqual(name, entry.name)

        effects = self.manager.session.query(Property).join(
            NamespaceEntry).filter(Property.effect == entry)
        self.assertEqual(1, effects.count(), msg='number of effects')

    @mock_bel_resources
    def test_translocation_default(self, mock):
        """Test that an edge with a ``secretion()`` subject modifier gets in the database properly."""
        self.graph.add_increases(
            Protein(name='F2', namespace='HGNC'),
            Protein(name='EDN1', namespace='HGNC'),
            evidence=
            'In endothelial cells, ET-1 secretion is detectable under basal conditions, whereas thrombin '
            'induces its secretion.',
            citation='10473669',
            subject_modifier=secretion(),
        )

        network = self._insert_graph()
        self.assertEqual(2,
                         network.nodes.count(),
                         msg='Missing one or both of the nodes.')
        self.assertEqual(1, network.edges.count(), msg='Missing the edge')

        edge = network.edges.first()
        self.assertEqual(2, edge.properties.count())

    @mock_bel_resources
    def test_subject_translocation_custom_to_loc(self, mock):
        """Test a subject translocation with explicitly given from/to locations."""
        self.graph.add_increases(
            Protein(name='F2', namespace='HGNC'),
            Protein(name='EDN1', namespace='HGNC'),
            evidence=
            'In endothelial cells, ET-1 secretion is detectable under basal conditions, whereas thrombin induces its secretion.',
            citation='10473669',
            subject_modifier=translocation(
                from_loc=Entity(namespace='TEST', name='A'),
                to_loc=Entity(namespace='GO', name='extracellular space'),
            ),
        )

        network = self._insert_graph()
        self.assertEqual(2, network.nodes.count())
        self.assertEqual(1, network.edges.count())

        edge = network.edges.first()
        self.assertEqual(2, edge.properties.count())

    @mock_bel_resources
    def test_subject_activity_default(self, mock):
        """Test a subject activity that is given by name only (``kin``)."""
        self.graph.add_increases(
            Protein(name=n(), namespace='HGNC'),
            Protein(name=n(), namespace='HGNC'),
            evidence=n(),
            citation=n(),
            subject_modifier=activity('kin'),
        )

        network = self._insert_graph()
        self.assertEqual(2, network.nodes.count(), msg='number of nodes')
        self.assertEqual(1, network.edges.count(), msg='number of edges')

        self._assert_single_activity_effect('kin')

    @mock_bel_resources
    def test_subject_activity_custom(self, mock):
        """Test a subject activity with a custom name and namespace."""
        p1_name = n()
        p2_name = n()
        dummy_activity_namespace = n()
        dummy_activity_name = n()

        self.graph.add_increases(
            Protein(name=p1_name, namespace='HGNC'),
            Protein(name=p2_name, namespace='HGNC'),
            evidence=n(),
            citation=n(),
            subject_modifier=activity(
                name=dummy_activity_name,
                namespace=dummy_activity_namespace,
            ),
        )

        network = self._insert_graph()
        self.assertEqual(2, network.nodes.count())
        self.assertEqual(1, network.edges.count())

        self._assert_single_activity_effect(dummy_activity_name)

    @mock_bel_resources
    def test_object_activity_default(self, mock):
        """Test an object activity that is given by name only (``kin``)."""
        self.graph.add_increases(
            Protein(name=n(), namespace='HGNC'),
            Protein(name=n(), namespace='HGNC'),
            evidence=n(),
            citation=n(),
            object_modifier=activity('kin'),
        )

        network = self._insert_graph()
        self.assertEqual(2, network.nodes.count())
        self.assertEqual(1, network.edges.count())

        self._assert_single_activity_effect('kin')

    @mock_bel_resources
    def test_object_activity_custom(self, mock):
        """Test an object activity with a custom name and namespace."""
        p1_name = n()
        p2_name = n()
        dummy_activity_namespace = n()
        dummy_activity_name = n()

        self.graph.add_increases(
            Protein(name=p1_name, namespace='HGNC'),
            Protein(name=p2_name, namespace='HGNC'),
            evidence=n(),
            citation=n(),
            object_modifier=activity(
                name=dummy_activity_name,
                namespace=dummy_activity_namespace,
            ),
        )

        network = self._insert_graph()
        self.assertEqual(2, network.nodes.count())
        self.assertEqual(1, network.edges.count())

        self._assert_single_activity_effect(dummy_activity_name)

    def test_subject_degradation(self):
        """Test that a subject degradation is stored as a single edge property."""
        self.graph.add_association(
            Protein(name='YFG', namespace='HGNC'),
            Protein(name='YFG2', namespace='HGNC'),
            evidence=n(),
            citation=n(),
            subject_modifier=degradation(),
        )

        network = self._insert_graph()
        self.assertEqual(2, network.nodes.count())
        self.assertEqual(1, network.edges.count())

        edge = network.edges.first()
        self.assertEqual(1, edge.properties.count())

    def test_object_degradation(self):
        """Test that an object degradation is stored as a single edge property."""
        self.graph.add_association(
            Protein(name='YFG', namespace='HGNC'),
            Protein(name='YFG2', namespace='HGNC'),
            evidence=n(),
            citation=n(),
            object_modifier=degradation(),
        )

        network = self._insert_graph()
        self.assertEqual(2, network.nodes.count())
        self.assertEqual(1, network.edges.count())

        edge = network.edges.first()
        self.assertEqual(1, edge.properties.count())

    def test_subject_location(self):
        """Test that a subject location is stored as a single edge property."""
        self.graph.add_association(
            Protein(name='YFG', namespace='HGNC'),
            Protein(name='YFG2', namespace='HGNC'),
            evidence=n(),
            citation=n(),
            subject_modifier=location(
                Entity(namespace='GO',
                       name='nucleus',
                       identifier='GO:0005634'),
            ),
        )

        network = self._insert_graph()
        self.assertEqual(2, network.nodes.count())
        self.assertEqual(1, network.edges.count())

        edge = network.edges.first()
        self.assertEqual(1, edge.properties.count())

    def test_mixed_1(self):
        """Test an edge that has an activity on both the subject and the object."""
        self.graph.add_increases(
            Protein(namespace='HGNC', name='CDC42'),
            Protein(namespace='HGNC', name='PAK2'),
            evidence=
            """Summary: PAK proteins, a family of serine/threonine p21-activating kinases, include PAK1, PAK2,
         PAK3 and PAK4. PAK proteins are critical effectors that link Rho GTPases to cytoskeleton reorganization
         and nuclear signaling. They serve as targets for the small GTP binding proteins Cdc42 and Rac and have
         been implicated in a wide range of biological activities. PAK4 interacts specifically with the GTP-bound
         form of Cdc42Hs and weakly activates the JNK family of MAP kinases. PAK4 is a mediator of filopodia
         formation and may play a role in the reorganization of the actin cytoskeleton. Multiple alternatively
         spliced transcript variants encoding distinct isoforms have been found for this gene.""",
            citation={
                CITATION_DB: "Online Resource",
                CITATION_IDENTIFIER: "PAK4 Hs ENTREZ Gene Summary"
            },
            annotations={'Species': '9606'},
            subject_modifier=activity('gtp'),
            object_modifier=activity('kin'),
        )

        network = self._insert_graph(with_annotations=True)
        self.assertEqual(2, network.nodes.count())
        self.assertEqual(1, network.edges.count())

        edge = network.edges.first()
        self.assertEqual(2, edge.properties.count())

        # One property belongs to the subject side of the edge ...
        subject_property = edge.properties.filter(Property.is_subject).one()
        self.assertTrue(subject_property.is_subject)
        self.assertEqual('gtp', subject_property.effect.name)
        self.assertIsNotNone(subject_property.effect.namespace)
        self.assertEqual(BEL_DEFAULT_NAMESPACE,
                         subject_property.effect.namespace.keyword)

        # ... and the other one to the object side.
        object_property = edge.properties.filter(not_(Property.is_subject)).one()
        self.assertFalse(object_property.is_subject)
        self.assertEqual('kin', object_property.effect.name)
        self.assertIsNotNone(object_property.effect.namespace)
        self.assertEqual(BEL_DEFAULT_NAMESPACE,
                         object_property.effect.namespace.keyword)

    def test_mixed_2(self):
        """Tests both subject and object activity with location information as well."""
        self.graph.add_directly_increases(
            Protein(namespace='HGNC', name='HDAC4'),
            Protein(namespace='HGNC', name='MEF2A'),
            citation='10487761',
            # NOTE(review): the literal below opens with four double quotes, so the
            # evidence text itself starts with a '"' character - confirm this is intended.
            evidence=
            """"In the nucleus, HDAC4 associates with the myocyte enhancer factor MEF2A. Binding of HDAC4 to
        MEF2A results in the repression of MEF2A transcriptional activation, a function that requires the
        deacetylase domain of HDAC4.""",
            annotations={'Species': '9606'},
            subject_modifier=activity('cat',
                                      location=Entity(namespace='GO',
                                                      name='nucleus')),
            object_modifier=activity('tscript',
                                     location=Entity(namespace='GO',
                                                     name='nucleus')),
        )

        network = self._insert_graph(with_annotations=True)
        self.assertEqual(2, network.nodes.count())
        self.assertEqual(1, network.edges.count())

        edge = network.edges.first()
        # Four properties in total, split evenly between the subject and the object.
        self.assertEqual(4, edge.properties.count())
        self.assertEqual(2,
                         edge.properties.filter(Property.is_subject).count())
        self.assertEqual(
            2,
            edge.properties.filter(not_(Property.is_subject)).count())
Example #23
0
def _add_row(
    graph: BELGraph,
    relation: str,
    source_prefix: str,
    source_id: str,
    source_name: Optional[str],
    target_prefix: str,
    target_id: str,
    target_name: Optional[str],
    pubmed_id: str,
    int_detection_method: str,
    source_database: str,
    confidence: str,
) -> None:  # noqa:C901
    """Add one edge for a row, choosing the edge type from the row's relation.

    :param graph: graph to add edges to
    :param relation: row value of column relation
    :param source_prefix: row value of source prefix
    :param source_id: row value of source id
    :param source_name: row value of source name, if any
    :param target_prefix: row value of target prefix
    :param target_id: row value of target id
    :param target_name: row value of target name, if any
    :param pubmed_id: row value of column PubMed_id; if ``None``, the pair
     ``('database', 'intact')`` is used as the citation instead
    :param int_detection_method: row value of column interaction detection method
    :param source_database: row value of column source_database
    :param confidence: row value of confidence score column
    :return: None
    :raises ValueError: if the relation is listed as a decreasing action but has no
     handler, or if the relation matches none of the known relation groups
    """
    if pubmed_id is None:
        pubmed_id = 'database', 'intact'

    # The annotation stores the relation as given; only the value used for the
    # dispatching below is whitespace-normalized.
    annotations = {
        'psi-mi': relation,
        'intact-detection': int_detection_method,
        'intact-source': source_database,
        'intact-confidence': confidence,
    }

    # collapse every run of whitespace in the relation string to a single space
    relation = ' '.join(relation.split())

    source_dsl = NAMESPACE_TO_DSL.get(source_prefix, pybel.dsl.Protein)
    source = source_dsl(
        namespace=source_prefix,
        identifier=source_id,
        name=source_name,
    )
    target_dsl = NAMESPACE_TO_DSL.get(target_prefix, pybel.dsl.Protein)
    target = target_dsl(
        namespace=target_prefix,
        identifier=target_id,
        name=target_name,
    )

    # Keyword arguments shared by every edge added below.
    edge_kwargs = {
        'citation': pubmed_id,
        'evidence': EVIDENCE,
        'annotations': annotations,
    }

    if relation in PROTEIN_INCREASES_MOD_DICT:
        graph.add_increases(
            source,
            target.with_variants(PROTEIN_INCREASES_MOD_DICT[relation]),
            subject_modifier=SUBJECT_ACTIVITIES.get(relation),
            **edge_kwargs,
        )

    # dna strand elongation
    elif relation == 'psi-mi:"MI:0701"(dna strand elongation)':
        target_mod = pybel.dsl.Gene(
            namespace=target_prefix,
            identifier=target_id,
            name=target_name,
            variants=[
                GeneModification(
                    name='DNA strand elongation',
                    namespace='go',
                    identifier='0022616',
                ),
            ],
        )
        graph.add_increases(source, target_mod, **edge_kwargs)

    # DECREASES
    elif relation in INTACT_DECREASES_ACTIONS:
        # Relations whose target is decreased in a GO-described modified form:
        # relation -> (GO name, GO identifier, whether the object gets an activity modifier)
        go_modifications = {
            #: Reaction monitoring the cleavage (hydrolysis) or a lipid molecule
            'psi-mi:"MI:1355"(lipid cleavage)': ('lipid catabolic process', '0016042', True),
            #: Cleavage of a lipid group covalently bound to a protein residue
            'psi-mi:"MI:0212"(lipoprotein cleavage reaction)': ('lipoprotein modification', '0042160', True),
            # deformylation reaction
            'psi-mi:"MI:0199"(deformylation reaction)': ('protein formylation', '0018256', False),
            # protein deamidation
            'psi-mi:"MI:2280"(deamidation reaction)': ('protein amidation', '0018032', True),
            # protein decarboxylation
            'psi-mi:"MI:1140"(decarboxylation reaction)': ('protein carboxylation', '0018214', False),
            # protein deamination
            'psi-mi:"MI:0985"(deamination reaction)': ('amine binding', '0043176', False),
        }
        # Modifier keywords that only some of the branches below pass along.
        extra_kwargs = {}

        #: dna cleavage: Covalent bond breakage of a DNA molecule leading to the formation of smaller fragments
        if relation == 'psi-mi:"MI:0572"(dna cleavage)':
            # The gene form of the target keeps the target's own identifier.
            target_mod = pybel.dsl.Gene(
                namespace=target_prefix,
                identifier=target_id,
                name=target_name,
            )
        #: rna cleavage: Any process by which an RNA molecule is cleaved at specific sites or in a regulated manner
        elif relation == 'psi-mi:"MI:0902"(rna cleavage)':
            # The RNA form of the target keeps the target's own identifier.
            target_mod = pybel.dsl.Rna(
                namespace=target_prefix,
                identifier=target_id,
                name=target_name,
            )
        # cleavage
        elif relation in {
                #: Covalent bond breakage in a molecule leading to the formation of smaller molecules
                'psi-mi:"MI:0194"(cleavage reaction)',
                #: Covalent modification of a polypeptide occurring during its maturation or its proteolytic degradation
                'psi-mi:"MI:0570"(protein cleavage)',
        }:
            target_mod = target
        elif relation in go_modifications:
            go_name, go_identifier, with_activity = go_modifications[relation]
            target_mod = target.with_variants(
                pybel.dsl.ProteinModification(
                    name=go_name,
                    namespace='go',
                    identifier=go_identifier,
                ), )
            if with_activity:
                extra_kwargs['object_modifier'] = pybel.dsl.activity()
        # protein modification
        elif relation in PROTEIN_DECREASES_MOD_DICT:
            target_mod = target.with_variants(
                PROTEIN_DECREASES_MOD_DICT[relation])
        else:
            raise ValueError(
                f"The relation {relation} is not in DECREASE relations.")

        graph.add_decreases(source, target_mod, **edge_kwargs, **extra_kwargs)

    # ASSOCIATION:
    elif relation in INTACT_ASSOCIATION_ACTIONS:
        graph.add_association(source, target, **edge_kwargs)

    # REGULATES:
    elif relation in INTACT_REGULATES_ACTIONS:
        graph.add_regulates(source, target, **edge_kwargs)

    # BINDS
    elif relation in INTACT_BINDS_ACTIONS:
        graph.add_binds(source, target, **edge_kwargs)

    # no specified relation
    else:
        raise ValueError(
            f"Unspecified relation {relation} between {source} and {target}")