예제 #1
0
파일: manager.py 프로젝트: bio2bel/mirbase
    def to_bel(self) -> BELGraph:
        """Export the miRBase sequences and their cross-references as a BEL graph.

        Every ``Sequence`` yielded by ``self._get_query(Sequence)`` is converted
        with ``as_pybel()`` and connected, through an equivalence, to one
        ``MicroRna`` node per cross-reference. The cross-reference's ``database``
        is used as the namespace and its ``database_id`` as the identifier.

        The only graph mutation performed here is ``add_equivalence``, so a
        sequence without cross-references contributes nothing to the result.

        :return: the graph holding all equivalences
        """
        graph = BELGraph()

        for sequence in self._get_query(Sequence):
            # Convert once per sequence; the same node is reused for all its xrefs.
            source = sequence.as_pybel()

            for xref in sequence.xrefs:
                target = MicroRna(
                    namespace=xref.database,
                    identifier=xref.database_id,
                )
                graph.add_equivalence(source, target)

        return graph
예제 #2
0
 def test_mirna_reference(self):
     """Check that a named HGNC microRNA node renders as ``m(HGNC:MIR1)``."""
     node = MicroRna(namespace='HGNC', name='MIR1')
     expected = 'm(HGNC:MIR1)'
     self.assertEqual(expected, str(node))
예제 #3
0
CHEBI = 'CHEBI'

# Shared node fixtures. The namespace is always passed by keyword so that
# every constructor call in this block reads the same way.

# Gene / RNA / protein triple named '1'
g1 = Gene(namespace=HGNC, name='1')
r1 = Rna(namespace=HGNC, name='1')
p1 = Protein(namespace=HGNC, name='1')

# Gene / RNA / protein triple named '2'
g2 = Gene(namespace=HGNC, name='2')
r2 = Rna(namespace=HGNC, name='2')
p2 = Protein(namespace=HGNC, name='2')

# Gene / RNA / protein triple named '3'
g3 = Gene(namespace=HGNC, name='3')
r3 = Rna(namespace=HGNC, name='3')
p3 = Protein(namespace=HGNC, name='3')

# Gene / microRNA pair named '4'
g4 = Gene(namespace=HGNC, name='4')
m4 = MicroRna(namespace=HGNC, name='4')

# Non-central-dogma nodes named '5'; note that p5 is a Pathology, not a Protein
a5 = Abundance(namespace=CHEBI, name='5')
p5 = Pathology(namespace=GO, name='5')

class TestCollapseProteinInteractions(unittest.TestCase):
    """Test case built around a small graph of protein, abundance and pathology nodes."""

    def test_protein_interaction_1(self):
        """Build a graph with four nodes and one positive correlation between p1 and p2.

        NOTE(review): no assertions are visible in this method; confirm whether
        the remainder of the test is missing.
        """
        graph = BELGraph()

        # Register the nodes in the same order as they are declared
        for node in (p1, p2, a5, p5):
            graph.add_node_from_data(node)

        graph.add_qualified_edge(
            p1,
            p2,
            relation=POSITIVE_CORRELATION,
            citation=n(),
            evidence=n(),
        )
예제 #4
0
)

BEL_THOROUGH_NODES = {
    oxygen_atom,
    tmprss2_erg_rna_fusion,
    tmprss2_erg_rna_fusion_unspecified,
    akt_methylated,
    bcr_jak2_rna_fusion,
    chchd4_aifm1_rna_fusion,
    akt1_gene,
    akt1_phe_508_del,
    akt1,
    Gene('HGNC', 'AKT1', variants=Hgvs('c.308G>A')),
    tmprss2_erg_gene_fusion,
    Gene('HGNC', 'AKT1', variants=[Hgvs('c.1521_1523delCTT'), Hgvs('c.308G>A'), Hgvs('p.Phe508del')]),
    MicroRna('HGNC', 'MIR21'),
    bcr_jak2_gene_fusion,
    Gene('HGNC', 'CFTR', variants=Hgvs('c.1521_1523delCTT')),
    Gene('HGNC', 'CFTR'),
    Gene('HGNC', 'CFTR', variants=Hgvs('g.117199646_117199648delCTT')),
    Gene('HGNC', 'CFTR', variants=Hgvs('c.1521_1523delCTT')),
    Protein('HGNC', 'AKT1', variants=ProteinModification('Ph', 'Ser', 473)),
    MicroRna('HGNC', 'MIR21', variants=Hgvs('p.Phe508del')),
    Protein('HGNC', 'AKT1', variants=Hgvs('p.C40*')),
    Protein('HGNC', 'AKT1', variants=[Hgvs('p.Ala127Tyr'), ProteinModification('Ph', 'Ser')]),
    chchd4_aifm1_gene_fusion,
    tmprss2_erg_protein_fusion,
    Protein('HGNC', 'AKT1', variants=Hgvs('p.Arg1851*')),
    bcr_jak2_protein_fusion,
    Protein('HGNC', 'AKT1', variants=Hgvs('p.40*')),
    chchd4_aifm1_protein_fusion,
예제 #5
0
def _rela(x, y=None):
    """Build an edge-data dictionary with a relation and an activity object.

    :param x: value stored under the ``RELATION`` key
    :param y: argument forwarded to ``activity()``; its result is stored under ``OBJECT``
    :return: a dictionary with the ``RELATION`` and ``OBJECT`` entries
    """
    edge_data = {}
    edge_data[RELATION] = x
    edge_data[OBJECT] = activity(y)
    return edge_data


def _assoc(y):
    """Build an edge-data dictionary for an association of the given type.

    :param y: value stored under the ``'association_type'`` key
    :return: a dictionary whose ``RELATION`` is ``ASSOCIATION`` plus the association type
    """
    edge_data = {}
    edge_data[RELATION] = ASSOCIATION
    edge_data['association_type'] = y
    return edge_data


# Node fixtures shared by the definitions that follow.
# Each constructor receives a namespace first and then a name (or an
# explicit ``identifier=`` keyword).
a1 = Abundance('CHEBI', '1')
p1 = Protein('HGNC', '1')
pf1 = Protein('INTERPRO', '1')
d1 = Pathology('MESH', '1')
b1 = BiologicalProcess('GO', '1')
b2 = BiologicalProcess('GO', '2')
m1 = MicroRna('MIRBASE', '1')
r1 = Rna('HGNC', '1')
r2 = Rna('HGNC', '2')
nca1 = NamedComplexAbundance('FPLX', '1')
pop1 = Population('taxonomy', '1')

# Proteins referenced by identifier rather than by name
p2 = Protein('HGNC', identifier='9236')
p3 = Protein('HGNC', identifier='9212')
# RNA and gene derived from p3; the order matters because g3 is built from r3
r3 = p3.get_rna()
g3 = r3.get_gene()

# Complexes combining the nodes above
c1 = ComplexAbundance([p2, g3])
c2 = ComplexAbundance([p1, p2])
c3 = ComplexAbundance([a1, p2])

converters_true_list = [
예제 #6
0
def normalize_graph_names(graph: BELGraph, database: str) -> None:
    """Normalize the names of the nodes of a graph and relabel the graph with them.

    The function walks over ``graph.nodes()`` and, for every named node that is
    neither a ``ListAbundance`` nor a ``Reaction``, builds a replacement node
    whose name is lower-cased and stripped of double quotes and surrounding
    white space. Replacements are collected in two mappings:

    - a one-to-one mapping (old node -> new node), and
    - a one-to-many mapping (old node -> set of new nodes), used when one
      entry is flattened into several entities.

    Both mappings are finally handed to ``relabel_nodes`` and ``multi_relabel``.
    Nothing is returned, so the graph is presumably modified in place
    (NOTE(review): confirm against those two helpers).

    Handling per node type:

    - ``CentralDogma``: names starting with ``"mir"`` become ``MicroRna``
      nodes, everything else becomes a ``Protein`` (or a ``BiologicalProcess``
      for WikiPathways names listed in ``WIKIPATHWAYS_BIOL_PROCESS``).
    - ``Abundance``: kept as ``Abundance`` unless a database-specific rule
      turns it into a ``BiologicalProcess`` or a ``Protein``, or splits it.
    - ``BiologicalProcess``: a leading ``'title:'`` is removed from the name.

    Nodes of any other type are left out of both mappings.

    :param graph: the BEL graph whose nodes are normalized
    :param database: name of the source database; it is compared against
        ``REACTOME``, ``WIKIPATHWAYS`` and the literal ``'wikipathways'``
    """
    # Victim to Survivor (one to one node) mapping
    one_to_one_mapping = {}
    # Victim to Survivors (one to many nodes) mapping
    one_to_many_mapping = defaultdict(set)

    for node in graph.nodes():

        # Skip ListAbundances and Reactions since they do not have a name
        # (nodes with an empty or missing name are skipped as well)
        if isinstance(node, ListAbundance) or isinstance(
                node, Reaction) or not node.name:
            continue

        # Normalize names: Lower case name and strip quotes or white spaces
        lower_name = node.name.lower().strip('"').strip()

        # Dealing with Genes/miRNAs
        if isinstance(node, CentralDogma):

            ##################
            # miRNA entities #
            ##################

            if lower_name.startswith("mir"):

                # Reactome preprocessing to flat multiple identifiers
                if database == REACTOME:
                    reactome_cell = munge_reactome_gene(lower_name)
                    if isinstance(reactome_cell, list):
                        # The loop variable rebinds ``lower_name``; after the loop it
                        # holds the last element of the list (if the list is not empty).
                        for lower_name in reactome_cell:
                            one_to_many_mapping[node].add(
                                MicroRna(
                                    node.namespace,
                                    name=lower_name.replace("mir-", "mir"),
                                    identifier=node.identifier,
                                ), )

                    # NOTE(review): there is no ``else``/``continue`` after the list case,
                    # so a node that was just added to the one-to-many mapping is also
                    # added to the one-to-one mapping below (the gene branch uses an
                    # ``else`` instead). Confirm whether this is intended.
                    # Drop a trailing ' genes' / ' gene' suffix from the name
                    if lower_name.endswith(' genes'):
                        lower_name = lower_name[:-len(' genes')]
                    elif lower_name.endswith(' gene'):
                        lower_name = lower_name[:-len(' gene')]
                    # NOTE(review): unlike the other replacements, no identifier is passed here
                    one_to_one_mapping[node] = MicroRna(
                        node.namespace,
                        name=lower_name.replace(
                            "mir-", "mir"),  # Special case for Reactome
                    )
                    continue

                # Remaining databases (the Reactome branch above always continues).
                # NOTE(review): this uses ``node.name`` rather than ``lower_name``, so the
                # name is neither lower-cased nor stripped here — confirm this is intended.
                one_to_one_mapping[node] = MicroRna(
                    node.namespace,
                    name=node.name.replace("mir-", "mir"),
                    identifier=node.identifier,
                )

            ##################
            # Genes entities #
            ##################

            else:
                # Reactome preprocessing to flat multiple identifiers
                if database == REACTOME:
                    reactome_cell = munge_reactome_gene(lower_name)
                    if isinstance(reactome_cell, list):
                        for lower_name in reactome_cell:
                            # The black list is checked before the parentheses are removed
                            if lower_name in BLACK_LIST_REACTOME:  # Filter entities in black list
                                continue
                            elif lower_name.startswith(
                                    "("):  # remove redundant parentheses
                                lower_name = lower_name.strip("(").strip(")")

                            one_to_many_mapping[node].add(
                                Protein(node.namespace,
                                        name=lower_name,
                                        identifier=node.identifier), )
                    else:
                        # Single entity: map the node one to one
                        one_to_one_mapping[node] = Protein(
                            node.namespace,
                            name=lower_name,
                            identifier=node.identifier)

                    continue

                # WikiPathways and KEGG do not require any processing of genes,
                # except for WikiPathways names that are known biological processes
                elif database == WIKIPATHWAYS and lower_name in WIKIPATHWAYS_BIOL_PROCESS:
                    one_to_one_mapping[node] = BiologicalProcess(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Default: every other central dogma node is mapped to a Protein
                one_to_one_mapping[node] = Protein(node.namespace,
                                                   name=lower_name,
                                                   identifier=node.identifier)

        #######################
        # Metabolite entities #
        #######################

        elif isinstance(node, Abundance):

            # NOTE(review): the literal 'wikipathways' is used here while the gene branch
            # compares against the WIKIPATHWAYS constant — confirm both hold the same value.
            if database == 'wikipathways':
                # Biological processes that are captured as abundance in
                # BEL since they were characterized wrong in WikiPathways
                if lower_name in WIKIPATHWAYS_BIOL_PROCESS:
                    one_to_one_mapping[node] = BiologicalProcess(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Abundances to BiologicalProcesses
                elif (node.namespace
                      in {'WIKIDATA', 'WIKIPATHWAYS', 'REACTOME'}
                      and lower_name not in WIKIPATHWAYS_METAB):
                    one_to_one_mapping[node] = BiologicalProcess(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Fix naming in duplicate entity
                if lower_name in WIKIPATHWAYS_NAME_NORMALIZATION:
                    lower_name = WIKIPATHWAYS_NAME_NORMALIZATION[lower_name]

            elif database == REACTOME:
                # Curated proteins that were coded as metabolites
                if lower_name in REACTOME_PROT:
                    one_to_one_mapping[node] = Protein(
                        node.namespace,
                        name=lower_name,
                        identifier=node.identifier,
                    )
                    continue

                # Flat multiple identifiers (this is not trivial because most of ChEBI names contain commas,
                # so a clever way to fix some of the entities is to check that all identifiers contain letters)
                # The pieces are not stripped, so a piece with a leading space (", " separator)
                # fails ``isalpha()`` and the name is then kept as a single Abundance below.
                elif "," in lower_name and all(
                        string.isalpha() for string in lower_name.split(",")):
                    for string in lower_name.split(","):
                        one_to_many_mapping[node].add(
                            Abundance(node.namespace,
                                      name=string,
                                      identifier=node.identifier), )
                    continue

            # Default: keep the node as an Abundance with the normalized name
            one_to_one_mapping[node] = Abundance(node.namespace,
                                                 name=lower_name,
                                                 identifier=node.identifier)

        #################################
        # Biological Processes entities #
        #################################

        elif isinstance(node, BiologicalProcess):
            # KEGG normalize name by removing the title prefix
            if lower_name.startswith('title:'):
                lower_name = lower_name[len('title:'):]

            one_to_one_mapping[node] = BiologicalProcess(
                node.namespace,
                name=lower_name,
                identifier=node.identifier,
            )

    # Apply the one-to-one replacements first, then the one-to-many expansions
    relabel_nodes(graph, one_to_one_mapping)
    multi_relabel(graph, one_to_many_mapping)
예제 #7
0
    'HGNC', 'TMPRSS2'),
                                               partner_3p=Rna('HGNC', 'ERG'))

BEL_THOROUGH_NODES = {
    oxygen_atom, tmprss2_erg_rna_fusion, tmprss2_erg_rna_fusion_unspecified,
    akt_methylated, bcr_jak2_rna_fusion, chchd4_aifm1_rna_fusion, akt1_gene,
    akt1_phe_508_del, akt1,
    Gene('HGNC', 'AKT1', variants=Hgvs('c.308G>A')), tmprss2_erg_gene_fusion,
    Gene('HGNC',
         'AKT1',
         variants=[
             Hgvs('c.1521_1523delCTT'),
             Hgvs('c.308G>A'),
             Hgvs('p.Phe508del')
         ]),
    MicroRna('HGNC', 'MIR21'), bcr_jak2_gene_fusion,
    Gene('HGNC', 'CFTR', variants=Hgvs('c.1521_1523delCTT')),
    Gene('HGNC', 'CFTR'),
    Gene('HGNC', 'CFTR', variants=Hgvs('g.117199646_117199648delCTT')),
    Gene('HGNC', 'CFTR', variants=Hgvs('c.1521_1523delCTT')),
    Protein('HGNC', 'AKT1', variants=ProteinModification('Ph', 'Ser', 473)),
    MicroRna('HGNC', 'MIR21', variants=Hgvs('p.Phe508del')),
    Protein('HGNC', 'AKT1', variants=Hgvs('p.C40*')),
    Protein('HGNC',
            'AKT1',
            variants=[Hgvs('p.Ala127Tyr'),
                      ProteinModification('Ph', 'Ser')]),
    chchd4_aifm1_gene_fusion, tmprss2_erg_protein_fusion,
    Protein('HGNC', 'AKT1',
            variants=Hgvs('p.Arg1851*')), bcr_jak2_protein_fusion,
    Protein('HGNC', 'AKT1',