Esempio n. 1
0
    def process_on_hold(on_hold, complexes_by_id, complexes):
        """
        Try to resolve complexes whose members include other complexes.

        Each item of ``on_hold`` is a ``(name, components, id_)`` tuple.
        A component that is a key of ``complexes_by_id`` is replaced by the
        member collections of the complexes stored under that key; any
        other component is kept as it is. All combinations of these
        alternatives are enumerated. A combination that still contains an
        identifier starting with ``SIGNOR-C`` is queued again, otherwise a
        complex is created and registered in ``complexes`` (keyed by its
        string form) and in ``complexes_by_id`` (under ``id_``).

        Both dicts are modified in place and also returned.

        :return: Tuple of the list of still unresolved items,
            ``complexes_by_id`` and ``complexes``.
        """

        still_pending = []

        for name, components, id_ in on_hold:

            # one collection of alternatives for each member
            alternatives = []

            for member_id in components:

                if member_id in complexes_by_id:

                    alternatives.append([
                        known.components
                        for known in complexes_by_id[member_id]
                    ])

                else:

                    alternatives.append(((member_id, ), ))

            for combination in itertools.product(*alternatives):

                flat_members = list(
                    itertools.chain.from_iterable(combination)
                )
                has_nested = any(
                    member.startswith('SIGNOR-C')
                    for member in flat_members
                )

                if has_nested:

                    # at least one member is a not yet resolved complex
                    still_pending.append((name, flat_members, id_))
                    continue

                cplex = intera.Complex(
                    name=name.replace('"', '').strip(),
                    components=flat_members,
                    sources='SIGNOR',
                    ids=id_,
                )

                complexes[str(cplex)] = cplex
                complexes_by_id[id_].add(cplex)

        return still_pending, complexes_by_id, complexes
Esempio n. 2
0
    def get_partners(components, sources, references):
        """
        Turn groups of components into a set of interaction partners.

        A group with exactly one element is represented by that element
        itself; any other group becomes an ``intera.Complex`` created with
        the given ``sources`` and ``references``.

        :param components: Iterable of sized, indexable groups.
        :return: Set of single components and complex objects.
        """

        partners = set()

        for members in components:

            if len(members) == 1:

                partners.add(members[0])

            else:

                partners.add(
                    intera.Complex(
                        components=members,
                        sources=sources,
                        references=references,
                    )
                )

        return partners
Esempio n. 3
0
    def name_method(rec):
        """
        Create a CellPhoneDB complex object from one record.

        The members come from ``get_stoichiometry(rec)``; the value of the
        ``complex_name`` field is used both as name and as identifier.
        """

        stoichiometry = get_stoichiometry(rec)

        return intera.Complex(
            sources='CellPhoneDB',
            name=rec['complex_name'],
            ids=rec['complex_name'],
            components=stoichiometry,
        )
Esempio n. 4
0
def _icellnet_get_entity(components, references):

    if len(components) > 1:

        return intera.Complex(
            components=components,
            sources='ICELLNET',
            references=references,
        )

    elif len(components) == 1:

        return components[0]
Esempio n. 5
0
def _cellchatdb_process_complexes(raw, organism = 9606):
    """
    Build protein complexes from the CellChatDB complex table.

    The gene symbols of each row are translated to UniProt IDs; symbols
    without any translation are dropped. One complex is created for every
    combination of the translated IDs.

    :param raw: The complex table, or a dict holding it under the key
        ``complex``. Must provide ``itertuples()`` and a ``rownames``
        field -- presumably a ``pandas.DataFrame``; TODO confirm against
        the caller.
    :param organism: Passed to ``_cellchatdb_organism`` to obtain the
        taxonomy ID used for the ID translation and for the complexes.
    :return: Dict of ``intera.Complex`` objects keyed by their string
        representation.
    """

    if isinstance(raw, dict):

        raw = raw['complex']

    ncbi_tax_id = _cellchatdb_organism(organism)

    complexes = {}

    for row in raw.itertuples():

        # all fields except the first and the last one, empty values
        # skipped
        genesymbols = [c for c in row[1:-1] if c]

        uniprots = [
            mapping.map_name(
                gs,
                'genesymbol',
                'uniprot',
                ncbi_tax_id = ncbi_tax_id,
            )
            for gs in genesymbols
        ]

        uniprots = [up for up in uniprots if up]

        if not uniprots:

            # `itertools.product()` without arguments yields one empty
            # tuple, which would result in a complex without any member
            continue

        for components in itertools.product(*uniprots):

            cplex = intera.Complex(
                name = row.rownames,
                components = components,
                # this function processes CellChatDB; the label used to
                # be the name of another resource (CellTalkDB)
                sources = 'CellChatDB',
                ncbi_tax_id = ncbi_tax_id,
            )
            complexes[cplex.__str__()] = cplex

    return complexes
Esempio n. 6
0
    def process_complex(ids, symbols, types):
        """
        Return the set of complexes for one group of members, with caching.

        Results are stored in ``cplexes`` (from the enclosing scope) under
        ``ids``, hence ``ids`` has to be usable as a key there. If every
        element of ``types`` is ``'gene'``, each ``(id, symbol)`` pair is
        passed to ``process_protein`` and one complex is created for every
        combination of its results -- presumably collections of UniProt
        IDs; TODO confirm. Groups with any other member type yield an
        empty set.

        :return: Set of ``intera.Complex`` objects, possibly empty.
        """

        if ids not in cplexes:

            if all(t == 'gene' for t in types):

                uniprots = [
                    process_protein(id_, symbol)
                    for id_, symbol in zip(ids, symbols)
                ]

                cplexes[ids] = {
                    intera.Complex(
                        components=components,
                        sources='KEGG-MEDICUS',
                    )
                    for components in itertools.product(*uniprots)
                }

            else:

                # Previously the empty set stored here was always
                # overwritten by the result of the protein processing
                # below; the two outcomes are now mutually exclusive.
                cplexes[ids] = set()

        return cplexes[ids]
Esempio n. 7
0
def signor_complexes(organism=9606):
    """
    Download the SIGNOR complex table and build complex objects from it.

    Each record is split by ``;``: the first field is used as the complex
    identifier, the second as its name and the third as a comma separated
    list of members. Members found in the result of
    ``signor_protein_families`` are replaced by the members of the family,
    and one complex is created for each combination. Records having other
    complexes among their members (identifiers starting with ``SIGNOR-C``)
    are resolved iteratively once the referred complexes are available.

    :param organism: Passed to ``signor_protein_families``; not used
        otherwise.
    :return: Dict of ``intera.Complex`` objects keyed by their string
        representation.
    """
    #TODO: implement organism

    def process_on_hold(on_hold, complexes_by_id, complexes):
        """
        Attempt to resolve the records in ``on_hold``.

        Members that are keys of ``complexes_by_id`` are replaced by the
        members of the complexes stored there. Combinations still
        containing a ``SIGNOR-C`` identifier are returned for another
        attempt, the rest are registered as complexes. The two dicts are
        modified in place.
        """

        on_hold_next = []

        for name, components, id_ in on_hold:

            components = [
                [comp.components for comp in complexes_by_id[comp_id]]
                if comp_id in complexes_by_id else ((comp_id, ), )
                for comp_id in components
            ]

            for components0 in itertools.product(*components):

                this_components = list(itertools.chain(*components0))

                if any(
                        comp.startswith('SIGNOR-C')
                        for comp in this_components):

                    on_hold_next.append((name, this_components, id_))

                else:

                    cplex = intera.Complex(
                        name=name.replace('"', '').strip(),
                        components=this_components,
                        sources='SIGNOR',
                        ids=id_,
                    )

                    complexes[cplex.__str__()] = cplex
                    complexes_by_id[id_].add(cplex)

        return on_hold_next, complexes_by_id, complexes

    def count_resolved(complexes_by_id):
        """Total number of complexes registered under all identifiers."""

        return sum(len(cplexes) for cplexes in complexes_by_id.values())

    complexes = {}
    on_hold = []

    families = signor_protein_families(organism=organism)

    url = urls.urls['signor']['complexes']
    c = curl.Curl(
        url,
        binary_data=[(b'submit', b'Download complex data')],
        large=True,
    )
    # the first item is discarded (presumably the header line)
    _ = next(c.result)

    complexes_by_id = collections.defaultdict(set)

    for rec in c.result:

        rec = rec.split(';')
        components = [u.strip('\n\r" ') for u in rec[2].split(',')]

        # protein families are expanded to their members
        components = [
            families[comp] if comp in families else [comp]
            for comp in components
        ]

        for this_components in itertools.product(*components):

            # some complex contains other complexes
            if any(comp.startswith('SIGNOR-C') for comp in this_components):

                on_hold.append((rec[1], this_components, rec[0]))

            else:

                cplex = intera.Complex(
                    name=rec[1].replace('"', '').strip(),
                    components=this_components,
                    # same label as used for the recursively resolved
                    # complexes above
                    sources='SIGNOR',
                    ids=rec[0],
                )

                complexes[cplex.__str__()] = cplex
                complexes_by_id[rec[0]].add(cplex)

    # Complexes are defined recursively: keep resolving as long as a pass
    # registers at least one new complex. The length of `on_hold` is not a
    # reliable indicator of progress: it stays the same if, within one
    # pass, one record gets resolved while another one fans out into two.
    # A pass which adds nothing to `complexes_by_id` leaves the next pass
    # with the very same lookup table, hence it is safe to stop there.
    while on_hold:

        resolved_before = count_resolved(complexes_by_id)
        on_hold, complexes_by_id, complexes = process_on_hold(
            on_hold,
            complexes_by_id,
            complexes,
        )

        if count_resolved(complexes_by_id) == resolved_before:

            break

    return complexes