Esempio n. 1
0
    def add_hms_lincs_xrefs(self):
        """Add xref edges between HMS-LINCS small molecules and their refs.

        For every small molecule ID held by the LINCS client, each reference
        returned by ``get_small_molecule_refs`` yields two edges: one from
        the HMS-LINCS node to the referenced node and one in the opposite
        direction. Both edges carry ``{'type': 'xref', 'source':
        'hms-lincs'}`` as data. IDs whose second hyphen-separated component
        is ``999`` are skipped. All edges are added in a single
        ``add_edges_from`` call.
        """
        from indra.databases.lincs_client import LincsClient
        lc = LincsClient()

        edges = []
        # Only the IDs are needed here, so iterate over the keys directly.
        for hmsl_id in lc._sm_data:
            # Index into the split result instead of unpacking it into
            # exactly two names, so an ID with more than one hyphen does not
            # raise a ValueError.
            id_parts = hmsl_id.split('-')
            hmsl_base_id = id_parts[0]
            suffix = id_parts[1] if len(id_parts) > 1 else None
            if suffix == '999':
                continue
            refs = lc.get_small_molecule_refs(hmsl_id)
            # The HMS-LINCS label is the same for all references of this
            # molecule, so build it once.
            hmsl_node = self.label('HMS-LINCS', hmsl_base_id)
            for ref_ns, ref_id in refs.items():
                ref_node = self.label(ref_ns, ref_id)
                # Each direction gets its own data dict so the two edges
                # never share a mutable object.
                edges.append((hmsl_node, ref_node,
                              {'type': 'xref', 'source': 'hms-lincs'}))
                edges.append((ref_node, hmsl_node,
                              {'type': 'xref', 'source': 'hms-lincs'}))
        self.add_edges_from(edges)
Esempio n. 2
0
    def __init__(self, lincs_data):
        """Store the input entries and process each one into statements.

        Parameters
        ----------
        lincs_data : iterable
            The entries to process. Each one is passed, in order, to
            ``self._process_line``.
        """
        self.statements = []
        self._lc = LincsClient()
        self._data = lincs_data

        # Every entry of the input is handed on for processing.
        for entry in lincs_data:
            self._process_line(entry)
Esempio n. 3
0
class TasProcessor(object):
    """A processor for the Target Affinity Spectrum data table.

    Parameters
    ----------
    data : iterable of dict
        The rows of the table. The keys read from each row are
        ``class_min``, ``hms_id``, ``approved_symbol`` and ``gene_id``.
    affinity_class_limit : number
        Rows whose ``class_min``, converted to an integer, is greater than
        this value are skipped.

    Attributes
    ----------
    statements : list
        The Inhibition statements extracted from the rows.
    affinity_class_limit : number
        The limit that was passed to the constructor.
    """
    def __init__(self, data, affinity_class_limit):
        self._data = data
        self._lc = LincsClient()
        self.affinity_class_limit = affinity_class_limit

        self.statements = []
        for row in data:
            # Skip rows that are above the affinity class limit
            if int(row['class_min']) > affinity_class_limit:
                continue
            self._process_row(row)

    def _process_row(self, row):
        """Append an Inhibition statement for one row if its target is kept.

        Parameters
        ----------
        row : dict
            A table row providing ``hms_id``, ``approved_symbol``,
            ``gene_id`` and ``class_min``.
        """
        prot = self._extract_protein(row['approved_symbol'], row['gene_id'])
        # NOTE: there are several entries in this data set that refer to
        # non-human Entrez genes, e.g.
        # https://www.ncbi.nlm.nih.gov/gene/3283880
        # We skip these for now because resources for Entrez-based
        # mappings for non-human genes are not integrated, and would cause
        # pre-assembly issues.
        if 'HGNC' not in prot.db_refs:
            return
        # The drug and the evidence are only built once we know the row is
        # kept, so discarded rows do not trigger any drug lookups.
        drug = self._extract_drug(row['hms_id'])
        ev = self._make_evidence(row['class_min'])
        self.statements.append(Inhibition(drug, prot, evidence=ev))

    def _extract_drug(self, hms_id):
        """Return an Agent for the small molecule with the given HMS ID.

        The name and references come from the LINCS client. If the
        references include a PUBCHEM entry and a ChEBI ID can be found for
        it, a ``CHEBI`` reference of the form ``CHEBI:<id>`` is added.
        """
        refs = self._lc.get_small_molecule_refs(hms_id)
        name = self._lc.get_small_molecule_name(hms_id)
        if 'PUBCHEM' in refs:
            chebi_id = chebi_client.get_chebi_id_from_pubchem(refs['PUBCHEM'])
            if chebi_id:
                refs['CHEBI'] = 'CHEBI:%s' % chebi_id
        return Agent(name, db_refs=refs)

    def _extract_protein(self, name, gene_id):
        """Return an Agent for the target given its name and Entrez gene ID.

        The references always contain ``EGID``. When an HGNC ID can be
        found for the Entrez ID, ``HGNC`` is added, ``UP`` is added if a
        UniProt ID is available, and the agent name is replaced by the
        HGNC name.
        """
        refs = {'EGID': gene_id}
        hgnc_id = hgnc_client.get_hgnc_from_entrez(gene_id)
        if hgnc_id is not None:
            refs['HGNC'] = hgnc_id
            up_id = hgnc_client.get_uniprot_id(hgnc_id)
            if up_id:
                refs['UP'] = up_id
            # If there is a HGNC ID, we standardize the gene name
            name = hgnc_client.get_hgnc_name(hgnc_id)
        return Agent(name, db_refs=refs)

    def _make_evidence(self, class_min):
        """Return a direct 'tas' Evidence annotated with the affinity class.

        The ``class_min`` annotation holds the ``CLASS_MAP`` entry for the
        given value, so a value missing from ``CLASS_MAP`` raises KeyError.
        """
        ev = Evidence(source_api='tas', epistemics={'direct': True},
                      annotations={'class_min': CLASS_MAP[class_min]})
        return ev
Esempio n. 4
0
    def __init__(self, lincs_data):
        """Keep the input entries and build statements from all of them.

        Parameters
        ----------
        lincs_data : iterable
            The entries to process; ``self._process_line`` is called once
            per entry, in iteration order.
        """
        self.statements = []
        self._lc = LincsClient()
        self._data = lincs_data

        # No entry is skipped here; each one goes to _process_line.
        for item in lincs_data:
            self._process_line(item)
Esempio n. 5
0
    def __init__(self, data, affinity_class_limit):
        """Store the inputs and process the rows within the class limit.

        Parameters
        ----------
        data : iterable of dict
            The rows to process; each must provide a ``class_min`` value
            that can be converted to an integer.
        affinity_class_limit : number
            Rows whose integer ``class_min`` is greater than this value are
            not processed.
        """
        self.statements = []
        self.affinity_class_limit = affinity_class_limit
        self._lc = LincsClient()
        self._data = data

        for entry in data:
            # Only rows that are not above the affinity class limit are
            # passed on for processing.
            above_limit = int(entry['class_min']) > affinity_class_limit
            if not above_limit:
                self._process_row(entry)
Esempio n. 6
0
    def add_hms_lincs_nodes(self):
        """Add a node for each HMS-LINCS small molecule held by the client.

        Each node is labeled with the ``HMS-LINCS`` namespace and the first
        hyphen-separated component of the small molecule ID, and carries
        the entry's ``Name`` value as its ``name`` attribute. IDs whose
        second hyphen-separated component is ``999`` are skipped. All nodes
        are added in a single ``add_nodes_from`` call.
        """
        from indra.databases.lincs_client import LincsClient
        lc = LincsClient()

        nodes = []
        for hmsl_id, data in lc._sm_data.items():
            # Index into the split result instead of unpacking it into
            # exactly two names, so an ID with more than one hyphen does not
            # raise a ValueError.
            id_parts = hmsl_id.split('-')
            hmsl_base_id = id_parts[0]
            suffix = id_parts[1] if len(id_parts) > 1 else None
            if suffix == '999':
                continue
            nodes.append((self.label('HMS-LINCS', hmsl_base_id),
                          {'name': data['Name']}))
        self.add_nodes_from(nodes)
Esempio n. 7
0
class LincsProcessor(object):
    """Processor for the HMS LINCS drug target dataset.

    Parameters
    ----------
    lincs_data : list[dict]
        A list of dicts with keys set by the header of the csv, and values from
        the data in the csv.

    Attributes
    ----------
    statements : list[indra.statements.Statement]
        A list of indra statements extracted from the CSV file.
    """

    # Compiled once for the class instead of on every _make_evidence call.
    # The raw string keeps \s and \d as regex escapes rather than invalid
    # string escape sequences.
    _PMID_PATTERN = re.compile(r'(?:pmid|pubmed\s+id):\s+(\d+)',
                               re.IGNORECASE)

    def __init__(self, lincs_data):
        self._data = lincs_data
        self._lc = LincsClient()

        # Process every entry; the csv header is not part of the data, it
        # only provides the keys of the dicts.
        self.statements = []
        for line in self._data:
            self._process_line(line)

    def _process_line(self, line):
        """Append an Inhibition statement for one entry, if it has a protein.

        Parameters
        ----------
        line : dict
            One entry of the dataset, keyed by the csv column names.
        """
        # The protein is checked first so that entries without protein
        # information do not trigger any drug lookups.
        prot = self._extract_protein(line)
        if prot is None:
            return
        drug = self._extract_drug(line)
        evidence = self._make_evidence(line)
        self.statements.append(Inhibition(drug, prot, evidence=evidence))

    def _extract_drug(self, line):
        """Return an Agent for the small molecule of the given entry.

        The name is taken from the entry and the references from the LINCS
        client. If the references include a PUBCHEM entry and a ChEBI ID
        can be found for it, that ID is added as the ``CHEBI`` reference.
        """
        drug_name = line['Small Molecule Name']
        lincs_id = line['Small Molecule HMS LINCS ID']
        refs = self._lc.get_small_molecule_refs(lincs_id)
        if 'PUBCHEM' in refs:
            chebi_id = chebi_client.get_chebi_id_from_pubchem(refs['PUBCHEM'])
            if chebi_id:
                refs['CHEBI'] = chebi_id

        return Agent(drug_name, db_refs=refs)

    def _extract_protein(self, line):
        """Return an Agent for the protein of the given entry, or None.

        None is returned when the entry has no protein HMS LINCS ID.
        """
        # Extract key information from the lines.
        prot_name = line['Protein Name']
        prot_id = line['Protein HMS LINCS ID']

        # In some cases lines are missing protein information in which
        # case we return None
        if not prot_id:
            return None

        # Get available db-refs.
        db_refs = {}
        db_refs.update(self._lc.get_protein_refs(prot_id))
        # Since the resource only gives us an UP ID (not HGNC), we
        # try to get that and standardize the name to the gene name
        up_id = db_refs.get('UP')
        if up_id:
            hgnc_id = uniprot_client.get_hgnc_id(up_id)
            if hgnc_id:
                db_refs['HGNC'] = hgnc_id
                prot_name = hgnc_client.get_hgnc_name(hgnc_id)
            else:
                # Without an HGNC ID, fall back to the UniProt gene name
                # if there is one.
                gene_name = uniprot_client.get_gene_name(up_id)
                if gene_name:
                    prot_name = gene_name

        # Create the agent.
        return Agent(prot_name, db_refs=db_refs)

    def _make_evidence(self, line):
        """Return a list of Evidence, one per entry in 'Key References'.

        The 'Key References' value is split on ';'. Each piece becomes the
        ``reference`` annotation of one direct 'lincs_drug' Evidence, which
        also carries the ``is_nominal`` and ``effective_concentration``
        annotations of the entry. The pmid is the number found by the PMID
        pattern in that piece, or None if the pattern does not match.
        """
        ev_list = []
        key_refs = line['Key References'].split(';')
        generic_notes = {
            'is_nominal': line['Is Nominal'],
            'effective_concentration': line['Effective Concentration']
            }
        for ref in key_refs:
            # Only extracting pmids, but there is generally more info available.
            m = self._PMID_PATTERN.search(ref)
            pmid = m.group(1) if m is not None else None
            annotations = {'reference': ref}
            annotations.update(generic_notes)
            ev = Evidence('lincs_drug', pmid=pmid, annotations=annotations,
                          epistemics={'direct': True})
            ev_list.append(ev)
        return ev_list
Esempio n. 8
0
from __future__ import absolute_import, print_function, unicode_literals

import unittest
from nose.plugins.attrib import attr
from indra.databases.lincs_client import get_drug_target_data, LincsClient

# Single client instance used by the tests below.
lc = LincsClient()


@attr('webservice')
@unittest.skip('LINCS web service very unreliable.')
def test_get_drug_target_data():
    """Check that the drug target data has more than 100 entries."""
    num_entries = len(get_drug_target_data())
    assert num_entries > 100, num_entries


def test_get_protein_refs():
    """Check the references returned for the protein with ID 200020."""
    prot_refs = lc.get_protein_refs('200020')
    # Attach the observed refs to each assert so a failure shows what was
    # actually returned, as the other tests in this module do.
    assert prot_refs.get('UP') == 'P00519', prot_refs
    assert prot_refs.get('EGID') == '25', prot_refs
    assert prot_refs.get('HMS-LINCS') == '200020', prot_refs


def test_get_sm_name():
    """Check the name returned for the small molecule with ID 10001."""
    observed = lc.get_small_molecule_name('10001')
    expected = 'Seliciclib'
    assert observed == expected, observed


def test_get_sm_refs():
    """Check the HMS-LINCS reference of the small molecule with ID 10001."""
    observed_refs = lc.get_small_molecule_refs('10001')
    hms_lincs_ref = observed_refs.get('HMS-LINCS')
    assert hms_lincs_ref == '10001', observed_refs
Esempio n. 9
0
class LincsProcessor(object):
    """Processor for the HMS LINCS drug target dataset.

    Parameters
    ----------
    lincs_data : list[dict]
        A list of dicts with keys set by the header of the csv, and values from
        the data in the csv.

    Attributes
    ----------
    statements : list[indra.statements.Statement]
        A list of indra statements extracted from the CSV file.
    """

    # Compiled once for the class instead of on every _make_evidence call.
    # The raw string keeps \s and \d as regex escapes rather than invalid
    # string escape sequences.
    _PMID_PATTERN = re.compile(r'(?:pmid|pubmed\s+id):\s+(\d+)',
                               re.IGNORECASE)

    def __init__(self, lincs_data):
        self._data = lincs_data
        self._lc = LincsClient()

        # Process every entry; the csv header is not part of the data, it
        # only provides the keys of the dicts.
        self.statements = []
        for line in self._data:
            self._process_line(line)

    def _process_line(self, line):
        """Append an Inhibition statement for one entry, if it has a protein.

        Parameters
        ----------
        line : dict
            One entry of the dataset, keyed by the csv column names.
        """
        # The protein is checked first so that entries without protein
        # information do not trigger any drug lookups.
        prot = self._extract_protein(line)
        if prot is None:
            return
        drug = self._extract_drug(line)
        evidence = self._make_evidence(line)
        self.statements.append(Inhibition(drug, prot, evidence=evidence))

    def _extract_drug(self, line):
        """Return an Agent for the small molecule of the given entry.

        The name is taken from the entry and the references from the LINCS
        client. If the references include a PUBCHEM entry and a ChEBI ID
        can be found for it, a ``CHEBI`` reference of the form
        ``CHEBI:<id>`` is added.
        """
        drug_name = line['Small Molecule Name']
        lincs_id = line['Small Molecule HMS LINCS ID']
        refs = self._lc.get_small_molecule_refs(lincs_id)
        if 'PUBCHEM' in refs:
            chebi_id = chebi_client.get_chebi_id_from_pubchem(refs['PUBCHEM'])
            if chebi_id:
                refs['CHEBI'] = 'CHEBI:%s' % chebi_id

        return Agent(drug_name, db_refs=refs)

    def _extract_protein(self, line):
        """Return an Agent for the protein of the given entry, or None.

        None is returned when the entry has no protein HMS LINCS ID.
        """
        # Extract key information from the lines.
        prot_name = line['Protein Name']
        prot_id = line['Protein HMS LINCS ID']

        # In some cases lines are missing protein information in which
        # case we return None
        if not prot_id:
            return None

        # Get available db-refs.
        db_refs = {}
        db_refs.update(self._lc.get_protein_refs(prot_id))
        # Since the resource only gives us an UP ID (not HGNC), we
        # try to get that and standardize the name to the gene name
        up_id = db_refs.get('UP')
        if up_id:
            gene_name = uniprot_client.get_gene_name(up_id)
            if gene_name:
                prot_name = gene_name
                # The HGNC ID is looked up from the gene name.
                hgnc_id = hgnc_client.get_hgnc_id(gene_name)
                if hgnc_id:
                    db_refs['HGNC'] = hgnc_id

        # Create the agent.
        return Agent(prot_name, db_refs=db_refs)

    def _make_evidence(self, line):
        """Return a list of Evidence, one per entry in 'Key References'.

        The 'Key References' value is split on ';'. Each piece becomes the
        ``reference`` annotation of one direct 'lincs_drug' Evidence, which
        also carries the ``is_nominal`` and ``effective_concentration``
        annotations of the entry. The pmid is the number found by the PMID
        pattern in that piece, or None if the pattern does not match.
        """
        ev_list = []
        key_refs = line['Key References'].split(';')
        generic_notes = {
            'is_nominal': line['Is Nominal'],
            'effective_concentration': line['Effective Concentration']
            }
        for ref in key_refs:
            # Only extracting pmids, but there is generally more info available.
            m = self._PMID_PATTERN.search(ref)
            pmid = m.group(1) if m is not None else None
            annotations = {'reference': ref}
            annotations.update(generic_notes)
            ev = Evidence('lincs_drug', pmid=pmid, annotations=annotations,
                          epistemics={'direct': True})
            ev_list.append(ev)
        return ev_list