Exemple #1
0
 def __init__(self, filename=None):
     """Create the HGNC client and populate this instance's data.

     :param filename: Optional value forwarded to ``self.read_csv``.
         When it is ``None`` the data is instead obtained through
         ``self.load_all_hgnc()`` and ``self.build_dataframe()``.
     """
     self._hgnc_service = HGNC()
     # Identity test: ``None`` is a singleton, and ``==`` can be overridden
     # by the argument's type (e.g. array-likes compare element-wise).
     if filename is None:
         self.alldata = self.load_all_hgnc()
         self.df = self.build_dataframe()
     else:
         self.read_csv(filename)
 def _lookup_hgnc_id(self):
     """Fill in this object's fields from the HGNC record of ``self.external_id``.

     Fetches by ``hgnc_id``. With exactly one hit, ``self.name`` and
     ``self.description`` are set from the record's ``symbol`` and ``name``,
     and, when ``self.get_synonyms`` is truthy, the record's aliases are
     appended to ``self.synonyms``. With no hit, ``self.error`` is set.
     More than one hit leaves the object untouched.

     :return: ``self``, so the call can be chained.
     """
     hgnc_web = HGNC()
     hgnc = hgnc_web.fetch('hgnc_id', self.external_id)
     response = hgnc['response']
     num_found = response['numFound']
     if num_found == 1:
         record = response['docs'][0]
         self.name = record['symbol']
         self.description = record['name']
         # Get synonyms if requested.
         if self.get_synonyms:
             # NOTE(review): HGNC records for genes without aliases appear
             # to omit ``alias_symbol`` altogether, so a plain subscript
             # would raise KeyError here; treat a missing key as "none".
             for item in record.get('alias_symbol', []):
                 self.synonyms.append(item)
     elif num_found == 0:
         self.error = "No results found when querying HGNC for {}".format(
             self.external_id)
     return self
Exemple #3
0
def test_hgnc():
    """Smoke-test the HGNC client: one info call, two fetches, six searches.

    Nothing is asserted on the results; the test passes as long as no call
    raises.
    """
    service = HGNC()
    service.get_info()

    # (field, value) pairs passed to fetch(), in call order.
    fetch_queries = (
        ('symbol', 'ZNF3'),
        ('alias_name', 'A-kinase anchor protein, 350kDa'),
    )
    for field, value in fetch_queries:
        service.fetch(field, value)

    # Positional arguments for search(), in call order: the single-argument
    # form first, then the (field, query) form.
    search_queries = (
        ('BRAF',),
        ('symbol', 'ZNF*'),
        ('symbol', 'ZNF?'),
        ('symbol', 'ZNF*+AND+status:Approved'),
        ('symbol', 'ZNF3+OR+ZNF12'),
        ('symbol', 'ZNF*+NOT+status:Approved'),
    )
    for query in search_queries:
        service.search(*query)
Exemple #4
0
    def __init__(self, verbosity="INFO"):
        """Initialise the parent class and create one client per service.

        :param verbosity: Forwarded to the parent initialiser as ``level``.
        """
        super(Mapper, self).__init__(level=verbosity)

        # Short alias for the progress messages emitted before each client.
        log = self.logging.info
        log("Initialising the services")

        log("... uniprots")
        self._uniprot_service = UniProt()

        log("... KEGG")
        self._kegg_service = KeggParser(verbose=False)

        log("... HGNC")
        self._hgnc_service = HGNC()

        log("... UniChem")
        self._unichem_service = UniChem()

        log("...BioDBNet")
        self._biodbnet = BioDBNet()
Exemple #5
0
def add_sequence_to_nodes(n: str, d: Dict[str, Any]):
    """
    Looks up the UniProt IDs that HGNC lists for the node's ``protein_id``
    (queried as an HGNC ``symbol``), then retrieves each sequence from
    UniProt and adds it to the node as a feature.

    The IDs are stored under ``d["uniprot_ids"]`` and each sequence under
    ``d["sequence_<uniprot id>"]``. When HGNC returns no record, or the
    record carries no ``uniprot_ids``, ``d["uniprot_ids"]`` is set to an
    empty list and no sequence is added.

    :param n: Graph node.
    :type n: str
    :param d: Graph attribute dictionary. Must contain ``protein_id``; it is
        modified in place.
    :type d: Dict[str, Any]
    """
    h = HGNC(verbose=False)
    u = UniProt(verbose=False)

    docs = h.fetch("symbol", d["protein_id"])["response"]["docs"]
    # An unknown symbol yields an empty ``docs`` list, and a record may lack
    # ``uniprot_ids``; neither should abort annotation of the whole graph.
    d["uniprot_ids"] = docs[0].get("uniprot_ids", []) if docs else []

    # Todo these API calls should probably be batched
    # Todo mapping with bioservices to support other protein IDs?

    # ``uniprot_id`` rather than ``id`` so the builtin is not shadowed.
    for uniprot_id in d["uniprot_ids"]:
        d[f"sequence_{uniprot_id}"] = u.get_fasta_sequence(uniprot_id)
Exemple #6
0
def kegg_to_hugo(genes, species='hsa'):
    """
    Converts all KEGG names to HGNC

    Each name has a leading ``<species>:`` prefix removed (if present) and is
    then searched in HGNC; the symbol of the first hit is used. Every input
    name ends up either as a key of the returned dict or in the returned set.

    Parameters
    ----------
    genes : list
        KEGG gene names, with or without the ``<species>:`` prefix.
    species : str
        Organism code used to build the prefix that is removed.

    Returns
    -------
    tuple
        ``(hugo_dict, not_found)``: a dict mapping each input name to its
        HGNC symbol, and the set of input names with no usable hit.
    """
    prefix = species + ':'
    hugo = HGNC(verbose=True)
    hugo_dict = {}
    not_found = set()
    for gene in genes:
        # str.lstrip() removes any leading run of the given *characters*,
        # not a prefix: 'hsa:sash1'.lstrip('hsa:') is '1'. Slice instead.
        query = gene[len(prefix):] if gene.startswith(prefix) else gene
        mapping = hugo.search(query)

        symbol = None
        # NOTE(review): the client presumably hands back a non-dict value
        # (e.g. a status code) on a failed request -- confirm; such a
        # result is treated as "not found" rather than raising.
        if isinstance(mapping, dict):
            response = mapping.get('response', {})
            docs = response.get('docs') or []
            if response.get('numFound', 0) > 0 and docs:
                # With several hits the first one is taken, as before.
                symbol = docs[0].get('symbol')

        if symbol is None:
            not_found.add(gene)
        else:
            hugo_dict[gene] = symbol

    # Report only when something is actually missing (a set never equals 0,
    # so the former ``!= 0`` test always fired).
    if not_found:
        print("{} not found after HGNC mapping".format(len(not_found)))
        print("{} ".format(not_found))
    return hugo_dict, not_found
Exemple #7
0
 def __init__(self):
     """Create a non-verbose HGNC client and keep it on ``self.s``."""
     hgnc_client = HGNC(verbose=False)
     self.s = hgnc_client