Example #1
0
def getEnsemblGene(gene):
    """Collect the Ensembl gene IDs (ENSG) found for a gene name.

    Runs a keyword search for ``gene`` under ``baseUrl``, follows every
    result listed in a ``td.gc-gene-symbol`` cell, and keeps the Ensembl
    IDs of each result whose symbol equals ``gene`` or whose previous
    HGNC symbols / aliases contain ``gene``.  For a matching result the
    IDs taken from its page (``mkEnsemblSet``) are combined with the
    value returned by ``hgnc.fetchEnsemblGeneIdForGeneSymbol`` for the
    result's symbol.

    Args:
        gene: Gene symbol or alias to search for.

    Returns:
        A set with the union of the Ensembl IDs of all matching results;
        empty when nothing matches.
    """
    searchUrl = baseUrl + 'Search/Keyword?queryString=%s' % (gene, )
    response = requests.get(searchUrl)
    # NOTE(review): no parser is named, so bs4 picks whichever is installed.
    soup = bs4.BeautifulSoup(response.text)
    links = soup.find_all("td", class_="gc-gene-symbol")
    ensemblGeneSet = set()
    i = 0
    for link in links:
        for item in link:
            markup = str(item)
            # Iterating a cell also yields children that are not the result
            # anchor (e.g. whitespace text nodes); indexing the split on
            # those would raise IndexError, so skip them.
            if 'data-ga-label="' not in markup or 'href="' not in markup:
                continue
            name = markup.split('data-ga-label="')[1].split('"')[0]
            # Keep only the part of the link before the first '&'.
            href = baseUrl + markup.split('href="')[1].split('"')[0].split('&')[0]
            # Pause before each detail-page request.
            time.sleep(2)
            newHtml = requests.get(href)
            hgncAliasSet = getPreviousHGNC(newHtml) | getAlias(newHtml)
            if name == gene or gene in hgncAliasSet:
                # Only matching results contribute, so the Ensembl lookups
                # are done here rather than for every search hit.
                ensemblGeneSet |= mkEnsemblSet(newHtml)
                ensemblGeneSet |= {hgnc.fetchEnsemblGeneIdForGeneSymbol(name)}
            # Extra pause after every processed result except the first.
            if i:
                time.sleep(2)
            i += 1
    return ensemblGeneSet
Example #2
0
def testSearchSymbol():
    # The HGNC symbol lookup should resolve RBM5-AS1 to this Ensembl gene ID.
    expectedGeneId = 'ENSG00000281691'
    foundGeneId = hgnc.fetchEnsemblGeneIdForGeneSymbol('RBM5-AS1')
    nose.tools.assert_equal(foundGeneId, expectedGeneId)