def do_queries_for_lvg(lvg, strict=False):
    """Search ``db`` for each protein variant of ``lvg`` and print a report.

    Every value in ``lvg.variants['p']`` is wrapped in ``VariantComponents``
    and passed to ``db.search`` together with ``lvg.gene_name``.  The PMID of
    every returned row is collected.  Any exception raised while handling a
    variant is swallowed and counted as "unusable" (best-effort behaviour).

    :param lvg: object exposing ``variants``, ``gene_name``, ``seqvar`` and
        ``hgvs_p``
    :param strict: forwarded unchanged to ``db.search``
    :return: number of distinct PMIDs collected
    """
    found_pmids = set()
    failures = 0

    for p_variant in lvg.variants['p'].values():
        try:
            components = VariantComponents(p_variant)
            hits = db.search(components, lvg.gene_name, strict=strict)
            if not hits:
                continue
            print('@@@ RESULTS for {gene} + {ref}|{pos}'.format(
                gene=lvg.gene_name, ref=components.ref, pos=components.pos))
            for hit in hits:
                found_pmids.add(hit['PMID'])
        except Exception:
            # Deliberately broad: a variant that cannot be processed for any
            # reason is only tallied, never fatal.
            failures += 1

    # NOTE: the reported total comes from lvg.hgvs_p, not from the mapping
    # that was iterated above.
    print('[%s] %i p-vars (%i unusable)' %
          (lvg.seqvar, len(lvg.hgvs_p), failures))

    for pmid in found_pmids:
        print('\t* %s' % pmid)

    print()

    return len(found_pmids)
Ejemplo n.º 2
0
def get_posedits_for_seqvar(seqvar):
    """Build the list of quoted posedit search terms for ``seqvar``.

    The "official" term produced by ``quoted_posedit`` comes first (when it
    is truthy), followed by each entry of ``comp.posedit_slang`` wrapped in
    double quotes, skipping any that equals the official term.

    :param seqvar: value accepted by ``VariantComponents``
    :return: list of quoted terms; empty list if ``VariantComponents``
        raises ``RejectedSeqVar``
    """
    try:
        comp = VariantComponents(seqvar)
    except RejectedSeqVar as rejection:
        log.debug(rejection)
        return []

    # Official form first.
    official_term = quoted_posedit(comp)
    terms = [official_term] if official_term else []

    # Then the slang forms.  Kept as an explicit loop so that anything
    # appended before a NotImplementedError is still returned.
    try:
        for raw_slang in comp.posedit_slang:
            quoted = '"%s"' % raw_slang
            if quoted == official_term:
                continue
            terms.append(quoted)
    except NotImplementedError as unsupported:
        # Unsupported edit types are logged and otherwise ignored.
        log.debug(unsupported)

    return terms
Ejemplo n.º 3
0
def search_aminoDBs(gene, achg):
    """Print ClinVar (strict) and PubTator protein matches for an amino change.

    Builds ``VariantComponents(aminochange=achg)``, resolves ``gene`` through
    ``GeneID``, then prints the rows returned by ``cvdb.search`` (with
    ``strict=True``) and by ``pubdb.search_proteins``.

    :param gene: gene identifier passed to ``GeneID``
    :param achg: amino-change text; also used as the prefix of printed lines
    :return: None
    """
    print('[%s]' % achg)
    comp = VariantComponents(aminochange=achg)
    if not comp:
        print('[%s] INVALID Amino Change' % achg)
        return

    print('[%s] Posedit: %s' % (achg, comp.posedit))
    print('[%s] Slang: %r' % (achg, comp.posedit_slang))

    gene_id = GeneID(gene)
    print('[%s] Gene: %s (ID: %i)' % (achg, gene, gene_id))

    # Only the strict ClinVar search is run; a loose (strict=False) search
    # used to be here but is disabled.
    clinvar_rows = cvdb.search(comp, gene_id, strict=True)
    print('[%s] Clinvar STRICT matches: %i' % (achg, len(clinvar_rows)))

    for row in clinvar_rows:
        print('[%s]' % achg,
              row['PMID'],
              row['HGVS'],
              row['VariationID'],
              row['GeneSymbol'],
              row['Ref'],
              row['Pos'],
              row['Alt'])

    pubtator_rows = pubdb.search_proteins(comp, gene_id)
    print('[%s] PubtatorDB matches: %i' % (achg, len(pubtator_rows)))
    for row in pubtator_rows:
        print(row)
def components_or_None(hgvs_p):
    """Return ``VariantComponents`` for ``hgvs_p``, or None if unusable.

    :param hgvs_p: value passed to ``Variant``
    :return: the components object when its ``ref`` is not the empty string;
        None when ``ref`` is empty or when TypeError, RejectedSeqVar or
        CriticalHgvsError is raised along the way
    """
    try:
        comp = VariantComponents(Variant(hgvs_p))
        has_ref = comp.ref != ''
    except (TypeError, RejectedSeqVar, CriticalHgvsError):
        # Per the original note: the text did not parse (Variant returned
        # None) or the edit information is incomplete.
        return None
    return comp if has_ref else None
Ejemplo n.º 5
0
def find_variant_in_clinvar(lex):
    """Return the first entry of ``lex.seqvars`` that ClinVar recognises.

    An entry qualifies when ``VariantComponents`` accepts it and
    ``ClinvarVariationID`` returns a truthy value for its string form.
    Entries that raise ``RejectedSeqVar`` are skipped.

    :param lex: object exposing an iterable ``seqvars``
    :return: the matching entry, or None if none qualifies
    """
    for candidate in lex.seqvars:
        try:
            # Called purely for validation; the resulting object is unused.
            VariantComponents(candidate)
            if ClinvarVariationID('%s' % candidate):
                return candidate
        except RejectedSeqVar:
            continue

    return None
Ejemplo n.º 6
0
def process_hgvs_through_pubtator(hgvs_text):
    """Collect PubTator PMIDs for every variant derived from ``hgvs_text``.

    Builds ``LVG(hgvs_text)``, bails out for unsupported edit types, then
    searches ``pubtator_db`` once per entry of ``lex.variants``: protein
    ('p') entries through ``search_proteins``, everything else through
    ``search_m2p``.  Progress is printed along the way.

    :param hgvs_text: HGVS string passed to ``LVG``
    :return: set of PMIDs, or None when the edit type is not one of
        SUB, DEL, INS, FS, INDEL
    """
    print()
    print('[%s]' % hgvs_text)

    lex = LVG(hgvs_text)

    edittype = VariantComponents(lex.seqvar).edittype
    if edittype not in ('SUB', 'DEL', 'INS', 'FS', 'INDEL'):
        print('[%s] Cannot process edit type %s; skipping' %
              (hgvs_text, edittype))
        return None

    try:
        gene_id = GeneID(lex.gene_name)
    except TypeError:
        # A missing gene name is tolerated; search without a gene id.
        gene_id = None

    print('[%s]' % hgvs_text, lex.gene_name, '(Gene ID: %s)' % gene_id)

    collected = set()
    for seqtype in lex.variants:
        # NOTE(review): this iterates the container itself; other callers
        # iterate ``.values()`` of the same attribute.  If this is a dict,
        # keys (not variant objects) are being processed here -- confirm.
        for seqvar in lex.variants[seqtype]:
            try:
                components = VariantComponents(seqvar)
            except RejectedSeqVar:
                print('[%s] Rejected sequence variant: %r' %
                      (hgvs_text, seqvar))
                continue

            print('[%s]' % hgvs_text, seqtype, components)
            search = (pubtator_db.search_proteins if seqtype == 'p'
                      else pubtator_db.search_m2p)
            collected.update(row['PMID'] for row in search(components, gene_id))

    return collected
Ejemplo n.º 7
0
def pubtator_lex_to_pmid(lex, gene_name=None):
    """ Takes an LVG object ("lex") (metavariant.VariantLVG) and searches
    PubTator for PMIDs associated with each variant in lex.variants.

    Protein ('p') variants are looked up with pubtator_db.search_proteins;
    every other sequence type with pubtator_db.search_m2p.  Variants rejected
    by VariantComponents are skipped, and a PubtatorDBError for one variant
    is logged without aborting the remaining searches.

    :param lex: lexical variant object (metavariant.VariantLVG)
    :param gene_name: optional gene name; when falsy, lex.gene_name is used
    :return: list of distinct PMIDs (empty list if resolving the gene ID from
        lex.gene_name raises TypeError)
    """
    if not gene_name:
        try:
            gene_name = lex.gene_name
            gene_id = GeneID(lex.gene_name)
        except TypeError:
            # No gene name available -- results would be meaningless
            # without one, so give up early.
            return []
    else:
        gene_id = GeneID(gene_name)

    log.info('[%s] %s (Gene ID: %s)', lex.seqvar, gene_name, gene_id)

    collected = set()
    for seqtype in lex.variants:
        for seqvar in lex.variants[seqtype].values():
            try:
                components = VariantComponents(seqvar)
            except RejectedSeqVar:
                log.debug('[%s] Rejected sequence variant: %r' %
                          (lex.seqvar, seqvar))
                continue

            log.info('[%s] [[%s]] %s', lex.seqvar, seqvar, components)
            try:
                search = (pubtator_db.search_proteins if seqtype == 'p'
                          else pubtator_db.search_m2p)
                for row in search(components, gene_id):
                    collected.add(row['PMID'])
            except PubtatorDBError as db_error:
                log.info('[%s] (%s) %r', lex.seqvar, seqvar, db_error)

    return list(collected)
Ejemplo n.º 8
0
def parse_components(components):
    """Turn a components string into a dict suitable for MySQL storage.

    The string is tried against each compiled pattern in
    ``component_patterns``; only the first pattern that matches is used.

    :param components: text to parse
    :return: ``VariantComponents(**groups).to_mysql_dict()`` for the first
        matching pattern; None if that match has a blank ``Pos`` group (after
        reporting it via ``write_missing_position``); otherwise
        ``{'RS': components, 'EditType': 'rs'}`` when the text starts with
        'rs'; otherwise None
    """
    for pattern in component_patterns.values():
        found = pattern.search(components)
        if not found:
            continue

        fields = found.groupdict()
        # An entry without a position is useless downstream: record it and
        # stop without trying the remaining patterns.
        if fields['Pos'].strip() == '':
            write_missing_position(fields)
            return None

        return VariantComponents(**fields).to_mysql_dict()

    if components.startswith('rs'):
        return {'RS': components, 'EditType': 'rs'}

    return None
Ejemplo n.º 9
0
def query(hgvs_text=''):
    """ Builds a lex object from hgvs_text and renders the query results page.

    POST requests, and GET requests whose hgvs_text still carries a gene
    name, are redirected (302) to /query/<hgvs_text without gene name>.
    If LVG raises CriticalHgvsError, demo.html is rendered with the error.
    """
    # Normalize all requests to a GET with hgvs_text having no gene name.
    if request.method == 'POST':
        posted_text = request.form.get('hgvs_text', '').strip()
        return redirect(
            '/query/%s' % strip_gene_name_from_hgvs_text(posted_text),
            code=302)

    without_gene = strip_gene_name_from_hgvs_text(hgvs_text)
    if without_gene != hgvs_text:
        return redirect('/query/%s' % without_gene, code=302)
    hgvs_text = hgvs_text.strip()

    try:
        lex = LVG(hgvs_text)
    except CriticalHgvsError as error:
        return render_template('demo.html', error_msg='%r' % error)

    # Gene info is only looked up when the lex object knows its gene name.
    gene_info = GeneInfo(gene_name=lex.gene_name) if lex.gene_name else None

    # ClinVar info is keyed on the (normalized) hgvs_text.
    clinvar_info = ClinVarInfo(hgvs_text)

    # The citation table is built from the lex object.
    citation_table = CitationTable(lex)

    # LOVD link for this gene + variant components.
    components = VariantComponents(lex.seqvar)
    lovd_url = get_lovd_url(lex.gene_name, components)

    return render_template(
        'query.html',
        lex=lex,
        lovd_url=lovd_url,
        citation_table=citation_table,
        clinvar=clinvar_info,
        gene=gene_info,
        found_in_clinvar_example_tables=None,
    )
Ejemplo n.º 10
0
def clinvar_lex_to_pmid(lex):
    """ Takes a "lex" object (metavariant.VariantLVG) and looks up, for each
    variant in lex.variants, the PMIDs returned by ClinvarPubmeds.

    Variants that VariantComponents rejects (RejectedSeqVar) are logged at
    debug level and skipped.

    :param lex: lexical variant object (metavariant.VariantLVG)
    :return: list of distinct pmids found in Clinvar
    """
    collected = set()
    for seqtype in lex.variants:
        for seqvar in lex.variants[seqtype].values():
            # Validation only: the components object itself is not needed.
            try:
                VariantComponents(seqvar)
            except RejectedSeqVar:
                log.debug('[%s] Rejected sequence variant: %r' %
                          (lex.seqvar, seqvar))
                continue

            collected.update(ClinvarPubmeds('%s' % seqvar))
    return list(collected)
Ejemplo n.º 11
0
def hgvs_to_pmid_results_dict(hgvs_text):
    """Look up PMIDs for ``hgvs_text`` in PubTator and ClinVar.

    Progress lines are printed, including the gene name and gene ID.

    :param hgvs_text: HGVS string passed to ``LVG``
    :return: dict with keys 'PubTator' and 'ClinVar' holding the results of
        ``PubtatorHgvs2Pmid(lex)`` and ``ClinvarHgvs2Pmid(lex)``; None when
        the edit type is not one of SUB, DEL, INS, FS, INDEL
    """
    print()
    print('[%s]' % hgvs_text)

    lex = LVG(hgvs_text)

    edittype = VariantComponents(lex.seqvar).edittype
    if edittype not in ('SUB', 'DEL', 'INS', 'FS', 'INDEL'):
        print('[%s] Cannot process edit type %s; skipping' %
              (hgvs_text, edittype))
        return None

    try:
        gene_id = GeneID(lex.gene_name)
    except TypeError:
        # A missing gene name is tolerated; it is only used for the printout.
        gene_id = None

    print('[%s]' % hgvs_text, lex.gene_name, '(Gene ID: %s)' % gene_id)

    return {
        'PubTator': PubtatorHgvs2Pmid(lex),
        'ClinVar': ClinvarHgvs2Pmid(lex),
    }
Ejemplo n.º 12
0
def pubtator_results_for_seqvar(seqvar_or_hgvs_text, gene_id):
    """ Takes a SequenceVariant or hgvs_text string and searches PubTator.

    Protein variants (seqvar.type == 'p') are searched with
    pubtator_db.search_proteins; all others with pubtator_db.search_m2p.

    Returns a single-entry dictionary mapping the variant's string form to
    whatever the search returned, i.e.:

        { hgvs_text: <results from pubtator> }

    :param seqvar_or_hgvs_text: hgvs_text or SequenceVariant object
    :param gene_id: id of gene associated with variant (required)
    :return: dictionary of results
    :raises: RejectedSeqVar, PubtatorDBError
    """
    seqvar = Variant(seqvar_or_hgvs_text)
    hgvs_text = '%s' % seqvar

    components = VariantComponents(seqvar)

    search = (pubtator_db.search_proteins if seqvar.type == 'p'
              else pubtator_db.search_m2p)
    return {hgvs_text: search(components, gene_id)}
Ejemplo n.º 13
0
 def test_insert(self):
     """Smoke test: VariantComponents accepts the hgvs_c['INS'] fixture without raising."""
     VariantComponents(Variant(hgvs_c['INS']))
Ejemplo n.º 14
0
 def test_simple_substitution(self):
     """Smoke test: VariantComponents accepts the hgvs_c['SUB'] fixture without raising."""
     VariantComponents(Variant(hgvs_c['SUB']))
Ejemplo n.º 15
0
 def test_duplication(self):
     """Smoke test: VariantComponents accepts the hgvs_c['DUP'] fixture without raising."""
     VariantComponents(Variant(hgvs_c['DUP']))
Ejemplo n.º 16
0
 def test_indel(self):
     """Smoke test: VariantComponents accepts the hgvs_n['INDEL'] fixture without raising."""
     VariantComponents(Variant(hgvs_n['INDEL']))
Ejemplo n.º 17
0
 def test_frameshift(self):
     """Smoke test: VariantComponents accepts the hgvs_p['FS'] fixture without raising."""
     VariantComponents(Variant(hgvs_p['FS']))
Ejemplo n.º 18
0
 def test_deletion(self):
     """Smoke test: VariantComponents accepts the hgvs_g['DEL'] fixture without raising."""
     VariantComponents(Variant(hgvs_g['DEL']))