def get_posedits_for_seqvar(seqvar):
    """Collect the quoted posedit search terms for a single sequence variant.

    The value returned by ``quoted_posedit`` comes first (when truthy),
    followed by each slang posedit wrapped in double quotes, skipping any
    slang term identical to the official one.

    :param seqvar: sequence variant handed to VariantComponents
    :return: list of posedit strings; empty list if VariantComponents
        raises RejectedSeqVar
    """
    try:
        components = VariantComponents(seqvar)
    except RejectedSeqVar as rejection:
        log.debug(rejection)
        return []

    # 1) Official
    official = quoted_posedit(components)
    terms = [official] if official else []

    # 2) Slang
    try:
        for slang in components.posedit_slang:
            quoted = '"%s"' % slang
            if quoted != official:
                terms.append(quoted)
    except NotImplementedError as unsupported:
        # Unsupported edit types are logged, not raised; any terms gathered
        # before the error are still returned.
        log.debug(unsupported)

    return terms
def do_queries_for_lvg(lvg, strict=False):
    """Search ``db`` for each protein variant of an LVG and print the PMIDs found.

    :param lvg: object exposing ``variants['p']`` (a mapping), ``gene_name``,
        ``seqvar`` and ``hgvs_p``
    :param strict: forwarded to ``db.search``
    :return: number of distinct PMIDs collected
    """
    found_pmids = set()
    failures = 0

    for p_variant in lvg.variants['p'].values():
        try:
            parts = VariantComponents(p_variant)
            hits = db.search(parts, lvg.gene_name, strict=strict)
            if not hits:
                continue
            print('@@@ RESULTS for {gene} + {ref}|{pos}'.format(
                gene=lvg.gene_name, ref=parts.ref, pos=parts.pos))
            for hit in hits:
                found_pmids.add(hit['PMID'])
        except Exception:
            # Best effort: any failure for this variant only bumps the
            # "unusable" counter reported in the summary line below.
            failures += 1

    print('[%s] %i p-vars (%i unusable)' % (lvg.seqvar, len(lvg.hgvs_p), failures))
    for pmid in found_pmids:
        print('\t* %s' % pmid)
    print()
    return len(found_pmids)
def search_aminoDBs(gene, achg):
    """Print ClinVar (strict) and PubtatorDB matches for an amino-acid change.

    Output goes to stdout only; nothing is returned.

    :param gene: gene identifier passed to GeneID
    :param achg: amino change string passed to VariantComponents(aminochange=...)
    """
    print('[%s]' % achg)

    components = VariantComponents(aminochange=achg)
    if not components:
        print('[%s] INVALID Amino Change' % achg)
        return

    print('[%s] Posedit: %s' % (achg, components.posedit))
    print('[%s] Slang: %r' % (achg, components.posedit_slang))

    gene_id = GeneID(gene)
    print('[%s] Gene: %s (ID: %i)' % (achg, gene, gene_id))

    # Only the strict ClinVar search is run here.
    clinvar_hits = cvdb.search(components, gene_id, strict=True)
    print('[%s] Clinvar STRICT matches: %i' % (achg, len(clinvar_hits)))
    clinvar_columns = ('PMID', 'HGVS', 'VariationID', 'GeneSymbol', 'Ref', 'Pos', 'Alt')
    for row in clinvar_hits:
        print('[%s]' % achg, *[row[column] for column in clinvar_columns])

    pubtator_hits = pubdb.search_proteins(components, gene_id)
    print('[%s] PubtatorDB matches: %i' % (achg, len(pubtator_hits)))
    for row in pubtator_hits:
        print(row)
def components_or_None(hgvs_p):
    """Return VariantComponents for ``hgvs_p``, or None when it is unusable.

    None is returned when building the components raises TypeError,
    RejectedSeqVar or CriticalHgvsError, or when the resulting ``ref`` is
    the empty string.

    :param hgvs_p: value passed to Variant()
    :return: VariantComponents instance or None
    """
    try:
        candidate = VariantComponents(Variant(hgvs_p))
        has_ref = candidate.ref != ''
    except (TypeError, RejectedSeqVar, CriticalHgvsError):
        # Per the original author's note: either hgvs_p did not parse
        # (Variant returned None) or it has incomplete edit info.
        return None
    return candidate if has_ref else None
def parse_components(components):
    """Turn a components string into a dictionary via the first matching pattern.

    Each pattern in ``component_patterns`` is tried in turn. The first one
    that matches decides the outcome: a blank 'Pos' group is reported via
    ``write_missing_position`` and yields None; otherwise the named groups
    are fed to VariantComponents and its ``to_mysql_dict()`` is returned.
    If nothing matches, strings starting with 'rs' produce an 'rs' entry
    and everything else produces None.

    :param components: components string to parse
    :return: dictionary describing the variant, or None
    """
    for pattern in component_patterns.values():
        found = pattern.search(components)
        if not found:
            continue

        fields = found.groupdict()
        # An entry without a valid position (Pos) is of no use to us.
        if fields['Pos'].strip() == '':
            write_missing_position(fields)
            return None
        return VariantComponents(**fields).to_mysql_dict()

    if components.startswith('rs'):
        return {'RS': components, 'EditType': 'rs'}
    return None
def find_variant_in_clinvar(lex):
    """Return the first sequence variant of ``lex`` with a truthy ClinvarVariationID.

    Variants for which VariantComponents raises RejectedSeqVar are skipped.

    :param lex: object exposing an iterable ``seqvars``
    :return: the matching sequence variant, or None if there is none
    """
    for candidate in lex.seqvars:
        try:
            # Constructed purely as a validity check; the object is not used.
            VariantComponents(candidate)
            variation_id = ClinvarVariationID('%s' % candidate)
        except RejectedSeqVar:
            continue
        if variation_id:
            return candidate
    return None
def process_hgvs_through_pubtator(hgvs_text):
    """Look up PubTator PMIDs for every variant generated from ``hgvs_text``.

    Progress is printed to stdout along the way.

    :param hgvs_text: HGVS string handed to LVG
    :return: set of PMIDs, or None when the edit type is not one of
        SUB, DEL, INS, FS, INDEL
    """
    print()
    print('[%s]' % hgvs_text)

    lex = LVG(hgvs_text)
    edittype = VariantComponents(lex.seqvar).edittype
    if edittype not in ('SUB', 'DEL', 'INS', 'FS', 'INDEL'):
        print('[%s] Cannot process edit type %s; skipping' % (hgvs_text, edittype))
        return None

    try:
        gene_id = GeneID(lex.gene_name)
    except TypeError:
        # no gene_name? it happens.
        gene_id = None
    print('[%s]' % hgvs_text, lex.gene_name, '(Gene ID: %s)' % gene_id)

    found_pmids = set()
    for seqtype, variants in lex.variants.items():
        # NOTE(review): this iterates the container directly (for a dict,
        # that means its keys), not ``.values()`` -- confirm that is what
        # VariantComponents expects here.
        for seqvar in variants:
            try:
                components = VariantComponents(seqvar)
            except RejectedSeqVar:
                print('[%s] Rejected sequence variant: %r' % (hgvs_text, seqvar))
                continue
            print('[%s]' % hgvs_text, seqtype, components)

            # Protein variants and all other sequence types use different searches.
            if seqtype == 'p':
                rows = pubtator_db.search_proteins(components, gene_id)
            else:
                rows = pubtator_db.search_m2p(components, gene_id)
            for row in rows:
                found_pmids.add(row['PMID'])
    return found_pmids
def pubtator_lex_to_pmid(lex, gene_name=None):
    """ Takes an LVG object ("lex") (metavariant.VariantLVG) and uses each variant found in
    lex.variants to do a search in PubTator for associated PMIDs.

    Returns a de-duplicated list of PMIDs (in no particular order). Variants for which
    VariantComponents raises RejectedSeqVar are skipped, and a PubtatorDBError raised
    while searching for one variant is logged without aborting the remaining variants.

    :param lex: lexical variant object (metavariant.VariantLVG)
    :param gene_name: (optional) gene name used to look up the Gene ID; when falsy,
        lex.gene_name is used instead
    :return: list of PMIDs found in PubTator (empty list if GeneID raises TypeError
        for lex.gene_name)
    """
    if gene_name:
        gene_id = GeneID(gene_name)
    else:
        try:
            gene_name = lex.gene_name
            gene_id = GeneID(gene_name)
        except TypeError:
            # no gene_name? it happens -- but our results will be basically bunk without it.
            return []

    log.info('[%s] %s (Gene ID: %s)', lex.seqvar, gene_name, gene_id)

    pmids = set()
    for seqtype in lex.variants:
        for seqvar in lex.variants[seqtype].values():
            try:
                components = VariantComponents(seqvar)
            except RejectedSeqVar:
                log.debug('[%s] Rejected sequence variant: %r', lex.seqvar, seqvar)
                continue

            log.info('[%s] [[%s]] %s', lex.seqvar, seqvar, components)
            try:
                # Protein variants and all other sequence types use different searches.
                if seqtype == 'p':
                    results = pubtator_db.search_proteins(components, gene_id)
                else:
                    results = pubtator_db.search_m2p(components, gene_id)
                for res in results:
                    pmids.add(res['PMID'])
            except PubtatorDBError as error:
                log.info('[%s] (%s) %r', lex.seqvar, seqvar, error)
    return list(pmids)
def query(hgvs_text=''):
    """ Builds a lex object from hgvs_text and renders the query results page.

    POST requests, and GET requests whose hgvs_text is changed by
    strip_gene_name_from_hgvs_text, are answered with a 302 redirect to
    /query/<stripped hgvs_text>. If LVG raises CriticalHgvsError, demo.html
    is rendered with the error's repr as error_msg.
    """
    # Normalize all requests to a GET with hgvs_text having no gene name.
    if request.method == 'POST':
        submitted = request.form.get('hgvs_text', '').strip()
        return redirect('/query/%s' % strip_gene_name_from_hgvs_text(submitted), code=302)

    stripped = strip_gene_name_from_hgvs_text(hgvs_text)
    if stripped != hgvs_text:
        return redirect('/query/%s' % stripped, code=302)

    hgvs_text = hgvs_text.strip()
    try:
        lex = LVG(hgvs_text)
    except CriticalHgvsError as error:
        return render_template('demo.html', error_msg='%r' % error)

    # GENE INFO: nice info to have at hand (e.g. medgen url) if we know the gene name for this variant.
    gene_info = GeneInfo(gene_name=lex.gene_name) if lex.gene_name else None

    # CLINVAR INFO: nice info to have at hand if we can look up the variation ID for given hgvs_text.
    clinvar_info = ClinVarInfo(hgvs_text)

    # CITATION TABLE: handles all the heavy lifting of hgvs2pmid lookups and arranges citations by PMID.
    citation_table = CitationTable(lex)

    # LOVD URL: link to search in a relevant LOVD instance, if we know of one.
    lovd_url = get_lovd_url(lex.gene_name, VariantComponents(lex.seqvar))

    template_context = {
        'lex': lex,
        'lovd_url': lovd_url,
        'citation_table': citation_table,
        'clinvar': clinvar_info,
        'gene': gene_info,
        'found_in_clinvar_example_tables': None,
    }
    return render_template('query.html', **template_context)
def clinvar_lex_to_pmid(lex):
    """ Searches Clinvar for PMIDs associated with each variant in lex.variants.

    Variants for which VariantComponents raises RejectedSeqVar are logged at
    debug level and skipped.

    :param lex: lexical variant object (metavariant.VariantLVG)
    :return: de-duplicated list of pmids found in Clinvar
    """
    collected = set()
    for variants_by_text in lex.variants.values():
        for seqvar in variants_by_text.values():
            # throw away sequence variants without enough information
            try:
                VariantComponents(seqvar)
            except RejectedSeqVar:
                log.debug('[%s] Rejected sequence variant: %r' % (lex.seqvar, seqvar))
                continue
            collected.update(ClinvarPubmeds('%s' % seqvar))
    return list(collected)
def hgvs_to_pmid_results_dict(hgvs_text):
    """Run the PubTator and ClinVar hgvs2pmid lookups for ``hgvs_text``.

    Progress is printed to stdout along the way.

    :param hgvs_text: HGVS string handed to LVG
    :return: dictionary with keys 'PubTator' and 'ClinVar', or None when the
        edit type is not one of SUB, DEL, INS, FS, INDEL
    """
    print()
    print('[%s]' % hgvs_text)

    lex = LVG(hgvs_text)
    edittype = VariantComponents(lex.seqvar).edittype
    if edittype not in ('SUB', 'DEL', 'INS', 'FS', 'INDEL'):
        print('[%s] Cannot process edit type %s; skipping' % (hgvs_text, edittype))
        return None

    # The gene id is only used for the progress line printed below.
    try:
        gene_id = GeneID(lex.gene_name)
    except TypeError:
        # no gene_name? it happens.
        gene_id = None
    print('[%s]' % hgvs_text, lex.gene_name, '(Gene ID: %s)' % gene_id)

    return {
        'PubTator': PubtatorHgvs2Pmid(lex),
        'ClinVar': ClinvarHgvs2Pmid(lex),
    }
def pubtator_results_for_seqvar(seqvar_or_hgvs_text, gene_id):
    """ Searches pubtator for a single variant, given as a SequenceVariant or hgvs_text string.

    Returns a one-entry dictionary keyed by the variant's string form, i.e.:

        { hgvs_text: [ <dictionaries representing matching results from pubtator> ] }

    :param seqvar_or_hgvs_text: hgvs_text or SequenceVariant object
    :param gene_id: id of gene associated with variant (required)
    :return: dictionary of results
    :raises: RejectedSeqVar, PubtatorDBError
    """
    seqvar = Variant(seqvar_or_hgvs_text)
    key = '%s' % seqvar
    components = VariantComponents(seqvar)

    # Protein variants and all other sequence types use different searches.
    if seqvar.type == 'p':
        matches = pubtator_db.search_proteins(components, gene_id)
    else:
        matches = pubtator_db.search_m2p(components, gene_id)
    return {key: matches}
def test_insert(self):
    # Smoke test: passes as long as building VariantComponents from the
    # hgvs_c['INS'] sample does not raise. Nothing is asserted.
    VariantComponents(Variant(hgvs_c['INS']))
def test_simple_substitution(self):
    # Smoke test: passes as long as building VariantComponents from the
    # hgvs_c['SUB'] sample does not raise. Nothing is asserted.
    VariantComponents(Variant(hgvs_c['SUB']))
def test_deletion(self):
    # Smoke test: passes as long as building VariantComponents from the
    # hgvs_g['DEL'] sample does not raise. Nothing is asserted.
    VariantComponents(Variant(hgvs_g['DEL']))
def test_duplication(self):
    # Smoke test: passes as long as building VariantComponents from the
    # hgvs_c['DUP'] sample does not raise. Nothing is asserted.
    VariantComponents(Variant(hgvs_c['DUP']))
def test_indel(self):
    # Smoke test: passes as long as building VariantComponents from the
    # hgvs_n['INDEL'] sample does not raise. Nothing is asserted.
    VariantComponents(Variant(hgvs_n['INDEL']))
def test_frameshift(self):
    # Smoke test: passes as long as building VariantComponents from the
    # hgvs_p['FS'] sample does not raise. Nothing is asserted.
    VariantComponents(Variant(hgvs_p['FS']))