def seqvar_or_None(entry):
    """Turn a raw text entry into a Variant, or return None if that fails.

    The entry is stripped of surrounding whitespace, en dashes (U+2013) are
    replaced by ASCII hyphens, and the result is matched against ``re_hgvs``.
    The ``prefix``, ``transcript`` and ``edit`` groups of the match are
    reassembled as ``<prefix>_<transcript>:<edit>`` and handed to ``Variant``.

    :param entry: text that may contain an HGVS string, or None
    :return: the object returned by ``Variant``, or None when ``entry`` is
        None, does not match ``re_hgvs``, or ``Variant`` raises
        CriticalHgvsError / TypeError
    """
    if entry is None:
        return None

    # Basic cleaning; the en dash has been observed in at least one ClinVar
    # db entry and must become a plain hyphen before matching.
    cleaned = entry.strip().replace('\u2013', '-')

    matched = re_hgvs.match(cleaned)
    if not matched:
        # TODO: LRG
        return None

    groups = matched.groupdict()
    hgvs_text = ''.join(
        (groups['prefix'], '_', groups['transcript'], ':', groups['edit']))

    try:
        return Variant(hgvs_text)
    except (CriticalHgvsError, TypeError):
        # empty or broken
        return None
Esempio n. 2
0
 def test_simple_substitution_case(self):
     """Check the query built from the 'SUB' c. variant.

     The query must begin with the SCN5A gene term followed by the listed
     notations of 4786T>A, and must also contain the two other notations
     checked below.
     """
     lvg = LVG(Variant(hgvs_c['SUB']))
     query = GoogleQuery(lvg)

     expected_prefix = (
         '"SCN5A" ("4786T>A"|"T4786A"|"4786T-->A"|"4786T/A"|"4786T->A"')
     assert query.startswith(expected_prefix)

     for term in ('4732T->A', '4783T-->A'):
         assert query.find(term) > -1
def components_or_None(hgvs_p):
    """Build VariantComponents for ``hgvs_p``, or return None when unusable.

    :param hgvs_p: value passed straight to ``Variant``
    :return: the VariantComponents object when its ``ref`` is not the empty
        string; None when ``ref`` is empty or when TypeError, RejectedSeqVar
        or CriticalHgvsError is raised along the way
    """
    try:
        components = VariantComponents(Variant(hgvs_p))
        has_ref = components.ref != ''
    except (TypeError, RejectedSeqVar, CriticalHgvsError):
        # Either hgvs_p did not parse (Variant returned None) or it has
        # incomplete edit info.
        return None

    return components if has_ref else None
Esempio n. 4
0
def hgvs2pmid_cli():
    """Command-line entry point: run ``hgvs2pmid`` on the ``<hgvs>`` argument.

    Arguments are parsed with docopt from the module docstring. If ``Variant``
    yields a falsy result for the supplied text, a usage hint is printed and
    nothing else happens.
    """
    args = docopt(__doc__, version=__version__)
    variant = Variant(args['<hgvs>'])

    if not variant:
        print(
            'Supplied argument must be a valid HGVS string! See --help for examples.'
        )
        return

    hgvs2pmid(str(variant))
Esempio n. 5
0
    def test_crazy_long_hgvs_strings_just_to_see_what_happens(self):
        """Smoke test: feed very long protein HGVS strings to Variant.

        Nothing is asserted about the results; the test fails only if
        ``Variant`` raises on one of the inputs.
        """
        long_hgvs_strings = (
            # https://www.ncbi.nlm.nih.gov/clinvar/38771295/    RCV000008537
            # https://www.ncbi.nlm.nih.gov/clinvar/36484151/
            'NP_001121636.1:p.Gln197_Gln208delinsGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGln',
            # https://www.ncbi.nlm.nih.gov/clinvar/variation/68075/
            'LRG_702p1:p.Glu129_Asp145delinsGluIleLysValIleSerGlyIleLeuThrGlnGlyArgCysAspIleGluIleLysValIleSerGlyIleLeuThrGlnGlyArgCysAspIleAsp',
            # RCV000087437
            'LRG_3p1:p.Arg1139_Gly1140insValSerSerThrGluArgTyrTyrArgSerThrCysPheArgCysLeuHisPheArgLysIlePheTrpHisCysAspValMetIleLeuSerLeu',
            # also RCV000087437
            'NP_000081.1:p.Arg1139_Gly1140insValSerSerThrGluArgTyrTyrArgSerThrCysPheArgCysLeuHisPheArgLysIlePheTrpHisCysAspValMetIleLeuSerLeu',
        )

        for hgvs_text in long_hgvs_strings:
            Variant(hgvs_text)
Esempio n. 6
0
    def __init__(self, lex=None, seqvar=None, hgvs_text=None, **kwargs):
        """ Requires either an LVG object (lex=) or a Sequence Variant object (seqvar=) or an hgvs_text string (hgvs_text=)

        Priority for instantiation (in case of multiple-parameter submission): lex, seqvar, hgvs_text

        Keywords:

            gene_name: should be supplied when instantiated with seqvar= or hgvs_text=;
                       with lex= it is only used when lex.gene_name is empty
            cse: choice of Google CSE ("cx") -- "whitelist" or "schema" [default: whitelist]

        :raises ValueError: if none of lex, seqvar, hgvs_text is supplied
        :raises GoogleQueryMissingGeneName: if no gene name could be determined
        """
        if lex:
            self.lex = lex
            self.seqvar = lex.seqvar
            self.hgvs_text = lex.hgvs_text
            # Prefer the gene name carried by the LVG; fall back to the keyword.
            self.gene_name = lex.gene_name or kwargs.get('gene_name', None)

        elif seqvar:
            self.lex = None
            self.seqvar = seqvar
            self.hgvs_text = '%s' % seqvar
            self.gene_name = kwargs.get('gene_name', None)

        elif hgvs_text:
            self.lex = None
            self.seqvar = Variant(hgvs_text)
            self.hgvs_text = hgvs_text
            self.gene_name = kwargs.get('gene_name', None)

        else:
            # Without this branch no attribute is ever assigned and the
            # gene_name check below fails with an unrelated AttributeError.
            raise ValueError(
                'One of lex=, seqvar= or hgvs_text= must be supplied.')

        if self.gene_name is None:
            raise GoogleQueryMissingGeneName(
                'Information supplied with variant %s is missing gene name.' %
                self.seqvar)

        # self.synonyms = {'c': [], 'g': [], 'p': [], 'n': []}
        self.gene_synonyms = filter_gene_synonyms(GeneSynonyms(self.gene_name))

        # choice of Google CSE ("cx") -- "whitelist" or "schema" [default: whitelist]
        self.cse = kwargs.get('cse', 'whitelist')
Esempio n. 7
0
def pubtator_results_for_seqvar(seqvar_or_hgvs_text, gene_id):
    """ Look up pubtator results for one variant.

    The input is passed through ``Variant``; its string form becomes the single
    key of the returned dictionary:

        { hgvs_text: <result of the pubtator_db search> }

    Variants whose ``type`` is 'p' are searched with ``search_proteins``; all
    others with ``search_m2p``.

    :param seqvar_or_hgvs_text: hgvs_text or SequenceVariant object
    :param gene_id: id of gene associated with variant (required)
    :return: dictionary of results
    :raises: RejectedSeqVar, PubtatorDBError
    """
    seqvar = Variant(seqvar_or_hgvs_text)
    hgvs_text = '%s' % seqvar
    components = VariantComponents(seqvar)

    # Protein variants have their own lookup; everything else goes via m2p.
    search = (pubtator_db.search_proteins
              if seqvar.type == 'p' else pubtator_db.search_m2p)

    return {hgvs_text: search(components, gene_id)}
def process_row(db, dbrow):
    """ Expand one variant row into VariantComponents rows in the database.

    Steps:
        * parse the ``variant_name`` column with ``seqvar_or_None`` (parsing of
          the HGVS_c and HGVS_p columns is currently disabled);
        * use the first usable c./g./n. variant to build an LVG via ``lvg_or_None``;
        * for every c./g./n. variant in the LVG, build components with
          ``components_or_None`` and pass them to ``add_components_to_row``;
        * do the same for every protein variant listed in the LVG's ``hgvs_p``,
          after stripping parentheses.

    :param db: database handle, passed through to ``add_components_to_row``
    :param dbrow: (dict) one row from t2g_variant_summary table.
    :return: list of VariantComponents successfully added to the database (or empty list)
    """
    seqtypes = ('c', 'g', 'n')
    added_components = []

    def add_if_usable(candidate):
        # Record the components only when they could be built AND were added.
        comp = components_or_None(candidate)
        if comp and add_components_to_row(db, dbrow, comp):
            added_components.append(comp)

    variants = {seqtype: [] for seqtype in seqtypes}

    for option in ['variant_name']:
        seqvar = seqvar_or_None(dbrow[option])
        # Only c./g./n. variants can seed an LVG here; skip any other type
        # rather than failing with a KeyError on the lookup.
        if seqvar and seqvar.type in variants:
            variants[seqvar.type].append(seqvar)

    # use the first usable variant to construct an LVG for this variant.
    lex = None
    for seqtype in seqtypes:
        if variants[seqtype]:
            seqvar = variants[seqtype][0]
            print('[%s] Using %s to build LVG' %
                  (dbrow['variant_name'], seqvar))
            lex = lvg_or_None(seqvar)
            if lex:
                break

    # collect a unique set of protein variants stripped of parentheses, so that
    # "uncertain" protein effects collapse onto the "certain" ones.
    hgvs_ps = set()

    # add non-p-vars to the database
    if lex:
        for item in lex.hgvs_p:
            hgvs_ps.add(item.replace(')', '').replace('(', ''))

        for seqtype in seqtypes:
            for seqvar in lex.variants[seqtype].values():
                add_if_usable(seqvar)

    # and now add the proteins (sorted so the insertion order is deterministic)
    for hgvs_p in sorted(hgvs_ps):
        add_if_usable(Variant(hgvs_p))

    return added_components
Esempio n. 9
0
 def test_frameshift_case(self):
     """Smoke test: build a GoogleQuery from the LVG of the 'FS' c. variant."""
     lvg = LVG(Variant(hgvs_c['FS']))
     GoogleQuery(lvg)
Esempio n. 10
0
 def test_duplication(self):
     """Smoke test: VariantComponents can be built from the 'DUP' c. variant."""
     variant = Variant(hgvs_c['DUP'])
     VariantComponents(variant)
Esempio n. 11
0
 def test_insert(self):
     """Smoke test: VariantComponents can be built from the 'INS' c. variant."""
     variant = Variant(hgvs_c['INS'])
     VariantComponents(variant)
Esempio n. 12
0
 def test_frameshift(self):
     """Smoke test: VariantComponents can be built from the 'FS' p. variant."""
     variant = Variant(hgvs_p['FS'])
     VariantComponents(variant)
Esempio n. 13
0
 def test_indel(self):
     """Smoke test: VariantComponents can be built from the 'INDEL' n. variant."""
     variant = Variant(hgvs_n['INDEL'])
     VariantComponents(variant)
Esempio n. 14
0
 def test_simple_substitution(self):
     """Smoke test: VariantComponents can be built from the 'SUB' c. variant."""
     variant = Variant(hgvs_c['SUB'])
     VariantComponents(variant)
Esempio n. 15
0
 def test_deletion(self):
     """Smoke test: VariantComponents can be built from the 'DEL' g. variant."""
     variant = Variant(hgvs_g['DEL'])
     VariantComponents(variant)
Esempio n. 16
0
 def test_deletion_case(self):
     """Smoke test: build a GoogleQuery from the LVG of the 'DEL' c. variant."""
     lvg = LVG(Variant(hgvs_c['DEL']))
     GoogleQuery(lvg)
Esempio n. 17
0
 def test_dup_case(self):
     """Smoke test: build a GoogleQuery from the LVG of the 'DUP' c. variant."""
     lvg = LVG(Variant(hgvs_c['DUP']))
     GoogleQuery(lvg)
Esempio n. 18
0
 def test_ins_case(self):
     """Smoke test: build a GoogleQuery from the LVG of the 'INS' c. variant."""
     lvg = LVG(Variant(hgvs_c['INS']))
     GoogleQuery(lvg)
Esempio n. 19
0
 def test_allow_gene_name_in_hgvs_string(self):
     """A gene name in parentheses is accepted and absent from the string form."""
     parsed = Variant('NM_001165963.1(SCN1A):c.4338_6030del')
     rendered = '%s' % parsed
     assert rendered == 'NM_001165963.1:c.4338_6030del'
Esempio n. 20
0
 def test_bad_hgvs_string_returns_None(self):
     """Text that is not HGVS makes Variant return None rather than raise."""
     result = Variant('boogers')
     assert result is None