def test_splitDeletionHGSVc( self ): """ Deletions c.76_78del (alternatively c.76_78delACT) uncharacterised breakpoints c.(87+1_88-1)_(923+1_924-1)del denotes a deletion starting at an unknown position in intron 2 between coding DNA nucleotides 87+1 and 88-1, and ending at an unknown position between coding DNA nucleotides 923+1 and 924-1 c.(?_-30)_(*220_?)del denotes the deletion of the entire gene c.88+101_oGJB2:c.355-1045del denotes a deletion which ends in the flanking GJB2 gene at position 355-1045 (in the intron between nucleotides 354 and 355) on the reverse strand (the genes are thus located and fused in opposite transcriptional directions, see Discussion) for all descriptions the most 3' position possible is arbitrarily assigned to have been changed (see FAQ); ACTTTGTGCC to ACTTGCC is described as c.5_7del (c.5_7delTGT, not as c.4_6delTTG) ctttagGCATG to cttagGCATG in an intron is described as c.301-3delT (not as c.301-5delT) TCACTGTCTGCGGTAATC to TCACTG CGGTAATC is described as c.7_10del (c.7_10delTCTG) and not as c.4_7del (c.4_7delCTGT). AAAGAAGAGGAG to AAAG GAG is described as c.5_9del (c.5_9delAAGAG) and not as c.3_7del (c.3_7delAGAAG) Exceptions using a coding DNA reference sequence there is an exception to the rule around exon/intron and exon/exon borders when identical nucleotides flank the exon/intron or exon/exon border; when the exon 3/intron 3 border is ..CAGgtg.. and RNA analysis shows no effect on splicing but a deletion of a G the change ..CAGgtg.. to ..CAgtg.. is described as c.3delG and not c.3+1delG. when exon 3 ends with ..CAA.. and exon 4 starts with ..ACG.. and the sequence of genomic DNA shows that the last A-nucleotide of exon 3 is deleted (and not the first A-nucleotide in exon 4), the deletion changing ..CAAACG.. to ..CAACG.. is described as c.3delA and not c.4delA c.1210-12T(5_9) (not c.1210-6T(5_9)) describes the variable stretch of 5 to 9 T-residues in intron 9 of the CFTR gene. The most commonly used CFTR coding DNA reference sequence contains a stretch of 7 T's (see Repeated sequences). """ hgvsc = "62delC" #del coding single vals = mafvariant().splitDeletionHGVSc( hgvsc , null = "-" ) self.assertTrue( vals[0] == "C" ) self.assertTrue( vals[1] == "62" ) self.assertTrue( vals[2] == "-" ) hgvsc = "577_580delAAAC" #del coding multiple vals = mafvariant().splitDeletionHGVSc( hgvsc , multiple = True , null = "-" ) self.assertTrue( vals[0] == "AAAC" ) self.assertTrue( vals[1] == "577" ) self.assertTrue( vals[2] == "-" ) hgvsc = "3672+5delT" #del non-coding vals = mafvariant().splitDeletionHGVSc( hgvsc , null = "-" ) self.assertTrue( vals[0] == "T" ) self.assertTrue( vals[1] == "3672+5" ) self.assertTrue( vals[2] == "-" ) hgvsc = "-115delG" #del upstream vals = mafvariant().splitDeletionHGVSc( hgvsc , null = "-" ) self.assertTrue( vals[0] == "G" ) self.assertTrue( vals[1] == "-115" ) self.assertTrue( vals[2] == "-" ) hgvsc = "*226_*229delCTTA" #del 3' UTR vals = mafvariant().splitDeletionHGVSc( hgvsc , multiple = True , null = "-" ) self.assertTrue( vals[0] == "CTTA" ) self.assertTrue( vals[1] == "*226" ) self.assertTrue( vals[2] == "-" )
def test_convertAA( self ): aa = "Glu" self.assertTrue( mafvariant().convertAA( aa ) == "E" ) aa = "Ile" self.assertTrue( mafvariant().convertAA( aa ) == "I" ) aa = "Ala" self.assertTrue( mafvariant().convertAA( aa ) == "A" ) aa = "Ser" self.assertTrue( mafvariant().convertAA( aa ) == "S" ) aa = "Gly" self.assertTrue( mafvariant().convertAA( aa ) == "G" )
def parseClinVarTitle( self , DocumentSummary ): # print "biomine::webapi::entrez::entrezapi::parseClinVarTitle - " , title = self.getEntry( DocumentSummary , 'title' ) # print title codonPos = "" peptideRef = "" peptidePos = "" peptideAlt = "" var = mafvariant() residueMatches = re.search( "\((p\.\w+)\)" , title ) # print "peptide variant: " , # print residueMatches if residueMatches: hgvsp = residueMatches.groups()[-1] # print hgvsp [ peptideRef , peptidePos , peptideAlt ] = var.splitHGVSp( hgvsp ) codonMatches = re.search( "(c\.\d+)" , title ) # print "codon variant: " , # print codonMatches if codonMatches: hgvsc = codonMatches.groups()[-1] # print hgvsc [ codonRef , codonPos , codonAlt ] = var.splitHGVSc( hgvsc ) return { "title" : title , \ "referencePeptide" : peptideRef , \ "positionPeptide" : peptidePos , \ "alternatePeptide" : peptideAlt , \ "positionCodon" : codonPos }
def parseClinVarTitle(self, DocumentSummary): # print "biomine::webapi::entrez::entrezapi::parseClinVarTitle - " , title = self.getEntry(DocumentSummary, 'title') # print title codonPos = "" peptideRef = "" peptidePos = "" peptideAlt = "" var = mafvariant() residueMatches = re.search("\((p\.\w+)\)", title) # print "peptide variant: " , # print residueMatches if residueMatches: hgvsp = residueMatches.groups()[-1] # print hgvsp [peptideRef, peptidePos, peptideAlt] = var.splitHGVSp(hgvsp) codonMatches = re.search("(c\.\d+)", title) # print "codon variant: " , # print codonMatches if codonMatches: hgvsc = codonMatches.groups()[-1] # print hgvsc [codonRef, codonPos, codonAlt] = var.splitHGVSc(hgvsc) return { "title" : title , \ "referencePeptide" : peptideRef , \ "positionPeptide" : peptidePos , \ "alternatePeptide" : peptideAlt , \ "positionCodon" : codonPos }
def test_mafLine2Variant( self ): mafLine = "\t".join( [ \ "BRAF" , "." , "." , "GRCh37" , "7" , "140453136" , "140453136" , \ "+" , "Missense_Mutation" , "SNP" , "A" , "A" , "T" , \ "rs113488022" , "." , "test-t" , "test-n" , "A" , "A" , "." , \ "." , "." , "." , "." , "." , "." , "." , "." , "." , "." , "." , \ "." , "c.1799T>A" , "p.Val600Glu" , "p.V600E" , \ "ENST00000288602" , "ENSP00000288602" \ ] ) #columns in each row: 0-6 7-12 13-19 20-30 31-34 35-36 v = mafvariant( ) v.mafLine2Variant( mafLine , codon = 32 , peptideChange = 33 ) self.assertTrue( v.gene == "BRAF" ) self.assertTrue( v.chromosome == "7" ) self.assertTrue( v.start == "140453136" ) self.assertTrue( v.stop == "140453136" ) self.assertTrue( v.reference == "A" ) self.assertTrue( v.alternate == "T" ) self.assertTrue( v.strand == "+" ) self.assertTrue( v.sample == "test-t" ) self.assertTrue( v.assembly == "GRCh37" ) self.assertTrue( v.dbsnp == "rs113488022" ) self.assertTrue( v.referencePeptide == "V" ) self.assertTrue( v.positionPeptide == "600" ) self.assertTrue( v.alternatePeptide == "E" ) self.assertTrue( v.transcriptPeptide == None ) self.assertTrue( v.positionCodon == "1799" ) self.assertTrue( v.transcriptCodon == None ) self.assertTrue( v.variantClass == "Missense_Mutation" ) self.assertTrue( v.variantType == "SNP" ) self.assertTrue( v.disease == None )
def test_splitComplexHGVSc( self ): #c.112_117delinsTG (alternatively c.112_117delAGGTCAinsTG) denotes the replacement of nucleotides 112 to 117 (AGGTCA) by TG hgvsc = "5077_5080delGCTGinsTTGATTCTGC" #complex coding del ins vals = mafvariant().splitComplexHGVSc( hgvsc , multiple = True ) self.assertTrue( vals[0] == "GCTG" ) self.assertTrue( vals[1] == "5077" ) self.assertTrue( vals[2] == "TTGATTCTGC" ) #c.113delinsTACTAGC (alternatively c.113delGinsTACTAGC) denotes the replacement of nucleotide 113 by 7 new nucleotides (TACTACG) #c.114_115delinsA (alternative c.[114G>A; 115delT]) hgvsc = "3672+5_3672+11delTGCTTTTinsG" #complex non-coding del ins vals = mafvariant().splitComplexHGVSc( hgvsc , multiple = True ) self.assertTrue( vals[0] == "TGCTTTT" ) self.assertTrue( vals[1] == "3672+5" ) self.assertTrue( vals[2] == "G" ) hgvsc = "48+6_48+7delinsTT" #complex non-coding delins vals = mafvariant().splitComplexHGVSc( hgvsc , multiple = True , null = "-" ) self.assertTrue( vals[0] == "-" ) self.assertTrue( vals[1] == "48+6" ) self.assertTrue( vals[2] == "TT" )
def test_splitHGVSp( self ): hgvsp = "p.Val600Glu" vals = mafvariant().splitHGVSp( hgvsp ) self.assertTrue( vals[0] == "V" ) self.assertTrue( vals[1] == "600" ) self.assertTrue( vals[2] == "E" ) hgvsp = "p.V600E" vals = mafvariant().splitHGVSp( hgvsp ) self.assertTrue( vals[0] == "V" ) self.assertTrue( vals[1] == "600" ) self.assertTrue( vals[2] == "E" ) hgvsp = "NP_004949.1:p.Glu2419Lys" var = mafvariant() vals = var.splitHGVSp( hgvsp ) self.assertTrue( vals[0] == "E" ) self.assertTrue( vals[1] == "2419" ) self.assertTrue( vals[2] == "K" ) self.assertTrue( var.referencePeptide == "E" ) self.assertTrue( var.positionPeptide == "2419" ) self.assertTrue( var.alternatePeptide == "K" ) self.assertTrue( var.transcriptPeptide == "NP_004949.1" ) #EPHB2:1:23111545-23111545G>A::NM_017449.4:c.787G>A::NP_059145.2:p.V263Ile hgvsp = "NP_059145.2:p.V263Ile" var = mafvariant() vals = var.splitHGVSp( hgvsp ) self.assertTrue( vals[0] == "V" ) self.assertTrue( vals[1] == "263" ) self.assertTrue( vals[2] == "I" ) self.assertTrue( var.referencePeptide == "V" ) self.assertTrue( var.positionPeptide == "263" ) self.assertTrue( var.alternatePeptide == "I" ) self.assertTrue( var.transcriptPeptide == "NP_059145.2" ) #self.assertTrue( vals[0] #COL4A5:X:107939525-107939525A>G::NM_033380.2:c.A>G::NP_203699.1:p. -- p.? hgvsp = "NP_203699.1:p.?" var = mafvariant() vals = var.splitHGVSp( hgvsp ) self.assertTrue( vals[0] == "" ) self.assertTrue( vals[1] == "" ) self.assertTrue( vals[2] == "" ) self.assertTrue( var.referencePeptide == "" ) self.assertTrue( var.positionPeptide is None ) self.assertTrue( var.alternatePeptide == "" ) self.assertTrue( var.transcriptPeptide == "NP_203699.1" ) #EPHB2:1:23189553-23189553G>T::NM_017449.4:c.835G>T::NP_059145.2:p.Ala279Ser hgvsp = "NP_059145.2:p.Ala279Ser" var = mafvariant() vals = var.splitHGVSp( hgvsp ) self.assertTrue( vals[0] == "A" ) self.assertTrue( vals[1] == "279" ) self.assertTrue( vals[2] == "S" ) self.assertTrue( var.referencePeptide == "A" ) self.assertTrue( var.positionPeptide == "279" ) self.assertTrue( var.alternatePeptide == "S" ) self.assertTrue( var.transcriptPeptide == "NP_059145.2" )
def test_splitHGVSc( self ): hgvsc = "NM_004958.3:c.7255G>A" #snv coding var = mafvariant() vals = var.splitHGVSc( hgvsc ) self.assertTrue( vals[0] == "G" ) self.assertTrue( vals[1] == "7255" ) self.assertTrue( vals[2] == "A" ) self.assertTrue( var.reference == "-" ) self.assertTrue( var.positionCodon == "7255" ) self.assertTrue( var.alternate == "-" ) self.assertTrue( var.transcriptCodon == "NM_004958.3" ) hgvsc = "NM_004958.3:c.7255G>A" #snv coding var = mafvariant() vals = var.splitHGVSc( hgvsc , override = True ) self.assertTrue( vals[0] == "G" ) self.assertTrue( vals[1] == "7255" ) self.assertTrue( vals[2] == "A" ) self.assertTrue( var.reference == "G" ) self.assertTrue( var.positionCodon == "7255" ) self.assertTrue( var.alternate == "A" ) self.assertTrue( var.transcriptCodon == "NM_004958.3" )
def test_splitSNVHGVSc( self ): """ Substitutions c.76A>C c.-14G>C denotes a G to C substitution 14 nucleotides 5' of the ATG translation initiation codon c.88+1G>T denotes the G to T substitution at nucleotide +1 of an intron (in the coding DNA positioned between nucleotides 88 and 89) c.89-2A>C denotes the A to C substitution at nucleotide -2 of an intron (in the coding DNA positioned between nucleotides 88 and 89) c.*46T>A denotes a T to A substitution 46 nucleotides 3' of the translation termination codon """ hgvsc = "1799T>A" #snv coding vals = mafvariant().splitSNVHGVSc( hgvsc ) self.assertTrue( vals[0] == "T" ) self.assertTrue( vals[1] == "1799" ) self.assertTrue( vals[2] == "A" ) hgvsc = "1290-2A>C" #snv non-coding vals = mafvariant().splitSNVHGVSc( hgvsc , noncoding = True ) self.assertTrue( vals[0] == "A" ) self.assertTrue( vals[1] == "1290-2" ) self.assertTrue( vals[2] == "C" ) hgvsc = "2301+15A>C" #snv non-coding vals = mafvariant().splitSNVHGVSc( hgvsc , noncoding = True ) self.assertTrue( vals[0] == "A" ) self.assertTrue( vals[1] == "2301+15" ) self.assertTrue( vals[2] == "C" ) hgvsc = "-14G>C" #snv non-coding vals = mafvariant().splitSNVHGVSc( hgvsc , noncoding = True ) self.assertTrue( vals[0] == "G" ) self.assertTrue( vals[1] == "-14" ) self.assertTrue( vals[2] == "C" ) hgvsc = "*46T>A" #snv non-coding vals = mafvariant().splitSNVHGVSc( hgvsc , noncoding = True ) self.assertTrue( vals[0] == "T" ) self.assertTrue( vals[1] == "*46" ) self.assertTrue( vals[2] == "A" )
def readMAF( inputFile , **kwargs ): userVariants = [] try: inFile = open( inputFile , 'r' ) codonColumn = kwargs.get( 'codon' , 47 ) peptideChangeColumn = kwargs.get( 'peptideChange' , 48 ) next(inFile) for line in inFile: var = mafvariant() var.mafLine2Variant( line , peptideChange=peptideChangeColumn , codon=codonColumn ) userVariants.append( var ) return userVariants except: raise Exception( "biomine Error: bad .maf file" )
def test_splitInsertionHGVSc( self ): """ Insertions c.76_77insT denotes that a T is inserted between nucleotides 76 and 77 of the coding DNA reference sequence c.123+54_123+55insAB012345.2:g.76_420 denotes an intronic insertion ( between nucleotides c.123+54 and 123+55) of 345 nucleotides (nucleotides 76 to 420 like in GenBank file AB012345 version 2) NOTE: descriptions like c.123+54_123+55ins345 and c.123+54_123+55insAlu are not allowed: "ins345" and "insAlu" are not specified and the description can not be used to reconstruct the exact change described. """ hgvsc = "62insC" #ins coding single vals = mafvariant().splitInsertionHGVSc( hgvsc , null = "-" ) self.assertTrue( vals[0] == "-" ) self.assertTrue( vals[1] == "62" ) self.assertTrue( vals[2] == "C" ) hgvsc = "577_580insAAAC" #ins coding multiple vals = mafvariant().splitInsertionHGVSc( hgvsc , multiple = True , null = "-" ) self.assertTrue( vals[0] == "-" ) self.assertTrue( vals[1] == "577" ) self.assertTrue( vals[2] == "AAAC" ) hgvsc = "3672+5insT" #ins non-coding vals = mafvariant().splitInsertionHGVSc( hgvsc , null = "-" ) self.assertTrue( vals[0] == "-" ) self.assertTrue( vals[1] == "3672+5" ) self.assertTrue( vals[2] == "T" ) hgvsc = "-115insG" #ins upstream vals = mafvariant().splitInsertionHGVSc( hgvsc , null = "-" ) self.assertTrue( vals[0] == "-" ) self.assertTrue( vals[1] == "-115" ) self.assertTrue( vals[2] == "G" ) hgvsc = "*226_*229insCTTA" #ins 3' UTR vals = mafvariant().splitInsertionHGVSc( hgvsc , multiple = True , null = "-" ) self.assertTrue( vals[0] == "-" ) self.assertTrue( vals[1] == "*226" ) self.assertTrue( vals[2] == "CTTA" )
def readMAF(inputFile, **kwargs): userVariants = [] try: inFile = open(inputFile, 'r') codonColumn = kwargs.get('codon', 47) peptideChangeColumn = kwargs.get('peptideChange', 48) next(inFile) for line in inFile: var = mafvariant() var.mafLine2Variant(line, peptideChange=peptideChangeColumn, codon=codonColumn) userVariants.append(var) return userVariants except: raise Exception("biomine Error: bad .maf file")
def test_empty_init( self ): v = mafvariant() self.assertTrue( v.gene == "" ) self.assertTrue( v.chromosome == None ) self.assertTrue( v.start == None ) self.assertTrue( v.stop == None ) self.assertTrue( v.reference == "-" ) self.assertTrue( v.alternate == "-" ) self.assertTrue( v.strand == "+" ) self.assertTrue( v.sample == None ) self.assertTrue( v.assembly == None ) self.assertTrue( v.dbsnp == None ) self.assertTrue( v.referencePeptide == "" ) self.assertTrue( v.positionPeptide == None ) self.assertTrue( v.alternatePeptide == "" ) self.assertTrue( v.transcriptPeptide == None ) self.assertTrue( v.positionCodon == None ) self.assertTrue( v.transcriptCodon == None ) self.assertTrue( v.variantClass == None ) self.assertTrue( v.variantType == None ) self.assertTrue( v.disease == None )
def getCSQ(self, var, record, begin, end, ref, alt, alti, **kwargs): info = record.INFO csq = info.get('CSQ', "noCSQ") preVEP = [] if not csq == "noCSQ": vepDone = True exacDone = True var.vepVariant = vepvariant() for thisCSQ in csq: values = thisCSQ.split("|") var.vcfInfo = values aas = [None, None] if self.getVCFKeyIndex(values, "Amino_acids"): #8 => Amino_acids aas = self.getVCFKeyIndex(values, "Amino_acids").split("/") if len(aas) > 1: aas[0] = mafvariant().convertAA(aas[0]) aas[1] = mafvariant().convertAA(aas[1]) else: #28 => HGVSc #29 => HGVSp hgvsp = self.getVCFKeyIndex(values, "HGVSp").split(":") changep = None if len(hgvsp) > 1: changep = re.match("p\.", hgvsp[1]) if changep: aas = mafvariant().splitHGVSp(hgvsp[1]) aas[0] = mafvariant().convertAA(aas[0]) aas[2] = mafvariant().convertAA(aas[2]) else: aas.append(None) needVEP = True preVEP.append(var) exons = [None, None] if self.getVCFKeyIndex(values, "EXON"): #25 => EXON exons = self.getVCFKeyIndex(values, "EXON").split("/") if len(exons) == 1: exons.append(None) introns = [None, None] if self.getVCFKeyIndex(values, "INTRON"): #26 => INTRON introns = self.getVCFKeyIndex(values, "INTRON").split("/") if len(introns) == 1: introns.append(None) siftStuff = [None, None] if self.getVCFKeyIndex(values, "SIFT"): siftStuff = self.getVCFKeyIndex(values, "SIFT").split("(") if len(siftStuff) == 1: siftStuff.append(None) else: siftStuff[1] = siftStuff[1].rstrip(")") polyPhenStuff = [None, None] if self.getVCFKeyIndex(values, "PolyPhen"): polyPhenStuff = self.getVCFKeyIndex(values, "PolyPhen").split("(") if len(polyPhenStuff) == 1: polyPhenStuff.append(None) else: polyPhenStuff[1] = polyPhenStuff[1].rstrip(")") vcv = vepconsequencevariant( \ chromosome = record.CHROM , \ start = begin , \ stop = end , \ dbsnp = record.ID , \ reference = ref , \ alternate = alt , \ gene_id=self.getVCFKeyIndex( values , "Gene" ) , \ transcriptCodon=self.getVCFKeyIndex( values , "Feature" ) , \ consequence_terms=self.getVCFKeyIndex( values , "Consequence" ).split( "&" ) , \ positionCodon=self.getVCFKeyIndex( values , "cDNA_position" ) , \ positionPeptide=self.getVCFKeyIndex( values , "Protein_position" ) , \ referencePeptide=aas[0] , \ alternatePeptide=aas[1] , \ strand=self.getVCFKeyIndex( values , "STRAND" ) , \ gene=self.getVCFKeyIndex( values , "SYMBOL" ) , \ gene_symbol_source=self.getVCFKeyIndex( values , "SYMBOL_SOURCE" ) , \ hgnc_id=self.getVCFKeyIndex( values , "HGNC_ID" ) , \ biotype=self.getVCFKeyIndex( values , "BIOTYPE" ) , \ canonical=self.getVCFKeyIndex( values , "CANONICAL" ) , \ ccds=self.getVCFKeyIndex( values , "CCDS" ) , \ transcriptPeptide=self.getVCFKeyIndex( values , "ENSP" ) , \ predictionSIFT=siftStuff[0] , \ scoreSIFT=siftStuff[1] , \ predictionPolyphen=polyPhenStuff[0] , \ scorePolyphen=polyPhenStuff[1] , \ exon=exons[0] , \ totalExons=exons[1] , \ intron=introns[0] , \ totalIntrons=introns[1] , \ ) var.alleleFrequency = self.getVCFKeyIndex(values, "GMAF") var.vepVariant.consequences.append(vcv) self.determineMostSevere(var, **kwargs) self.setAlleleMeasures(var, info, **kwargs) return None
def test_empty_init(self): v = mafvariant() self.assertFalse(v)
def test_splitHGVSp(self): hgvsp = "p.Val600Glu" vals = mafvariant().splitHGVSp(hgvsp) self.assertTrue(vals[0] == "V") self.assertTrue(vals[1] == "600") self.assertTrue(vals[2] == "E") hgvsp = "p.V600E" vals = mafvariant().splitHGVSp(hgvsp) self.assertTrue(vals[0] == "V") self.assertTrue(vals[1] == "600") self.assertTrue(vals[2] == "E") hgvsp = "NP_004949.1:p.Glu2419Lys" var = mafvariant() vals = var.splitHGVSp(hgvsp) self.assertTrue(vals[0] == "E") self.assertTrue(vals[1] == "2419") self.assertTrue(vals[2] == "K") self.assertTrue(var.referencePeptide == "E") self.assertTrue(var.positionPeptide == "2419") self.assertTrue(var.alternatePeptide == "K") self.assertTrue(var.transcriptPeptide == "NP_004949.1") #EPHB2:1:23111545-23111545G>A::NM_017449.4:c.787G>A::NP_059145.2:p.V263Ile hgvsp = "NP_059145.2:p.V263Ile" var = mafvariant() vals = var.splitHGVSp(hgvsp) self.assertTrue(vals[0] == "V") self.assertTrue(vals[1] == "263") self.assertTrue(vals[2] == "I") self.assertTrue(var.referencePeptide == "V") self.assertTrue(var.positionPeptide == "263") self.assertTrue(var.alternatePeptide == "I") self.assertTrue(var.transcriptPeptide == "NP_059145.2") #self.assertTrue( vals[0] #COL4A5:X:107939525-107939525A>G::NM_033380.2:c.A>G::NP_203699.1:p. -- p.? hgvsp = "NP_203699.1:p.?" var = mafvariant() vals = var.splitHGVSp(hgvsp) self.assertTrue(vals[0] == "") self.assertTrue(vals[1] == "") self.assertTrue(vals[2] == "") self.assertTrue(var.referencePeptide == "") self.assertTrue(var.positionPeptide is None) self.assertTrue(var.alternatePeptide == "") self.assertTrue(var.transcriptPeptide == "NP_203699.1") #EPHB2:1:23189553-23189553G>T::NM_017449.4:c.835G>T::NP_059145.2:p.Ala279Ser hgvsp = "NP_059145.2:p.Ala279Ser" var = mafvariant() vals = var.splitHGVSp(hgvsp) self.assertTrue(vals[0] == "A") self.assertTrue(vals[1] == "279") self.assertTrue(vals[2] == "S") self.assertTrue(var.referencePeptide == "A") self.assertTrue(var.positionPeptide == "279") self.assertTrue(var.alternatePeptide == "S") self.assertTrue(var.transcriptPeptide == "NP_059145.2") #POLR2F:22:38369491-38369493CAGGGCCCCCTTT>C::NM_006941.3:c.delCAGGGCCCCCTTTinsC::NP_008872.1:p.*467Cysext*82 hgvsp = "NP_008872.1:p.*467Cysext*82" var = mafvariant() vals = var.splitHGVSp(hgvsp) self.assertTrue(vals[0] == "*") self.assertTrue(vals[1] == "467") #pdb.set_trace() self.assertTrue(vals[2] == "Cext*82") self.assertTrue(var.referencePeptide == "*") self.assertTrue(var.positionPeptide == "467") self.assertTrue(var.alternatePeptide == "Cext*82") self.assertTrue(var.transcriptPeptide == "NP_008872.1")