def annotateVariantsPost(self, variants, **kwargs):
    """Annotate variants through batched POST requests.

    The variants are sent in batches of at most ``maxPost``. Every variant
    is first stored in the result under its ``var.ensembl()`` string as a
    placeholder ``vepvariant`` that references the parent variant. Entries
    parsed from a successful response are then stored under their own
    ``inputVariant`` key, replacing a placeholder when the keys match.

    Keyword arguments:
        allOptions -- value handed to ``self.doAllOptions(data=...)``
            (default True).
        maxPost -- maximum number of variants per request (default 400).
        All keyword arguments are also forwarded to ``self.submit``.

    Returns:
        dict mapping input-variant strings to ``vepvariant`` objects. A
        failed batch is reported on stdout and skipped; its placeholders
        remain in the result.
    """
    doAllOptions = kwargs.get('allOptions', True)
    # Default chosen because of error 400 (bad request) or 504
    # (gateway/proxy server timeout). Limits quoted in the original notes:
    #   400  https://github.com/ensembl/ensembl-rest/wiki/POST-Requests
    #   1000 http://rest.ensembl.org/documentation/info/vep_region_post
    maxPost = kwargs.get('maxPost', 400)
    lengthVariants = len(variants)
    annotatedVariants = {}  # dict of vepvariants
    nullValue = "."
    delim = " "
    for i in range(0, lengthVariants, maxPost):
        # Clamp the batch end so the range reported on failure is accurate
        # for the final (possibly short) batch as well.
        j = min(i + maxPost, lengthVariants)
        subsetVariants = variants[i:j]
        formattedVariants = []
        # The returned mapping is not used here; the call is kept in case it
        # has side effects -- TODO confirm and drop if it has none.
        self.checkInsertionsReference(subsetVariants,
                                      nullValue=nullValue,
                                      delim=delim)
        self.fullReset()
        self.setSubset(ensemblapi.regionSubset)
        self.doAllOptions(data=doAllOptions)
        for var in subsetVariants:
            # var.ensembl() supersedes the earlier hand-built VCF-style
            # region string.
            inputVariant = var.ensembl()
            print(inputVariant)
            formattedVariants.append(inputVariant)
            annotatedVariants[inputVariant] = vepvariant(
                inputVariant=inputVariant, parentVariant=var)
        # following examples from documentation
        self.addData("variants", formattedVariants)
        self.addHeader("Accept", "application/json")
        self.addHeader("Content-Type", "application/json")
        self.submit(post=True, **kwargs)
        if self.response.ok and self.response.text:
            for rootElement in self.response.json():
                annotated = vepvariant()
                annotated.parseEntryFromVEP(rootElement)
                annotated.setInputVariant()
                annotatedVariants[annotated.inputVariant] = annotated
        else:
            print("ensemblapi Error: cannot access desired XML fields/tags for variants "
                  + "[" + str(i) + ":" + str(j) + "]")
    return annotatedVariants
def annotateVariantsPost(self, variants, **kwargs):
    """Annotate variants through batched POST requests.

    The variants are sent in batches of at most ``maxPost``. Every variant
    is first stored in the result under its ``var.ensembl()`` string as a
    placeholder ``vepvariant`` that references the parent variant. Entries
    parsed from a successful response are then stored under their own
    ``inputVariant`` key, replacing a placeholder when the keys match.

    Keyword arguments:
        allOptions -- value handed to ``self.doAllOptions(data=...)``
            (default True).
        maxPost -- maximum number of variants per request (default 400).
        All keyword arguments are also forwarded to ``self.submit``.

    Returns:
        dict mapping input-variant strings to ``vepvariant`` objects. A
        failed batch is reported on stdout and skipped; its placeholders
        remain in the result.
    """
    doAllOptions = kwargs.get('allOptions', True)
    # Default chosen because of error 400 (bad request) or 504
    # (gateway/proxy server timeout). Limits quoted in the original notes:
    #   400  https://github.com/ensembl/ensembl-rest/wiki/POST-Requests
    #   1000 http://rest.ensembl.org/documentation/info/vep_region_post
    maxPost = kwargs.get('maxPost', 400)
    lengthVariants = len(variants)
    annotatedVariants = {}  # dict of vepvariants
    nullValue = "."
    delim = " "
    for i in range(0, lengthVariants, maxPost):
        # Clamp the batch end so the range reported on failure is accurate
        # for the final (possibly short) batch as well.
        j = min(i + maxPost, lengthVariants)
        subsetVariants = variants[i:j]
        formattedVariants = []
        # The returned mapping is not used here; the call is kept in case it
        # has side effects -- TODO confirm and drop if it has none.
        self.checkInsertionsReference(subsetVariants,
                                      nullValue=nullValue,
                                      delim=delim)
        self.fullReset()
        self.setSubset(ensemblapi.regionSubset)
        self.doAllOptions(data=doAllOptions)
        for var in subsetVariants:
            # var.ensembl() supersedes the earlier hand-built VCF-style
            # region string.
            inputVariant = var.ensembl()
            print(inputVariant)
            formattedVariants.append(inputVariant)
            annotatedVariants[inputVariant] = vepvariant(
                inputVariant=inputVariant, parentVariant=var)
        # following examples from documentation
        self.addData("variants", formattedVariants)
        self.addHeader("Accept", "application/json")
        self.addHeader("Content-Type", "application/json")
        self.submit(post=True, **kwargs)
        if self.response.ok and self.response.text:
            for rootElement in self.response.json():
                annotated = vepvariant()
                annotated.parseEntryFromVEP(rootElement)
                annotated.setInputVariant()
                annotatedVariants[annotated.inputVariant] = annotated
        else:
            print("ensemblapi Error: cannot access desired XML fields/tags for variants "
                  + "[" + str(i) + ":" + str(j) + "]")
    return annotatedVariants
def trySubmit(self, lastSubmitTime, **kwargs):
    """POST the prepared request, retrying when the server asks to wait.

    Up to 30 submissions are made. After a failed submission the response's
    ``Retry-After`` header (seconds) is honoured by sleeping before the next
    attempt; a failure without a usable ``Retry-After`` header is treated as
    final. Note that the sleep blocks the caller for as long as the server
    requests.

    Parameters:
        lastSubmitTime -- ``time.time()`` stamp of the previous submission;
            returned unchanged as ``timeAtSubmit`` only if no submission is
            made.
        **kwargs -- forwarded to ``self.submit`` together with ``post=True``.

    Returns:
        list ``[annotatedVariants, attempts, success, timeAtSubmit]`` where
        ``annotatedVariants`` maps ``inputVariant`` to the ``vepvariant``
        parsed from each response entry (empty on failure), ``attempts`` is
        the number of submissions made, ``success`` is a bool and
        ``timeAtSubmit`` is the time stamp of the last submission.
    """
    maxAttempts = 30
    errorPrefix = "BioMine::webapi::ensembl::ensemblapi::trySubmit Error: "
    annotatedVariants = {}
    attempts = 0
    timeAtSubmit = lastSubmitTime
    waitTime = 0
    while attempts < maxAttempts:
        # Wait out whatever is left of the server-requested pause, measured
        # from the previous submission.
        remaining = waitTime - (time.time() - timeAtSubmit)
        if remaining > 0:
            time.sleep(remaining)
        attempts += 1
        timeAtSubmit = time.time()
        self.submit(post=True, **kwargs)
        response = self.response
        if response.ok and response.text:
            for rootElement in response.json():
                var = vepvariant()
                var.parseEntryFromVEP(rootElement)
                var.setInputVariant()
                annotatedVariants[var.inputVariant] = var
            return [annotatedVariants, attempts, True, timeAtSubmit]
        try:
            # The pause is announced on the *response*; self.headers holds
            # what is being sent with the request (see addHeader usage).
            waitTime = float(response.headers['Retry-After'])
        except (AttributeError, KeyError, TypeError, ValueError):
            print(errorPrefix + "failed to query due to "
                  + str(response.status_code) + ": " + response.reason)
            return [annotatedVariants, attempts, False, timeAtSubmit]
    print(errorPrefix + "failed to query after " + str(attempts)
          + " attempts. Stopping due to "
          + str(self.response.status_code) + ": " + self.response.reason)
    return [annotatedVariants, attempts, False, timeAtSubmit]
def annotateVariantsPost(self, variants, **kwargs):
    """Annotate variants through batched, retried POST requests.

    The variants are sent in batches of at most ``maxPost`` via
    ``self.trySubmit``. Every variant is first stored in the result under
    its ``var.ensembl()`` string as a placeholder ``vepvariant`` that
    references the parent variant; the annotations returned for each batch
    are then merged into the same dict, replacing a placeholder when the
    keys match.

    Keyword arguments:
        allOptions -- value handed to ``self.doAllOptions(data=...)``
            (default True).
        maxPost -- maximum number of variants per request (default 150).
        All keyword arguments are also forwarded to ``self.trySubmit``.

    Returns:
        dict mapping input-variant strings to ``vepvariant`` objects,
        accumulated over all batches. A failed batch is reported on stdout
        and skipped; its placeholders remain in the result.
    """
    doAllOptions = kwargs.get('allOptions', True)
    # Default chosen because of error 400 (bad request), 403 (banned), or
    # 504 (gateway/proxy server timeout). Limits quoted in the original notes:
    #   400  https://github.com/ensembl/ensembl-rest/wiki/POST-Requests
    #   1000 http://rest.ensembl.org/documentation/info/vep_region_post
    maxPost = kwargs.get('maxPost', 150)
    lengthVariants = len(variants)
    annotatedVariants = {}  # dict of vepvariants
    timeAtSubmit = 0
    nullValue = "."
    delim = " "
    for i in range(0, lengthVariants, maxPost):
        # Clamp the batch end so the range reported on failure is accurate
        # for the final (possibly short) batch as well.
        j = min(i + maxPost, lengthVariants)
        subsetVariants = variants[i:j]
        formattedVariants = []
        # The returned mapping is not used here; the call is kept in case it
        # has side effects -- TODO confirm and drop if it has none.
        self.checkInsertionsReference(subsetVariants,
                                      nullValue=nullValue,
                                      delim=delim)
        self.fullReset()
        self.setSubset(ensemblapi.regionSubset)
        self.doAllOptions(data=doAllOptions)
        for var in subsetVariants:
            inputVariant = var.ensembl()
            print(inputVariant)
            formattedVariants.append(inputVariant)
            annotatedVariants[inputVariant] = vepvariant(
                inputVariant=inputVariant, parentVariant=var)
        # following examples from documentation
        self.addData("variants", formattedVariants)
        self.addHeader("Accept", "application/json")
        self.addHeader("Content-Type", "application/json")
        batchAnnotated, _attempts, success, timeAtSubmit = self.trySubmit(
            timeAtSubmit, **kwargs)
        # Merge instead of rebinding: rebinding would discard every earlier
        # batch (and all placeholders) each time a batch returns.
        annotatedVariants.update(batchAnnotated)
        if not success:
            print("BioMine::webapi::ensembl::ensemblapi Error: cannot access desired XML fields/tags for variants "
                  + "[" + str(i) + ":" + str(j) + "]")
    return annotatedVariants
def trySubmit(self, lastSubmitTime, **kwargs):
    """POST the prepared request, retrying when the server asks to wait.

    Up to 30 submissions are made. After a failed submission the response's
    ``Retry-After`` header (seconds) is honoured by sleeping before the next
    attempt; a failure without a usable ``Retry-After`` header is treated as
    final. Note that the sleep blocks the caller for as long as the server
    requests.

    Parameters:
        lastSubmitTime -- ``time.time()`` stamp of the previous submission;
            returned unchanged as ``timeAtSubmit`` only if no submission is
            made.
        **kwargs -- forwarded to ``self.submit`` together with ``post=True``.

    Returns:
        list ``[annotatedVariants, attempts, success, timeAtSubmit]`` where
        ``annotatedVariants`` maps ``inputVariant`` to the ``vepvariant``
        parsed from each response entry (empty on failure), ``attempts`` is
        the number of submissions made, ``success`` is a bool and
        ``timeAtSubmit`` is the time stamp of the last submission.
    """
    maxAttempts = 30
    errorPrefix = "BioMine::webapi::ensembl::ensemblapi::trySubmit Error: "
    annotatedVariants = {}
    attempts = 0
    timeAtSubmit = lastSubmitTime
    waitTime = 0
    while attempts < maxAttempts:
        # Wait out whatever is left of the server-requested pause, measured
        # from the previous submission.
        remaining = waitTime - (time.time() - timeAtSubmit)
        if remaining > 0:
            time.sleep(remaining)
        attempts += 1
        timeAtSubmit = time.time()
        self.submit(post=True, **kwargs)
        response = self.response
        if response.ok and response.text:
            for rootElement in response.json():
                var = vepvariant()
                var.parseEntryFromVEP(rootElement)
                var.setInputVariant()
                annotatedVariants[var.inputVariant] = var
            return [annotatedVariants, attempts, True, timeAtSubmit]
        try:
            # The pause is announced on the *response*; self.headers holds
            # what is being sent with the request (see addHeader usage).
            waitTime = float(response.headers['Retry-After'])
        except (AttributeError, KeyError, TypeError, ValueError):
            print(errorPrefix + "failed to query due to "
                  + str(response.status_code) + ": " + response.reason)
            return [annotatedVariants, attempts, False, timeAtSubmit]
    print(errorPrefix + "failed to query after " + str(attempts)
          + " attempts. Stopping due to "
          + str(self.response.status_code) + ": " + self.response.reason)
    return [annotatedVariants, attempts, False, timeAtSubmit]
def annotateVariantsPost(self, variants, **kwargs):
    """Annotate variants through batched, retried POST requests.

    The variants are sent in batches of at most ``maxPost`` via
    ``self.trySubmit``. Every variant is first stored in the result under
    its ``var.ensembl()`` string as a placeholder ``vepvariant`` that
    references the parent variant; the annotations returned for each batch
    are then merged into the same dict, replacing a placeholder when the
    keys match.

    Keyword arguments:
        allOptions -- value handed to ``self.doAllOptions(data=...)``
            (default True).
        maxPost -- maximum number of variants per request (default 150).
        All keyword arguments are also forwarded to ``self.trySubmit``.

    Returns:
        dict mapping input-variant strings to ``vepvariant`` objects,
        accumulated over all batches. A failed batch is reported on stdout
        and skipped; its placeholders remain in the result.
    """
    doAllOptions = kwargs.get('allOptions', True)
    # Default chosen because of error 400 (bad request), 403 (banned), or
    # 504 (gateway/proxy server timeout). Limits quoted in the original notes:
    #   400  https://github.com/ensembl/ensembl-rest/wiki/POST-Requests
    #   1000 http://rest.ensembl.org/documentation/info/vep_region_post
    maxPost = kwargs.get('maxPost', 150)
    lengthVariants = len(variants)
    annotatedVariants = {}  # dict of vepvariants
    timeAtSubmit = 0
    nullValue = "."
    delim = " "
    for i in range(0, lengthVariants, maxPost):
        # Clamp the batch end so the range reported on failure is accurate
        # for the final (possibly short) batch as well.
        j = min(i + maxPost, lengthVariants)
        subsetVariants = variants[i:j]
        formattedVariants = []
        # The returned mapping is not used here; the call is kept in case it
        # has side effects -- TODO confirm and drop if it has none.
        self.checkInsertionsReference(subsetVariants,
                                      nullValue=nullValue,
                                      delim=delim)
        self.fullReset()
        self.setSubset(ensemblapi.regionSubset)
        self.doAllOptions(data=doAllOptions)
        for var in subsetVariants:
            inputVariant = var.ensembl()
            print(inputVariant)
            formattedVariants.append(inputVariant)
            annotatedVariants[inputVariant] = vepvariant(
                inputVariant=inputVariant, parentVariant=var)
        # following examples from documentation
        self.addData("variants", formattedVariants)
        self.addHeader("Accept", "application/json")
        self.addHeader("Content-Type", "application/json")
        batchAnnotated, _attempts, success, timeAtSubmit = self.trySubmit(
            timeAtSubmit, **kwargs)
        # Merge instead of rebinding: rebinding would discard every earlier
        # batch (and all placeholders) each time a batch returns.
        annotatedVariants.update(batchAnnotated)
        if not success:
            print("BioMine::webapi::ensembl::ensemblapi Error: cannot access desired XML fields/tags for variants "
                  + "[" + str(i) + ":" + str(j) + "]")
    return annotatedVariants
def getCSQ(self, var, record, begin, end, ref, alt, alti, **kwargs):
    """Fill ``var.vepVariant`` from the ``CSQ`` entries of a VCF record.

    When ``record.INFO`` has no ``CSQ`` key nothing is touched. Otherwise a
    fresh ``vepvariant`` is attached to ``var`` and, for every CSQ string
    (``|``-separated fields looked up through ``self.getVCFKeyIndex``), a
    ``vepconsequencevariant`` is appended to ``var.vepVariant.consequences``.
    ``var.vcfInfo`` and ``var.alleleFrequency`` (field ``GMAF``) end up
    holding the values of the last CSQ entry. Finally
    ``self.determineMostSevere`` and ``self.setAlleleMeasures`` are called.

    Parameters:
        var -- variant object to annotate (mutated in place).
        record -- VCF record; ``CHROM``, ``ID`` and ``INFO`` are read.
        begin, end, ref, alt -- passed to each consequence as ``start``,
            ``stop``, ``reference`` and ``alternate``.
        alti -- not used by this method.
        **kwargs -- forwarded to ``determineMostSevere`` and
            ``setAlleleMeasures``.

    Returns:
        None.
    """
    info = record.INFO
    csq = info.get('CSQ', "noCSQ")
    if csq == "noCSQ":
        return None

    var.vepVariant = vepvariant()
    for thisCSQ in csq:
        values = thisCSQ.split("|")
        var.vcfInfo = values

        aas = [None, None]
        aminoAcids = self.getVCFKeyIndex(values, "Amino_acids")
        if aminoAcids:
            aas = aminoAcids.split("/")
            if len(aas) > 1:
                aas[0] = mafvariant().convertAA(aas[0])
                aas[1] = mafvariant().convertAA(aas[1])
            else:
                # Only one amino acid given: fall back to the HGVSp field.
                # The lookup may be empty, so guard before splitting.
                hgvsp = (self.getVCFKeyIndex(values, "HGVSp") or "").split(":")
                if len(hgvsp) > 1 and re.match(r"p\.", hgvsp[1]):
                    aas = mafvariant().splitHGVSp(hgvsp[1])
                    aas[0] = mafvariant().convertAA(aas[0])
                    aas[2] = mafvariant().convertAA(aas[2])
                    # NOTE(review): index 2 is converted here, yet index 1
                    # is what is passed as alternatePeptide below -- confirm
                    # what splitHGVSp returns.
                else:
                    aas.append(None)

        # EXON / INTRON look like "number/total"; pad when total is absent.
        exons = [None, None]
        exonField = self.getVCFKeyIndex(values, "EXON")
        if exonField:
            exons = exonField.split("/")
            if len(exons) == 1:
                exons.append(None)

        introns = [None, None]
        intronField = self.getVCFKeyIndex(values, "INTRON")
        if intronField:
            introns = intronField.split("/")
            if len(introns) == 1:
                introns.append(None)

        # SIFT / PolyPhen look like "prediction(score)".
        siftStuff = [None, None]
        siftField = self.getVCFKeyIndex(values, "SIFT")
        if siftField:
            siftStuff = siftField.split("(")
            if len(siftStuff) == 1:
                siftStuff.append(None)
            else:
                siftStuff[1] = siftStuff[1].rstrip(")")

        polyPhenStuff = [None, None]
        polyPhenField = self.getVCFKeyIndex(values, "PolyPhen")
        if polyPhenField:
            polyPhenStuff = polyPhenField.split("(")
            if len(polyPhenStuff) == 1:
                polyPhenStuff.append(None)
            else:
                polyPhenStuff[1] = polyPhenStuff[1].rstrip(")")

        vcv = vepconsequencevariant(
            chromosome=record.CHROM,
            start=begin,
            stop=end,
            dbsnp=record.ID,
            reference=ref,
            alternate=alt,
            gene_id=self.getVCFKeyIndex(values, "Gene"),
            transcriptCodon=self.getVCFKeyIndex(values, "Feature"),
            consequence_terms=self.getVCFKeyIndex(values, "Consequence").split("&"),
            positionCodon=self.getVCFKeyIndex(values, "cDNA_position"),
            positionPeptide=self.getVCFKeyIndex(values, "Protein_position"),
            referencePeptide=aas[0],
            alternatePeptide=aas[1],
            strand=self.getVCFKeyIndex(values, "STRAND"),
            gene=self.getVCFKeyIndex(values, "SYMBOL"),
            gene_symbol_source=self.getVCFKeyIndex(values, "SYMBOL_SOURCE"),
            hgnc_id=self.getVCFKeyIndex(values, "HGNC_ID"),
            biotype=self.getVCFKeyIndex(values, "BIOTYPE"),
            canonical=self.getVCFKeyIndex(values, "CANONICAL"),
            ccds=self.getVCFKeyIndex(values, "CCDS"),
            transcriptPeptide=self.getVCFKeyIndex(values, "ENSP"),
            predictionSIFT=siftStuff[0],
            scoreSIFT=siftStuff[1],
            predictionPolyphen=polyPhenStuff[0],
            scorePolyphen=polyPhenStuff[1],
            exon=exons[0],
            totalExons=exons[1],
            intron=introns[0],
            totalIntrons=introns[1],
        )
        var.alleleFrequency = self.getVCFKeyIndex(values, "GMAF")
        var.vepVariant.consequences.append(vcv)

    self.determineMostSevere(var, **kwargs)
    self.setAlleleMeasures(var, info, **kwargs)
    return None
def __init__(self, **kwargs):
    """Initialise a chargervariant from keyword arguments.

    The parent initialiser receives every keyword argument. Each evidence
    flag listed below is read from the keyword of the same name and defaults
    to False. The remaining attributes are read from the keywords shown in
    the code; when ``parentVariant`` is supplied and truthy its information
    is copied in through the parent class's ``copyInfo``.
    """
    super(chargervariant, self).__init__(**kwargs)

    # Evidence flags: attribute name and keyword name are identical.
    evidenceFlags = (
        'PVS1',
        'PS1', 'PS2', 'PS3', 'PS4',
        'PM1', 'PM2', 'PM3', 'PM4', 'PM5', 'PM6',
        'PP1', 'PP2', 'PP3', 'PP4', 'PP5',
        'BA1',
        'BS1', 'BS2', 'BS3', 'BS4',
        'BP1', 'BP2', 'BP3', 'BP4', 'BP5', 'BP6', 'BP7',
        'PSC1', 'PMC1', 'PPC1', 'PPC2',
        'BSC1', 'BMC1',
    )
    for flagName in evidenceFlags:
        setattr(self, flagName, kwargs.get(flagName, False))

    # Annotation payloads; note the keyword differs from the attribute name
    # for several of these.
    self.otherTranscripts = kwargs.get('otherTranscripts', {})
    self.alleleFrequency = kwargs.get('alleleFrequency', None)
    self.vepAnnotations = kwargs.get('VEP', None)
    self.vcfHeaders = kwargs.get('headers', None)
    self.vcfInfo = kwargs.get('INFO', [])

    # Classification state starts out as "uncertain" with zeroed scores.
    defaultPathogenicity = {
        "CharGer": chargervariant.uncertain,
        "ACMG": chargervariant.uncertain,
    }
    self.pathogenicity = kwargs.get('pathogenicity', defaultPathogenicity)
    defaultClinical = {
        "description": chargervariant.uncertain,
        "review_status": "",
    }
    self.clinical = kwargs.get('clinical', defaultClinical)
    self.pathogenicScore = kwargs.get('pathogenicScore', 0)
    self.benignScore = kwargs.get('benignScore', 0)
    self.chargerScore = kwargs.get('chargerScore', 0)
    self.callSummary = kwargs.get('summary', [])

    aParentVariant = kwargs.get('parentVariant', None)
    if aParentVariant:
        super(chargervariant, self).copyInfo(aParentVariant)

    # make vep and clinvar variants attributes of chargervariant
    fallbackVep = vepvariant()
    self.vepVariant = kwargs.get('vepvariant', fallbackVep)
    fallbackClinvar = clinvarvariant()
    self.clinvarVariant = kwargs.get('clinvarvariant', fallbackClinvar)
    self.transvarVariant = kwargs.get('transvarvariant', None)
    self.scoresMap = kwargs.get('scoresMap', dict())
def test_empty_init(self):
    """A vepvariant built without arguments must evaluate as falsy."""
    emptyVariant = vepvariant()
    self.assertFalse(emptyVariant)