Beispiel #1
0
    def annotateVariantsPost(self, variants, **kwargs):
        """Annotate variants through chunked POST requests.

        The variants are submitted in chunks of at most ``maxPost`` entries.
        Every input is first registered under its ``var.ensembl()`` string as
        a placeholder ``vepvariant`` linked to its parent; entries parsed from
        a successful response are then stored under their own
        ``inputVariant`` key, replacing the placeholder when the keys match.

        Keyword arguments:
            allOptions: value handed to ``self.doAllOptions`` (default True).
            maxPost: chunk size per POST (default 400). The original comment
                attributes the limit to HTTP 400 / 504 errors.
            All keyword arguments are also forwarded to ``self.submit``.

        Returns:
            dict mapping input-variant strings to ``vepvariant`` objects.
        """
        doAllOptions = kwargs.get('allOptions', True)
        maxPost = kwargs.get('maxPost', 400)
        lengthVariants = len(variants)
        annotatedVariants = {}  # input-variant string -> vepvariant
        nullValue = "."
        delim = " "
        for i in range(0, lengthVariants, maxPost):
            # Clamp the end index so the range reported on failure never
            # runs past the end of the input (the slice itself was already safe).
            j = min(i + maxPost, lengthVariants)
            subsetVariants = variants[i:j]
            # The returned value is not used here; the call is retained in
            # case it has side effects -- TODO confirm and drop if it is pure.
            self.checkInsertionsReference(subsetVariants,
                                          nullValue=nullValue,
                                          delim=delim)
            self.fullReset()
            self.setSubset(ensemblapi.regionSubset)
            self.doAllOptions(data=doAllOptions)
            formattedVariants = []
            for var in subsetVariants:
                inputVariant = var.ensembl()
                print(inputVariant)
                formattedVariants.append(inputVariant)
                # Placeholder so inputs without a returned annotation still
                # appear in the result.
                annotatedVariants[inputVariant] = vepvariant(
                    inputVariant=inputVariant, parentVariant=var)
            self.addData("variants", formattedVariants)
            self.addHeader("Accept", "application/json")
            self.addHeader("Content-Type", "application/json")
            self.submit(post=True, **kwargs)
            if self.response.ok and self.response.text:
                for rootElement in self.response.json():
                    var = vepvariant()
                    var.parseEntryFromVEP(rootElement)
                    var.setInputVariant()
                    annotatedVariants[var.inputVariant] = var
            else:
                # Single parenthesised print works on Python 2 and 3 and emits
                # the same text as the former two-statement form.
                print(
                    "ensemblapi Error: cannot access desired XML fields/tags for variants "
                    + " [" + str(i) + ":" + str(j) + "]")
        return annotatedVariants
Beispiel #2
0
	def annotateVariantsPost( self , variants , **kwargs ):
		"""Annotate variants through chunked POST requests.

		The variants are submitted in chunks of at most ``maxPost`` entries.
		Every input is first registered under its ``var.ensembl()`` string as
		a placeholder ``vepvariant`` linked to its parent; entries parsed from
		a successful response are then stored under their own
		``inputVariant`` key, replacing the placeholder when the keys match.

		Keyword arguments:
			allOptions: value handed to ``self.doAllOptions`` (default True).
			maxPost: chunk size per POST (default 400). The original comment
				attributes the limit to HTTP 400 / 504 errors.
			All keyword arguments are also forwarded to ``self.submit``.

		Returns:
			dict mapping input-variant strings to ``vepvariant`` objects.
		"""
		doAllOptions = kwargs.get( 'allOptions' , True )
		maxPost = kwargs.get( 'maxPost' , 400 )
		lengthVariants = len( variants )
		annotatedVariants = {} # input-variant string -> vepvariant
		nullValue = "."
		delim = " "
		for i in range( 0 , lengthVariants , maxPost ):
			# Clamp the end index so the range reported on failure never
			# runs past the end of the input (the slice itself was already safe).
			j = min( i + maxPost , lengthVariants )
			subsetVariants = variants[i:j]
			# The returned value is not used here; the call is retained in
			# case it has side effects -- TODO confirm and drop if it is pure.
			self.checkInsertionsReference( subsetVariants , nullValue=nullValue , delim=delim )
			self.fullReset()
			self.setSubset( ensemblapi.regionSubset )
			self.doAllOptions( data=doAllOptions )
			formattedVariants = []
			for var in subsetVariants:
				inputVariant = var.ensembl()
				print( inputVariant )
				formattedVariants.append( inputVariant )
				# Placeholder so inputs without a returned annotation still
				# appear in the result.
				annotatedVariants[inputVariant] = vepvariant( inputVariant=inputVariant , parentVariant=var )
			self.addData( "variants" , formattedVariants )
			self.addHeader( "Accept" , "application/json" )
			self.addHeader( "Content-Type" , "application/json" )
			self.submit( post=True , **kwargs )
			if self.response.ok and self.response.text:
				for rootElement in self.response.json():
					var = vepvariant()
					var.parseEntryFromVEP( rootElement )
					var.setInputVariant()
					annotatedVariants[var.inputVariant] = var
			else:
				# Single parenthesised print works on Python 2 and 3 and emits
				# the same text as the former two-statement form.
				print( "ensemblapi Error: cannot access desired XML fields/tags for variants " + " [" + str(i) + ":" + str(j) + "]" )
		return annotatedVariants
Beispiel #3
0
	def trySubmit( self , lastSubmitTime , **kwargs ):
		"""Submit the prepared POST request, retrying when the server asks for a delay.

		The request is sent with ``self.submit( post=True , **kwargs )``. On a
		successful, non-empty response every element of the JSON payload is
		parsed into a ``vepvariant`` keyed by its ``inputVariant``. On failure
		the ``Retry-After`` response header (seconds) decides how long to sleep
		before the next attempt; without a usable header the call gives up
		immediately. At most 30 attempts are made.

		Parameters:
			lastSubmitTime: ``time.time()`` stamp of the previous submission;
				any pending wait is measured from the latest submission.
			**kwargs: forwarded unchanged to ``self.submit``.

		Returns:
			list ``[ annotatedVariants , attempts , success , timeAtSubmit ]``
			where ``annotatedVariants`` is a dict of parsed variants,
			``attempts`` counts the submissions made, ``success`` is a bool and
			``timeAtSubmit`` is the stamp of the last submission.
		"""
		maxAttempts = 30
		annotatedVariants = {}
		attempts = 0
		timeAtSubmit = lastSubmitTime
		waitTime = 0
		while attempts < maxAttempts:
			# Re-read the clock on every pass; a stale stamp would either skip
			# the wait or burn all attempts without ever submitting.
			remaining = waitTime - ( time.time() - timeAtSubmit )
			if remaining > 0:
				time.sleep( remaining )
			attempts += 1
			timeAtSubmit = time.time()
			self.submit( post=True , **kwargs )
			response = self.response
			if response.ok and response.text:
				for rootElement in response.json():
					var = vepvariant()
					var.parseEntryFromVEP( rootElement )
					var.setInputVariant()
					annotatedVariants[var.inputVariant] = var
				return [ annotatedVariants , attempts , True , timeAtSubmit ]
			try:
				# The delay is read from the response (not the request headers
				# kept on self); Retry-After is the header documented for
				# rate-limited replies -- TODO confirm against the service docs.
				waitTime = float( response.headers['Retry-After'] )
			except ( AttributeError , KeyError , TypeError , ValueError ):
				print( "BioMine::webapi::ensembl::ensemblapi::trySubmit Error: failed to query due to " + str( response.status_code ) + ": " + str( response.reason ) )
				return [ annotatedVariants , attempts , False , timeAtSubmit ]
		print( "BioMine::webapi::ensembl::ensemblapi::trySubmit Error: failed to query after " + str( attempts ) + " attempts. Stopping due to " + str( self.response.status_code ) + ": " + str( self.response.reason ) )
		return [ annotatedVariants , attempts , False , timeAtSubmit ]
Beispiel #4
0
    def annotateVariantsPost(self, variants, **kwargs):
        """Annotate variants through chunked POST requests with retries.

        The variants are submitted in chunks of at most ``maxPost`` entries
        via ``self.trySubmit``. Every input is first registered under its
        ``var.ensembl()`` string as a placeholder ``vepvariant`` linked to its
        parent; the annotations returned for each chunk are then merged into
        the same dict, so results accumulate across chunks.

        Keyword arguments:
            allOptions: value handed to ``self.doAllOptions`` (default True).
            maxPost: chunk size per POST (default 150). The original comment
                attributes the limit to HTTP 400 / 403 / 504 errors.
            All keyword arguments are also forwarded to ``self.trySubmit``.

        Returns:
            dict mapping input-variant strings to ``vepvariant`` objects.
        """
        doAllOptions = kwargs.get('allOptions', True)
        maxPost = kwargs.get('maxPost', 150)
        lengthVariants = len(variants)
        annotatedVariants = {}  # input-variant string -> vepvariant
        timeAtSubmit = 0
        nullValue = "."
        delim = " "
        for i in range(0, lengthVariants, maxPost):
            # Clamp the end index so the range reported on failure never
            # runs past the end of the input (the slice itself was already safe).
            j = min(i + maxPost, lengthVariants)
            subsetVariants = variants[i:j]
            # The returned value is not used here; the call is retained in
            # case it has side effects -- TODO confirm and drop if it is pure.
            self.checkInsertionsReference(subsetVariants,
                                          nullValue=nullValue,
                                          delim=delim)
            self.fullReset()
            self.setSubset(ensemblapi.regionSubset)
            self.doAllOptions(data=doAllOptions)
            formattedVariants = []
            for var in subsetVariants:
                inputVariant = var.ensembl()
                print(inputVariant)
                formattedVariants.append(inputVariant)
                annotatedVariants[inputVariant] = vepvariant(
                    inputVariant=inputVariant, parentVariant=var)
            self.addData("variants", formattedVariants)
            self.addHeader("Accept", "application/json")
            self.addHeader("Content-Type", "application/json")
            chunkAnnotations, _attempts, success, timeAtSubmit = self.trySubmit(
                timeAtSubmit, **kwargs)
            # Merge instead of rebinding: rebinding discarded the placeholders
            # and every previously annotated chunk.
            annotatedVariants.update(chunkAnnotations)
            if not success:
                # Single parenthesised print works on Python 2 and 3 and emits
                # the same text as the former two-statement form.
                print(
                    "BioMine::webapi::ensembl::ensemblapi Error: cannot access desired XML fields/tags for variants "
                    + " [" + str(i) + ":" + str(j) + "]")
        return annotatedVariants
Beispiel #5
0
 def trySubmit(self, lastSubmitTime, **kwargs):
     """Submit the prepared POST request, retrying when the server asks for a delay.

     The request is sent with ``self.submit(post=True, **kwargs)``. On a
     successful, non-empty response every element of the JSON payload is
     parsed into a ``vepvariant`` keyed by its ``inputVariant``. On failure
     the ``Retry-After`` response header (seconds) decides how long to sleep
     before the next attempt; without a usable header the call gives up
     immediately. At most 30 attempts are made.

     Parameters:
         lastSubmitTime: ``time.time()`` stamp of the previous submission;
             any pending wait is measured from the latest submission.
         **kwargs: forwarded unchanged to ``self.submit``.

     Returns:
         list ``[annotatedVariants, attempts, success, timeAtSubmit]`` where
         ``annotatedVariants`` is a dict of parsed variants, ``attempts``
         counts the submissions made, ``success`` is a bool and
         ``timeAtSubmit`` is the stamp of the last submission.
     """
     maxAttempts = 30
     annotatedVariants = {}
     attempts = 0
     timeAtSubmit = lastSubmitTime
     waitTime = 0
     while attempts < maxAttempts:
         # Re-read the clock on every pass; a stale stamp would either skip
         # the wait or burn all attempts without ever submitting.
         remaining = waitTime - (time.time() - timeAtSubmit)
         if remaining > 0:
             time.sleep(remaining)
         attempts += 1
         timeAtSubmit = time.time()
         self.submit(post=True, **kwargs)
         response = self.response
         if response.ok and response.text:
             for rootElement in response.json():
                 var = vepvariant()
                 var.parseEntryFromVEP(rootElement)
                 var.setInputVariant()
                 annotatedVariants[var.inputVariant] = var
             return [annotatedVariants, attempts, True, timeAtSubmit]
         try:
             # The delay is read from the response (not the request headers
             # kept on self); Retry-After is the header documented for
             # rate-limited replies -- TODO confirm against the service docs.
             waitTime = float(response.headers['Retry-After'])
         except (AttributeError, KeyError, TypeError, ValueError):
             print(
                 "BioMine::webapi::ensembl::ensemblapi::trySubmit Error: failed to query due to "
                 + str(response.status_code) + ": " + str(response.reason))
             return [annotatedVariants, attempts, False, timeAtSubmit]
     print(
         "BioMine::webapi::ensembl::ensemblapi::trySubmit Error: failed to query after "
         + str(attempts) + " attempts. Stopping due to " +
         str(self.response.status_code) + ": " + str(self.response.reason))
     return [annotatedVariants, attempts, False, timeAtSubmit]
Beispiel #6
0
	def annotateVariantsPost( self , variants , **kwargs ):
		"""Annotate variants through chunked POST requests with retries.

		The variants are submitted in chunks of at most ``maxPost`` entries
		via ``self.trySubmit``. Every input is first registered under its
		``var.ensembl()`` string as a placeholder ``vepvariant`` linked to its
		parent; the annotations returned for each chunk are then merged into
		the same dict, so results accumulate across chunks.

		Keyword arguments:
			allOptions: value handed to ``self.doAllOptions`` (default True).
			maxPost: chunk size per POST (default 150). The original comment
				attributes the limit to HTTP 400 / 403 / 504 errors.
			All keyword arguments are also forwarded to ``self.trySubmit``.

		Returns:
			dict mapping input-variant strings to ``vepvariant`` objects.
		"""
		doAllOptions = kwargs.get( 'allOptions' , True )
		maxPost = kwargs.get( 'maxPost' , 150 )
		lengthVariants = len( variants )
		annotatedVariants = {} # input-variant string -> vepvariant
		timeAtSubmit = 0
		nullValue = "."
		delim = " "
		for i in range( 0 , lengthVariants , maxPost ):
			# Clamp the end index so the range reported on failure never
			# runs past the end of the input (the slice itself was already safe).
			j = min( i + maxPost , lengthVariants )
			subsetVariants = variants[i:j]
			# The returned value is not used here; the call is retained in
			# case it has side effects -- TODO confirm and drop if it is pure.
			self.checkInsertionsReference( subsetVariants , nullValue=nullValue , delim=delim )
			self.fullReset()
			self.setSubset( ensemblapi.regionSubset )
			self.doAllOptions( data=doAllOptions )
			formattedVariants = []
			for var in subsetVariants:
				inputVariant = var.ensembl()
				print( inputVariant )
				formattedVariants.append( inputVariant )
				annotatedVariants[inputVariant] = vepvariant( inputVariant=inputVariant , parentVariant=var )
			self.addData( "variants" , formattedVariants )
			self.addHeader( "Accept" , "application/json" )
			self.addHeader( "Content-Type" , "application/json" )
			chunkAnnotations , _attempts , success , timeAtSubmit = self.trySubmit( timeAtSubmit , **kwargs )
			# Merge instead of rebinding: rebinding discarded the placeholders
			# and every previously annotated chunk.
			annotatedVariants.update( chunkAnnotations )
			if not success:
				# Single parenthesised print works on Python 2 and 3 and emits
				# the same text as the former two-statement form.
				print( "BioMine::webapi::ensembl::ensemblapi Error: cannot access desired XML fields/tags for variants " + " [" + str(i) + ":" + str(j) + "]" )
		return annotatedVariants
Beispiel #7
0
    def getCSQ(self, var, record, begin, end, ref, alt, alti, **kwargs):
        """Fill ``var.vepVariant`` from the ``CSQ`` entries of a VCF record.

        When ``record.INFO`` has no ``CSQ`` key nothing is modified. Otherwise
        a fresh ``vepvariant`` is attached to ``var`` and, for every
        pipe-delimited CSQ entry, a ``vepconsequencevariant`` is built and
        appended to ``var.vepVariant.consequences``. ``var.vcfInfo`` and
        ``var.alleleFrequency`` are overwritten on each entry, so they end up
        holding the values of the last one. Finally
        ``self.determineMostSevere`` and ``self.setAlleleMeasures`` are called.

        Parameters:
            var: variant object that receives the annotations.
            record: VCF record; ``INFO``, ``CHROM`` and ``ID`` are read.
            begin, end: passed through as ``start`` / ``stop``.
            ref, alt: passed through as ``reference`` / ``alternate``.
            alti: not used by this method.
            **kwargs: forwarded to ``determineMostSevere`` and
                ``setAlleleMeasures``.

        Returns:
            None in every case.
        """
        info = record.INFO
        csq = info.get('CSQ', "noCSQ")
        if csq == "noCSQ":
            return None

        def lookup(values, key):
            # Thin alias so each field is fetched once per use.
            return self.getVCFKeyIndex(values, key)

        def splitPair(raw, separator, closing=None):
            # Split "a<sep>b" into [a, b]; pad a lone value with None and
            # optionally strip a trailing closing character from the second part.
            if not raw:
                return [None, None]
            parts = raw.split(separator)
            if len(parts) == 1:
                parts.append(None)
            elif closing is not None:
                parts[1] = parts[1].rstrip(closing)
            return parts

        var.vepVariant = vepvariant()
        for thisCSQ in csq:
            values = thisCSQ.split("|")
            var.vcfInfo = values

            aas = [None, None]
            aminoAcids = lookup(values, "Amino_acids")
            if aminoAcids:
                aas = aminoAcids.split("/")
                if len(aas) > 1:
                    aas[0] = mafvariant().convertAA(aas[0])
                    aas[1] = mafvariant().convertAA(aas[1])
                else:
                    # Only one amino acid listed: fall back to the HGVSp string.
                    hgvsp = lookup(values, "HGVSp").split(":")
                    if len(hgvsp) > 1 and re.match(r"p\.", hgvsp[1]):
                        aas = mafvariant().splitHGVSp(hgvsp[1])
                        aas[0] = mafvariant().convertAA(aas[0])
                        # NOTE(review): index 2 is converted here, yet
                        # alternatePeptide below reads index 1 -- confirm the
                        # layout returned by splitHGVSp.
                        aas[2] = mafvariant().convertAA(aas[2])
                    else:
                        aas.append(None)

            exons = splitPair(lookup(values, "EXON"), "/")
            introns = splitPair(lookup(values, "INTRON"), "/")
            # SIFT / PolyPhen look like "prediction(score)".
            siftStuff = splitPair(lookup(values, "SIFT"), "(", ")")
            polyPhenStuff = splitPair(lookup(values, "PolyPhen"), "(", ")")

            vcv = vepconsequencevariant(
                chromosome=record.CHROM,
                start=begin,
                stop=end,
                dbsnp=record.ID,
                reference=ref,
                alternate=alt,
                gene_id=lookup(values, "Gene"),
                transcriptCodon=lookup(values, "Feature"),
                consequence_terms=lookup(values, "Consequence").split("&"),
                positionCodon=lookup(values, "cDNA_position"),
                positionPeptide=lookup(values, "Protein_position"),
                referencePeptide=aas[0],
                alternatePeptide=aas[1],
                strand=lookup(values, "STRAND"),
                gene=lookup(values, "SYMBOL"),
                gene_symbol_source=lookup(values, "SYMBOL_SOURCE"),
                hgnc_id=lookup(values, "HGNC_ID"),
                biotype=lookup(values, "BIOTYPE"),
                canonical=lookup(values, "CANONICAL"),
                ccds=lookup(values, "CCDS"),
                transcriptPeptide=lookup(values, "ENSP"),
                predictionSIFT=siftStuff[0],
                scoreSIFT=siftStuff[1],
                predictionPolyphen=polyPhenStuff[0],
                scorePolyphen=polyPhenStuff[1],
                exon=exons[0],
                totalExons=exons[1],
                intron=introns[0],
                totalIntrons=introns[1],
            )

            var.alleleFrequency = lookup(values, "GMAF")
            var.vepVariant.consequences.append(vcv)
        self.determineMostSevere(var, **kwargs)
        self.setAlleleMeasures(var, info, **kwargs)
        return None
Beispiel #8
0
	def __init__( self , **kwargs ):
		"""Build a chargervariant from keyword arguments.

		The parent initializer receives all keyword arguments first. Every
		evidence flag (``PVS1`` ... ``BMC1``) is then read from ``kwargs`` and
		defaults to False. The remaining attributes are read from the keys
		shown below; note that some attribute names differ from their keyword
		(``VEP`` -> ``vepAnnotations``, ``headers`` -> ``vcfHeaders``,
		``INFO`` -> ``vcfInfo``, ``summary`` -> ``callSummary``). When a truthy
		``parentVariant`` is given, the parent class ``copyInfo`` is invoked
		with it before the nested variant objects are attached.
		"""
		super(chargervariant,self).__init__(**kwargs)

		# Evidence flags, all False unless supplied by the caller.
		evidenceFlags = (
			'PVS1' ,
			'PS1' , 'PS2' , 'PS3' , 'PS4' ,
			'PM1' , 'PM2' , 'PM3' , 'PM4' , 'PM5' , 'PM6' ,
			'PP1' , 'PP2' , 'PP3' , 'PP4' , 'PP5' ,
			'BA1' ,
			'BS1' , 'BS2' , 'BS3' , 'BS4' ,
			'BP1' , 'BP2' , 'BP3' , 'BP4' , 'BP5' , 'BP6' , 'BP7' ,
			'PSC1' , 'PMC1' , 'PPC1' , 'PPC2' ,
			'BSC1' , 'BMC1' ,
		)
		for flagName in evidenceFlags:
			setattr( self , flagName , kwargs.get( flagName , False ) )

		self.otherTranscripts = kwargs.get( 'otherTranscripts' , {} )
		self.alleleFrequency = kwargs.get( 'alleleFrequency' , None )
		self.vepAnnotations = kwargs.get( 'VEP' , None )
		self.vcfHeaders = kwargs.get( 'headers' , None )
		self.vcfInfo = kwargs.get( 'INFO' , [] )

		# Both call sets start out as the class-level "uncertain" value.
		defaultCalls = {
			"CharGer" : chargervariant.uncertain ,
			"ACMG" : chargervariant.uncertain ,
		}
		self.pathogenicity = kwargs.get( 'pathogenicity' , defaultCalls )
		defaultClinical = {
			"description" : chargervariant.uncertain ,
			"review_status" : "" ,
		}
		self.clinical = kwargs.get( 'clinical' , defaultClinical )

		self.pathogenicScore = kwargs.get( 'pathogenicScore' , 0 )
		self.benignScore = kwargs.get( 'benignScore' , 0 )
		self.chargerScore = kwargs.get( 'chargerScore' , 0 )
		self.callSummary = kwargs.get( 'summary' , [] )

		aParentVariant = kwargs.get( 'parentVariant' , None )
		if aParentVariant:
			super( chargervariant , self ).copyInfo( aParentVariant )

		# Nested variant objects are attached after any parent info is copied.
		emptyVep = vepvariant()
		self.vepVariant = kwargs.get( 'vepvariant' , emptyVep )
		emptyClinvar = clinvarvariant()
		self.clinvarVariant = kwargs.get( 'clinvarvariant' , emptyClinvar )
		self.transvarVariant = kwargs.get( 'transvarvariant' , None )
		self.scoresMap = kwargs.get( 'scoresMap' , dict() )
Beispiel #9
0
 def test_empty_init(self):
     """A vepvariant built without arguments must evaluate as falsy."""
     emptyVariant = vepvariant()
     self.assertFalse(emptyVariant)