예제 #1
0
 def __init__(self):
     '''
     @summary:
     Start with quiet output and a fresh BlastTools helper.
     '''
     self.verbose = False
     self.blastTools = BlastTools()
예제 #2
0
class ProcessVCF:
    '''
    @summary:
    Tool set for dealing with variant call format (VCF) files.
    Specifically this tool set does the following:
    1. Find expanding regions of variants with a designated distance across all samples.
    2. Combine variants in each sample into a report object to be used for machine learning analysis of genotype to phenotype.
    3. Create a distance matrix using given variants between given strains.

    Output uses single-argument print(...) calls so the module behaves the
    same under Python 2 and Python 3.
    '''

    def __init__(self):
        '''
        @summary:
        Create the BLAST helper used by alignVCF and default to quiet output.
        '''
        self.blastTools = BlastTools()
        self.verbose = False

    def writeToLog(self,stringValue,logFileH,verbose=False):
        '''
        @summary:
        Write one line to an open log handle, optionally echoing it to stdout.
        Logging is best effort: a failed write is reported on stdout and is
        never raised to the caller.
        @param stringValue: text to log; a newline is appended when writing.
        @param logFileH: object with a write() method, or None to skip logging.
        @param verbose: when True, also print stringValue before writing.
        @return: None
        '''
        if logFileH is None:
            return None
        try:
            if verbose:
                print(stringValue)
            logFileH.write(stringValue+"\n")
        except Exception:
            # Catch Exception rather than everything so Ctrl-C and
            # SystemExit still propagate.
            print("failed to write to log file [%s]" % (logFileH,))
        return None

    def replaceSeqTarget(self,seq,newSeq,loc):
        '''
        @summary:
        Overwrite part of seq with newSeq starting at index loc.
        @param seq: sliceable sequence supporting concatenation with newSeq.
        @param newSeq: replacement text written over seq.
        @param loc: zero-based index in seq where newSeq starts.
        @return: seq[:loc] + newSeq + seq[loc+len(newSeq):]
        '''
        prefix = seq[:loc]
        post = seq[loc+len(newSeq):]
        return prefix + newSeq + post

    def findTargets(self,targetMap,feature,minQuality=10,logFile=None):
        '''
        @summary:
        Find variant calls in proximity to given feature.
        Every key of targetMap whose numeric value lies between the feature
        start and end (inclusive) is overlaid onto two placeholder sequences
        of "_" characters, one for REF alleles and one for ALT alleles.
        @param targetMap: mapping from position keys (convertible to float) to
            records providing "CHROM", "REF", "ALT" and "QUAL" entries.
        @param feature: object exposing location.start.position,
            location.end.position, id and the qualifiers "query" and "subject".
        @param minQuality: calls whose QUAL is below this value are reported
            as failed; they are still applied to the sequences.
        @param logFile: optional open handle; failure messages are also
            written to it through writeToLog.
        @return: dict with keys "name", "chrom", "start", "end", "quality",
            "query", "subject", "refSeq" and "alt-read".
        '''
        start = feature.location.start.position
        end = feature.location.end.position
        matches = [loc for loc in targetMap.keys() if start <= float(loc) <= end]

        querySeq = feature.qualifiers["query"]
        subjectSeq = feature.qualifiers["subject"]
        # NOTE(review): Seq is presumably Bio.Seq.Seq imported elsewhere in
        # this file - confirm.
        placeholder = "_"*len(subjectSeq)
        refSeq = Seq(placeholder)
        readSeq = Seq(placeholder)
        qualityValues = []
        chrom = ''

        for loc in matches:
            # Offset of the call relative to the start of the feature.
            seqStart = int(float(loc)-start)

            target = targetMap[loc]
            chrom = target["CHROM"]
            ref = target["REF"]
            alts = target["ALT"].split(",")
            quality = target["QUAL"]
            qualityValues.append((seqStart,quality))

            if float(quality) < minQuality:
                message = "(failed) Feature [%s] (%s <-> %s) ===> %s" % (feature.id,start,end,qualityValues)
                print(message)
                # Also record the failure in the caller-supplied log, if any.
                self.writeToLog(message,logFile)

            refSeq = self.replaceSeqTarget(refSeq,ref,seqStart)
            # Every ALT allele is written at the same offset, so a later
            # allele overwrites the overlapping part of an earlier one.
            for alt in alts:
                readSeq = self.replaceSeqTarget(readSeq,alt,seqStart)

        result = {
            "name": feature.id,
            "chrom": chrom,
            "start": start,
            "end": end,
            "quality": qualityValues,
            "query": querySeq,
            "subject": subjectSeq,
            "refSeq": refSeq,
            "alt-read": readSeq,
        }
        return result

    def annotateAlignment(self,targetMap,featuresArray,idTag):
        '''
        @summary:
        Run findTargets on every feature and collect the results by feature id.
        A log file named "read_log_<idTag>.txt" is created (truncated if it
        exists) in the current directory and closed before returning.
        @param targetMap: variant mapping passed through to findTargets.
        @param featuresArray: iterable of iterables of feature objects.
        @param idTag: value interpolated into the log file name.
        @return: dict mapping feature.id to the findTargets result; a repeated
            id keeps the last result.
        '''
        readLogName = "read_log_%s.txt" % (idTag)
        result = {}

        # The with-block guarantees the log handle is closed, even on error.
        with open(readLogName,"w") as logFP:
            for features in featuresArray:
                for feature in features:
                    result[feature.id] = self.findTargets(targetMap, feature, minQuality=10, logFile=logFP)
        return result

    def alignVCF(self,targetFile,vcfFile,idTag):
        '''
        @summary:
        Check alignment to annotated Genomic sequence using BLAST.
        Parses the VCF file, BLASTs the target FASTA file and annotates the
        resulting features with the variant calls that fall inside them.
        @param targetFile: path of the FASTA file holding the target sequences.
        @param vcfFile: VCF input handed to parseVCFFile.
        @param idTag: tag used by annotateAlignment to name its log file.
        @return: dict produced by annotateAlignment.
        '''
        readRecord = parseVCFFile(vcfFile)

        # Read all records while the handle is open, then release it.
        with open(targetFile) as targetHandle:
            targetRecords = list(SeqIO.parse(targetHandle, "fasta"))

        print("Processing [%s] records" % (len(targetRecords)))

        # NOTE(review): verbose, blastDB and blastExe are not defined in this
        # method or on self; presumably they are module-level settings -
        # confirm, otherwise this raises NameError.
        if verbose:
            print("blasting sequences")

        self.blastTools.verbose = verbose
        blastedFeatures = self.blastTools.seqBlastToFeatures(blastDB, blastExe, targetFile, blastType = "blastn",scoreMin = 1e-5)

        if verbose:
            print("finished blasting locations")
        alignmentReport = self.annotateAlignment(readRecord, blastedFeatures,idTag)

        return alignmentReport