def vcfCollectionReport(self,vcfCollection,vcf,reorder=True,useCount=True,fill_blank="NA"):
    '''
    @summary:
    Build report objects from variant calls grouped by region (a "variance
    collection"). Every (region, strain) pair becomes one cell, so the reports
    can be written straight to a delimited matrix / spreadsheet.

    @param vcfCollection: mapping strainID -> {region: regionData}, where
        regionData["Count"] is the value stored in the coverage report and
        regionData["vcfData"] is a sequence of calls indexable by "POS",
        "READS" and "ALT".
    @param vcf: unused; kept so existing callers keep working.
    @param reorder: when true (default) strains are processed in the order of
        the number following an underscore in their ID (numeric order, so
        "x_2" comes before "x_10"); IDs without such a number follow,
        alphabetically. When false the collection's own key order is used.
    @param useCount: currently has no effect. The "Region_Count" tally it used
        to guard was never written to the report, so that dead computation
        has been dropped; the parameter stays for interface compatibility.
    @param fill_blank: cell value used when a region has no variant calls.
    @return: (result, coverageReport). result holds, per region and strain,
        either fill_blank or "[count]:(POS,READS,ALT):..." with one
        parenthesised group per call; coverageReport holds the raw count.
    '''
    result = DataReport()
    coverageReport = DataReport()

    strainPattern = re.compile(r".*_([0-9]+).*")

    def strainOrder(strainID):
        # Numbered strains first, ordered by numeric value. The full ID is the
        # final tie-breaker, so two strains sharing a number are both kept
        # (a number -> ID dict would silently drop one of them).
        match = strainPattern.match(strainID)
        if match is not None:
            return (0, int(match.group(1)), strainID)
        return (1, 0, strainID)

    if reorder:
        print("reordering report")
        strainIDs = sorted(vcfCollection.keys(), key=strainOrder)
    else:
        strainIDs = list(vcfCollection.keys())

    for strainID in strainIDs:
        vcfRegions = vcfCollection[strainID]

        print("Collecting [%s] regions" % (strainID))
        for (loc, vcfRegion) in vcfRegions.items():
            count = vcfRegion["Count"]
            vcfData = vcfRegion['vcfData']
            coverageReport.add(loc, strainID, count)

            if not vcfData:
                # No calls in this region for this strain.
                result.add(loc, strainID, fill_blank)
                continue

            calls = ["(%s,%s,%s):" % (call["POS"], call["READS"], call["ALT"])
                     for call in vcfData]
            result.add(loc, strainID, ("[%s]:" % (count)) + "".join(calls))

    return (result, coverageReport)
def processVariantData(vcf_data, bam_file, strainID = '', refName='', minQuality = 30, minCount = 1,minSize=0):
    '''
    @return ([vcf], DataReport): the records that passed the filters, and a
        report with one row per passing record keyed "<strainID>_<start>".
    @summary:
    Annotate variant records with the number of reads the BAM file holds over
    each variant, filter them, and build a report of the survivors.

    Every record in vcf_data is updated in place with "STRAIN", "END", "READS"
    and "READFILE", whether or not it passes the filters.

    @param vcf_data: iterable of dict-like records with "CHROM", "POS", "REF",
        "ALT" and "QUAL" entries.
    @param bam_file: path of the BAM file to count reads in.
    @param strainID: label stored on every record and in the report.
    @param refName: unused; kept so existing callers keep working.
    @param minQuality: records whose QUAL is below this are dropped. A QUAL
        that cannot be read as a number is treated as failing the threshold.
    @param minCount: records covered by fewer reads than this are dropped.
    @param minSize: records whose ALT is shorter than this are dropped.
    '''
    vReport = DataReport()
    vcfDataResult = []

    print("Opening bam file [%s]" % (bam_file))
    samFile = pysam.Samfile(bam_file,"rb")
    try:
        # Name of the first reference in the BAM; used for records without CHROM.
        refname = samFile.getrname(0)

        for vcf in vcf_data:
            vcf["STRAIN"] = strainID
            irefname = vcf["CHROM"]
            if irefname == '':
                # Resolve the fallback before fetching so the read count is
                # taken on a real reference rather than on an empty name.
                irefname = refname
            start = int(vcf["POS"])
            ref = vcf["REF"]
            alt = vcf["ALT"]
            quality = vcf["QUAL"]
            end = start + len(ref)
            vcf["END"] = end

            # NOTE(review): pysam fetch() coordinates are 0-based half-open
            # while VCF POS is 1-based -- confirm this window is intended.
            # Count lazily instead of materialising every read in a list.
            rCount = sum(1 for _ in samFile.fetch(irefname,start,end))

            vcf["READS"] = rCount
            vcf["READFILE"] = bam_file

            # Convert the quality itself; the original converted the result of
            # the comparison, which left string QUAL values unfiltered.
            try:
                qualValue = float(quality)
            except (TypeError, ValueError):
                qualValue = None

            if qualValue is None or qualValue < minQuality or rCount < minCount or len(alt) < minSize:
                continue
            vcfDataResult.append(vcf)

            vID = "%s_%s" % (strainID,start)
            vReport.add(vID,"strainID", strainID)
            vReport.add(vID,"start", start)
            vReport.add(vID,"end", end)
            vReport.add(vID,"chrom",irefname)
            vReport.add(vID,"count",rCount)
            vReport.add(vID,"qual",quality)
            vReport.add(vID,"ref",ref)
            vReport.add(vID,"alt",alt)
            vReport.add(vID,"bamFile",bam_file)
    finally:
        # Release the BAM handle even if a record is malformed.
        samFile.close()

    return (vcfDataResult, vReport)
 # Output paths for the three reports. workFolder is concatenated as-is, so it
 # presumably already ends with a path separator -- TODO confirm.
 masterReportFile = workFolder + "Master_%s_Report.txt" % (analysisName)
 varianceReportFile = workFolder + "Variance_%s_Report.txt" % (analysisName)
 coverageReportFile = workFolder + "Coverage_%s_Report.txt" % (analysisName)
  
 #========================
 # Construct variant data
 #======================== 
     
 # Reuse the pickled master report unless it is missing or 'rebuild' is in mode.
 # NOTE(review): this reads mReportFile, which is not assigned in the visible
 # lines (masterReportFile is) -- confirm it is defined elsewhere and that the
 # two names are not meant to be the same path.
 # NOTE(review): pickle.load can execute code embedded in the file; only load
 # files this pipeline wrote itself. The file is opened in text mode ("r");
 # a binary pickle protocol would need "rb" -- confirm how it is written.
 if os.path.exists(mReportFile) and 'rebuild' not in mode:
     print "Loading master repository [%s]" % (mReportFile)
     mReportFh = open(mReportFile,"r")
     masterReport = pickle.load(mReportFh)
     mReportFh.close()
 else:
     print "==> Starting new master report [%s]" % (mReportFile)
     masterReport = DataReport()
     
 # Same policy for the variance repository: load the pickled copy when present
 # and not rebuilding, otherwise start from an empty dict.
 if os.path.exists(varianceRepositoryFile) and 'rebuild' not in mode:
     print "Loading variance repository [%s]" % (varianceRepositoryFile)
     fh = open(varianceRepositoryFile,"r")
     varRepository = pickle.load(fh)
     fh.close()
 else:
     print "==> Staring new variant repository"
     varRepository = {}    
     
 '''
 Running in 'build' mode processes sequencing data into 
 index bam files and vcf files.
 '''
 if 'build' in mode: