if __name__ == "__main__":
    # Usage: <script> <input.vcf> <output.csv>
    vcf_loc, out_loc = sys.argv[1], sys.argv[2]

    # The input is opened first so that a bad input path fails before the
    # output file is created/truncated. Both handles live in one `with`, so
    # the output is flushed and closed even if vcfLine() or processWindow()
    # raises part-way through the file.
    with open(vcf_loc) as f, open(out_loc, "w") as outFile:
        # Header row of the CSV output. The data rows are presumably
        # appended by processWindow(), which receives the open handle --
        # confirm against its definition.
        outFile.write(
            "start_chrom,start_pos,stop_chrom,stop_pos,altSiteCount,refSiteCount,avgDepth,avgRefReads,avgAltReads,avgOtherReads"
            + "\n")

        inWindow = []
        for raw_line in f:
            # A full window is flushed *before* the next line is parsed, so
            # windows are non-overlapping runs of WINDOW_SIZE data lines.
            if len(inWindow) == WINDOW_SIZE:
                processWindow(inWindow, outFile)
                inWindow = []

            line = vcfLine(raw_line)
            if line.isDataLine:
                # Only data lines are added to the window.
                inWindow.append(line)

        # Process the last (possibly incomplete) window. An empty window is
        # skipped: it occurs when the input has no data lines, or when a
        # full window was flushed and only non-data lines followed.
        if inWindow:
            processWindow(inWindow, outFile)
import sys
from vcfLine import vcfLine

if __name__ == "__main__":
    # Usage: <script> <input.vcf> <output.txt>
    vcf_in, out_loc = sys.argv[1], sys.argv[2]

    # Both files are context-managed so the output handle is closed even if
    # parsing a line raises; no explicit close() calls are needed.
    with open(vcf_in) as vcfFile, open(out_loc, "w") as outFile:
        for line in vcfFile:
            vcf_line = vcfLine(line)
            # Emit "<chrom> <pos>" for every data line; other lines are
            # skipped.
            if vcf_line.isDataLine:
                outFile.write(vcf_line.chrom + " " + vcf_line.pos + "\n")
import sys
import traceback

from vcfDict import vcfDict
from vcfLine import vcfLine
from vcfSample import vcfSample
from mpileLine import mpileLine

if __name__ == "__main__":
    # Usage: <script> <input.mpileup> <input.vcf> <output.csv>
    mpile_in, vcf_in, csv_out = sys.argv[1], sys.argv[2], sys.argv[3]

    # Load dictionary of sites in the vcf_in file.
    vcfSites = vcfDict(vcf_in)
    vcfSites.loadDict()

    # Both files are context-managed so they are closed on any exit path.
    # NOTE(review): nothing is written to outFile yet; the script appears to
    # be unfinished.
    with open(mpile_in) as pileFile, open(csv_out, "w") as outFile:
        for line in pileFile:
            try:
                pile_line = mpileLine(line)
                print(pile_line.repr())
                if vcfSites.siteExists(pile_line.siteID):
                    # NOTE(review): vcf_line is built but not used further;
                    # the call is kept so parse failures are still reported.
                    vcf_line = vcfLine(vcfSites.getLine(pile_line.siteID))
            except Exception:
                # Best-effort per line: report the failure and the offending
                # input line, then continue with the next one.
                print(sys.exc_info()[0])
                print(traceback.format_exc())
                print(line)
import sys
from vcfLine import vcfLine
from vcfSample import vcfSample

HET = "0/1"


def allHet(samples):
    """
    Determines if the list of samples is all HET samples.

    Returns True when every sample's GT equals HET (and therefore also for
    an empty list), False as soon as one sample differs.
    """
    return not any(sample.GT != HET for sample in samples)


if __name__ == "__main__":
    # Usage: <script> <input.vcf> <output.vcf>
    vcf_in, vcf_out = sys.argv[1], sys.argv[2]

    # Both files are context-managed so the output is closed even if parsing
    # a line raises. The input handle is not called `file`, to avoid
    # shadowing the builtin.
    with open(vcf_in) as vcf_file, open(vcf_out, "w") as outFile:
        for line in vcf_file:
            vLine = vcfLine(line)
            # Only data lines whose samples are all heterozygous are copied
            # verbatim; every other line (including non-data lines) is
            # dropped.
            if vLine.isDataLine and allHet(vLine.samples):
                outFile.write(line)
# Usage: <script> <unfiltered.vcf> <target_sites.vcf> <output.csv>
vcf_unfilt, vcf_targetSites, out_loc = sys.argv[1], sys.argv[2], sys.argv[3]

# Load the target sites before any file is opened for writing.
targetSitesDict = vcfDict(vcf_targetSites)
targetSitesDict.loadDict()

# Both files are context-managed so the output is flushed and closed even if
# parsing or window processing raises; no explicit close() calls are needed.
with open(vcf_unfilt) as unfilt, open(out_loc, "w") as outFile:
    outFile.write(
        "siteID,start_chrom,start_pos,stop_chrom,stop_pos,altSiteCount,refSiteCount,avgDepth,avgRefReads,avgAltReads,avgOtherReads"
        + "\n")

    window = []
    for line in unfilt:
        lineVcf = vcfLine(line)
        if not lineVcf.isDataLine:
            continue

        # Add data lines from the unfiltered file to the window.
        window.append(lineVcf)
        if len(window) == WINDOW_SIZE + 1:
            # Remove the oldest site seen to keep size == WINDOW_SIZE.
            window.pop(0)
            # Check if the site in the middle of the window is a target
            # site. Floor division keeps the index an int on Python 2 and 3.
            # NOTE(review): with the check nested here, the very first full
            # window (before any site has been dropped) is never evaluated,
            # and no partial window is evaluated at the end of the file --
            # confirm that this is intended.
            if checkSite(window[WINDOW_SIZE // 2], targetSitesDict):
                processWindow(window, outFile)