if __name__ == "__main__":
    # Usage: <script> <input.vcf> <output.csv>
    vcf_loc, out_loc = sys.argv[1], sys.argv[2]

    # CSV output: one row per window of WINDOW_SIZE data lines, written by
    # processWindow(). The column names are exactly those in the header below.
    header = ("start_chrom,start_pos,stop_chrom,stop_pos,altSiteCount,"
              "refSiteCount,avgDepth,avgRefReads,avgAltReads,avgOtherReads")

    # The output file is opened first (as before) and held in a `with` so it
    # is flushed and closed even if parsing or window processing raises.
    with open(out_loc, "w") as outFile:
        outFile.write(header + "\n")

        inWindow = []
        with open(vcf_loc) as f:
            for raw_line in f:
                if len(inWindow) == WINDOW_SIZE:
                    # Window is full: analyze it and start a fresh one.
                    processWindow(inWindow, outFile)
                    inWindow = []

                line = vcfLine(raw_line)
                if line.isDataLine:
                    # Only data lines are added to the window.
                    inWindow.append(line)

        # Process the last, possibly incomplete, block.
        # NOTE(review): inWindow can be empty here (e.g. no data lines in the
        # input); confirm processWindow() tolerates an empty list.
        processWindow(inWindow, outFile)
Example #2
0
import sys
from vcfLine import vcfLine






if __name__ == "__main__":
    # Usage: <script> <input.vcf> <output.txt>
    # Writes one "<chrom> <pos>" line for every data line of the input VCF.
    vcf_in, out_loc = sys.argv[1], sys.argv[2]

    # Both handles live in `with` blocks so they are closed on every exit
    # path, including an exception raised while parsing a line. The output
    # file is still opened first, matching the original ordering.
    with open(out_loc, "w") as outFile:
        with open(vcf_in) as vcfFile:
            for line in vcfFile:
                vcf_line = vcfLine(line)
                if vcf_line.isDataLine:
                    outFile.write(vcf_line.chrom + " " + vcf_line.pos + "\n")








Example #3
0
from vcfDict import vcfDict
from vcfLine import vcfLine
from vcfSample import vcfSample
from mpileLine import mpileLine

if __name__ == "__main__":
    # Usage: <script> <input.mpileup> <input.vcf> <output.csv>
    #
    # sys and traceback are imported here because this script's import block
    # does not bring them in, yet both are needed below (argv, error report).
    import sys
    import traceback

    mpile_in, vcf_in, csv_out = sys.argv[1], sys.argv[2], sys.argv[3]

    # Load dictionary of sites in vcf_in file
    vcfSites = vcfDict(vcf_in)
    vcfSites.loadDict()

    # Create the output file for csv_out. Holding it in a `with` guarantees
    # it is closed even if an unexpected error escapes the loop.
    with open(csv_out, "w") as outFile:
        with open(mpile_in) as pileFile:
            for line in pileFile:
                try:
                    pile_line = mpileLine(line)
                    # Single-argument print(...) behaves identically under
                    # Python 2 and Python 3.
                    print(pile_line.repr())
                    if vcfSites.siteExists(pile_line.siteID):
                        # The parsed VCF line is not used yet; constructing it
                        # still surfaces parse errors through the handler below.
                        vcf_line = vcfLine(vcfSites.getLine(pile_line.siteID))

                except Exception:
                    # Best-effort: report the failing line and keep going.
                    print(sys.exc_info()[0])
                    print(traceback.format_exc())
                    print(line)
import sys
from vcfLine import vcfLine
from vcfSample import vcfSample

# Genotype string that marks a heterozygous call.
HET = "0/1"


def allHet(samples):
    """
    Return True when every sample's GT equals HET, False otherwise.

    An empty collection of samples yields True.
    """
    return not any(entry.GT != HET for entry in samples)


if __name__ == "__main__":
    # Usage: <script> <input.vcf> <output.vcf>
    # Copies to the output only those data lines whose samples are all HET.
    vcf_in, vcf_out = sys.argv[1], sys.argv[2]

    # `with` closes the output on every exit path; the input handle is named
    # vcf_file so the builtin `file` is not shadowed.
    with open(vcf_out, "w") as outFile:
        with open(vcf_in) as vcf_file:
            for line in vcf_file:
                vLine = vcfLine(line)
                # Short-circuit keeps allHet() from running on non-data lines.
                if vLine.isDataLine and allHet(vLine.samples):
                    outFile.write(line)
Example #5
0
    # Body of a script entry point (its enclosing header is not visible here).
    # Command-line arguments, in order: unfiltered VCF path, target-sites VCF
    # path, output CSV path.
    vcf_unfilt, vcf_targetSites, out_loc = sys.argv[1], sys.argv[2], sys.argv[
        3]

    # Open the output CSV and write its header row.
    outFile = open(out_loc, "w")
    outFile.write(
        "siteID,start_chrom,start_pos,stop_chrom,stop_pos,altSiteCount,refSiteCount,avgDepth,avgRefReads,avgAltReads,avgOtherReads"
        + "\n")

    # Build the lookup of target sites from the target-sites VCF.
    targetSitesDict = vcfDict(vcf_targetSites)
    targetSitesDict.loadDict()

    # Sliding window over the data lines of the unfiltered VCF.
    window = []

    with open(vcf_unfilt) as unfilt:
        for line in unfilt:
            lineVcf = vcfLine(line)

            if lineVcf.isDataLine:  # Add data lines from the unfiltered file to the window
                window.append(lineVcf)

                if len(
                        window
                ) == WINDOW_SIZE + 1:  # Drop the oldest site so the size stays == WINDOW_SIZE
                    window.pop(0)

                    # Check if the site in the middle of the window is a target site.
                    # NOTE(review): this check only runs after a pop, so the window
                    # is not examined when it first reaches exactly WINDOW_SIZE
                    # entries - confirm that is intended.
                    # NOTE(review): `/` is used as a list index, which relies on
                    # Python 2 integer division.
                    if checkSite(window[WINDOW_SIZE / 2], targetSitesDict):
                        processWindow(window, outFile)

    outFile.close()
    # The `with` block above has already closed unfilt; this call is a no-op.
    unfilt.close()