예제 #1
0
def main():
    """Write a VCF restricted to the samples listed in a pedigree file.

    Command line: ``-ped <file.ped> <file.vcf.gz>``.

    The individual ids are read from the ped file, the gzipped VCF is parsed,
    and the header plus every record are printed to stdout with only the
    genotypes of samples that also appear in the pedigree. The kept sample
    names are reported on stderr so that stdout carries nothing but the VCF.
    """
    import sys  # local import: only needed for the stderr diagnostic below

    parser = argparse.ArgumentParser(description='Given a gzipped vcf file and pedigree file, generate a new vcf with only those samples present in the pedigree (ped file) ')
    # required=True turns a missing ped file into a usage error instead of
    # handing None to Pedfile further down.
    parser.add_argument('-ped', dest='pedfile', type=str, required=True, help="*.ped file")
    parser.add_argument('vcfile', type=str, help='*.vcf.gz file')
    args = parser.parse_args()

    # Parse the ped file and collect the individual ids to keep from the VCF.
    pedobj = Pedfile(args.pedfile)
    pedobj.parsePedfile()
    # Built once as a set: membership is tested for every sample of every record.
    keep = frozenset(pedobj.returnIndivids())

    # The context manager guarantees the gzip handle is closed, even on error.
    with gzip.open(args.vcfile, 'r') as vcfh:
        vcfobj = VcfFile(args.vcfile)
        vcfobj.parseMetaAndHeaderLines(vcfh)

        # `samples` keeps the original column order; it is still needed below
        # to pair each genotype with its sample name.
        samples = vcfobj.getSampleList()
        newsamples = [s for s in samples if s in keep]

        # Diagnostic only -- written to stderr so it does not end up in front
        # of the VCF header on stdout.
        sys.stderr.write("%s\n" % (newsamples,))

        vcfobj.setSampleList(newsamples)
        print(vcfobj.returnHeader())

        for vrec in vcfobj.yieldVcfRecordwithGenotypes(vcfh):
            # Pair genotypes with the ORIGINAL sample list, then keep only
            # those whose sample is in the pedigree.
            keep_genotypes = [
                gen_obj
                for (s, gen_obj) in vrec.zipGenotypes(samples)
                if s in keep
            ]
            vrec.addGenotypeList(keep_genotypes)
            print(vrec.toStringwithGenotypes())
예제 #2
0
def main():
    """Remove the named samples from a gzipped VCF file.

    Command line: ``-vcf <file.vcf.gz> sample [sample ...]``.

    The header and every record are printed to stdout with the genotypes of
    the listed samples dropped; all other samples keep their original order.
    """
    parser = argparse.ArgumentParser(description="remove samples from vcf file")
    parser.add_argument("removesamples", metavar="sample", type=str, nargs="+", help="sample names to remove")
    # required=True turns a missing -vcf into a usage error instead of
    # handing None to gzip.open further down.
    parser.add_argument("-vcf", dest="vcfile", type=str, required=True, help="vcf file to remove samples from")
    args = parser.parse_args()

    # Built once as a set: membership is tested for every sample of every record.
    drop = frozenset(args.removesamples)

    # The context manager guarantees the gzip handle is closed, even on error.
    with gzip.open(args.vcfile, "r") as vcfh:
        vcfobj = VcfFile(args.vcfile)
        vcfobj.parseMetaAndHeaderLines(vcfh)

        # `samples` keeps the original column order; it is still needed below
        # to pair each genotype with its sample name.
        samples = vcfobj.getSampleList()
        newsamples = [s for s in samples if s not in drop]

        vcfobj.setSampleList(newsamples)
        print(vcfobj.returnHeader())

        for vrec in vcfobj.yieldVcfRecordwithGenotypes(vcfh):
            # Pair genotypes with the ORIGINAL sample list, then discard
            # those belonging to a removed sample.
            keep_genotypes = [
                gen_obj
                for (s, gen_obj) in vrec.zipGenotypes(samples)
                if s not in drop
            ]
            vrec.addGenotypeList(keep_genotypes)
            print(vrec.toStringwithGenotypes())