def main():
    """Subset a gzipped VCF to the samples listed in a pedigree file.

    Command line: ``-ped <file.ped>`` names the pedigree file and the
    positional ``vcfile`` names the ``*.vcf.gz`` input.

    The header returned by the VCF object (after its sample list has been
    replaced by the retained samples) is printed to stdout, followed by one
    line per record carrying only the genotypes of samples whose ids are
    returned by the pedigree object.  The list of retained sample names is
    reported on stderr so that it does not end up inside the VCF stream.
    """
    import sys  # local import: only needed for the stderr diagnostic below

    parser = argparse.ArgumentParser(
        description='Given a gzipped vcf file and pedigree file, generate a new vcf '
                    'with only those samples present in the pedigree (ped file) ')
    # NOTE(review): -ped is optional as far as argparse is concerned, yet
    # Pedfile is always built from it -- confirm whether it should be required.
    parser.add_argument('-ped', dest='pedfile', type=str, help="*.ped file")
    parser.add_argument('vcfile', type=str, help='*.vcf.gz file')
    args = parser.parse_args()

    # Parse the pedfile and collect the individual ids to keep from the VCF.
    pedobj = Pedfile(args.pedfile)
    pedobj.parsePedfile()
    # Build the lookup once: a set avoids rescanning the id list for every
    # sample of every record.
    keep = set(pedobj.returnIndivids())

    vcfh = gzip.open(args.vcfile, 'r')
    try:
        vcfobj = VcfFile(args.vcfile)
        vcfobj.parseMetaAndHeaderLines(vcfh)

        # `samples` keeps the original column order; it is what each record's
        # genotypes are zipped against further down.
        samples = vcfobj.getSampleList()
        newsamples = [s for s in samples if s in keep]

        # Diagnostic only: written to stderr so stdout stays a clean VCF.
        sys.stderr.write(str(newsamples) + "\n")

        vcfobj.setSampleList(newsamples)
        print(vcfobj.returnHeader())

        for vrec in vcfobj.yieldVcfRecordwithGenotypes(vcfh):
            # Pair every genotype with its original sample name and retain
            # only those belonging to a pedigree individual.
            keepGenotypes = [
                genObj
                for (s, genObj) in vrec.zipGenotypes(samples)
                if s in keep
            ]
            vrec.addGenotypeList(keepGenotypes)
            print(vrec.toStringwithGenotypes())
    finally:
        # Release the gzip handle even if parsing fails part-way through.
        vcfh.close()
def main():
    """Remove the named samples from a gzipped VCF file.

    Command line: one or more positional sample names to drop, plus
    ``-vcf <file.vcf.gz>`` naming the input file.

    The header returned by the VCF object (after its sample list has been
    replaced by the remaining samples) is printed to stdout, followed by one
    line per record carrying only the genotypes of the samples that were not
    named for removal.
    """
    parser = argparse.ArgumentParser(description="remove samples from vcf file")
    parser.add_argument("removesamples", metavar="sample", type=str, nargs="+",
                        help="sample names to remove")
    # The file is opened unconditionally below, so a missing -vcf is reported
    # as a usage error rather than surfacing as a traceback from gzip.open.
    parser.add_argument("-vcf", dest="vcfile", type=str, required=True,
                        help="vcf file to remove samples from")
    args = parser.parse_args()

    # Build the lookup once instead of scanning the argument list for every
    # sample of every record.
    drop = set(args.removesamples)

    vcfh = gzip.open(args.vcfile, "r")
    try:
        vcfobj = VcfFile(args.vcfile)
        vcfobj.parseMetaAndHeaderLines(vcfh)

        # `samples` keeps the original column order; it is what each record's
        # genotypes are zipped against further down.
        samples = vcfobj.getSampleList()
        newsamples = [s for s in samples if s not in drop]

        vcfobj.setSampleList(newsamples)
        print(vcfobj.returnHeader())

        for vrec in vcfobj.yieldVcfRecordwithGenotypes(vcfh):
            # Pair every genotype with its original sample name and retain
            # only those whose sample was not requested for removal.
            keepGenotypes = [
                genObj
                for (s, genObj) in vrec.zipGenotypes(samples)
                if s not in drop
            ]
            vrec.addGenotypeList(keepGenotypes)
            print(vrec.toStringwithGenotypes())
    finally:
        # Release the gzip handle even if parsing fails part-way through.
        vcfh.close()