Example #1
0
def main(args):
    ''' 
        This script takes in a VCF file (a sample x snp matrux), splits it 
        into --ref and --imp sets. From the --imp set, it further reduces 
        the VCF to a set of --tags. Using the --imp X --tags subset, it uses
        beagle 4 to impute and discovers the miscalssification rate of the 
        
    '''
    parser = OptionParser()
    parser.add_option('--vcf')
    parser.add_option('--ids', default=None)
    parser.add_option('--tags', default=None)
    parser.add_option('--call_thresh', default=0.45, type='float')
    # parse options!
    options, args = parser.parse_args(args)
    processes = []
    # filter imp set into tag set
    log('Generating keep and ref sets' + '#' * 50)
    if not os.path.exists('keep_' + os.path.basename(options.ids) +
                          '.recode.vcf'):
        p = Popen([
            'vcftools', '--vcf', options.vcf, '--keep', options.ids,
            '--recode', '--out', 'keep_' + os.path.basename(options.ids)
        ])
        p.desc = 'vcftools: keep'
        processes.append(p)
    if not os.path.exists('ref_' + os.path.basename(options.ids) +
                          '.recode.vcf'):
        p = Popen([
            'vcftools', '--vcf', options.vcf, '--remove', options.ids,
            '--recode', '--out', 'ref_' + os.path.basename(options.ids)
        ])
        p.desc = 'vcftools: remove'
        processes.append(p)
    # Wait for vcftools to be done
    wait(processes)
    # filter to only tags for keep set
    if not os.path.exists('tags_' + os.path.basename(options.ids) +
                          '.recode.vcf'):
        log('Genertaing Tag VCF' + '#' * 50)
        p = Popen([
            'vcftools', '--vcf',
            'keep_' + os.path.basename(options.ids) + '.recode.vcf', '--snps',
            options.tags, '--recode', '--out',
            'tags_' + os.path.basename(options.ids)
        ])
        p.desc = 'vcftools: tags'
        processes.append(p)
        wait(processes)
    # phase the reference pops
    log('Phasing reference set' + '#' * 50)
    if not os.path.exists('ref_phased_' + os.path.basename(options.ids) +
                          '.recode.vcf'):
        p = Popen([
            'java', '-jar', '/project/mccuelab/shared/bin/b4.r1274.jar',
            'gt=ref_' + os.path.basename(options.ids) + '.recode.vcf',
            'out=ref_phased_' + os.path.basename(options.ids) + '.recode'
        ])
        p.desc = 'ref phasing'
        processes.append(p)
        wait(processes)
        unzip('ref_phased_' + os.path.basename(options.ids) + '.recode.vcf.gz',
              processes)
    log('Imputing Tags' + '#' * 50)
    if not os.path.exists('imputed_' + os.path.basename(options.ids) +
                          '.recode.vcf'):
        p = Popen([
            'java',
            '-jar',
            '/project/mccuelab/shared/bin/b4.r1274.jar',
            'gt=tags_' + os.path.basename(options.ids) + '.recode.vcf',
            'ref=ref_phased_' + os.path.basename(options.ids) + '.recode.vcf',
            'out=imputed_' + os.path.basename(options.ids) + '.recode',
        ])
        p.desc = 'tag imputation'
        processes.append(p)
        wait(processes)
        unzip('imputed_' + os.path.basename(options.ids) + '.recode.vcf.gz',
              processes)

    # Now do what you came here to do!
    tags = TagSet('tags_' + os.path.basename(options.ids) + '.recode.vcf')
    print(
        os.path.basename(options.ids),
        tags.misclassification(
            VCF("imputed_" + os.path.basename(options.ids) + ".recode.vcf"),
            VCF("keep_" + os.path.basename(options.ids) + ".recode.vcf"),
            call_thresh=options.call_thresh))
Example #2
0
def main(args):
    ''' 
        This script takes in a VCF file (a sample x snp matrux), splits it 
        into --ref and --imp sets. From the --imp set, it further reduces 
        the VCF to a set of --tags. Using the --imp X --tags subset, it uses
        beagle 4 to impute and discovers the miscalssification rate of the 
        
    '''
    parser = OptionParser()
    parser.add_option('--vcf')
    parser.add_option('--ids',default=None)
    parser.add_option('--tags',default=None)
    parser.add_option('--call_thresh',default=0.45,type='float')
    # parse options!
    options,args = parser.parse_args(args)
    processes = []
    # filter imp set into tag set
    log('Generating keep and ref sets' + '#'*50)
    if not os.path.exists('keep_'+os.path.basename(options.ids)+'.recode.vcf'):
        p = Popen([
            'vcftools', 
            '--vcf', options.vcf,
            '--keep', options.ids,
            '--recode',
            '--out', 'keep_' + os.path.basename(options.ids)
        ])
        p.desc = 'vcftools: keep'
        processes.append(p)
    if not os.path.exists('ref_'+os.path.basename(options.ids)+'.recode.vcf'):
        p = Popen([
            'vcftools', 
            '--vcf', options.vcf,
            '--remove', options.ids,
            '--recode',
            '--out', 'ref_' + os.path.basename(options.ids)
        ])
        p.desc = 'vcftools: remove' 
        processes.append(p)
    # Wait for vcftools to be done
    wait(processes)
    # filter to only tags for keep set
    if not os.path.exists('tags_'+os.path.basename(options.ids)+'.recode.vcf'):
        log('Genertaing Tag VCF'+ '#'*50)
        p = Popen([
            'vcftools',
            '--vcf', 'keep_' + os.path.basename(options.ids) + '.recode.vcf',
            '--snps', options.tags,
            '--recode',
            '--out', 'tags_' + os.path.basename(options.ids)
        ])
        p.desc = 'vcftools: tags'
        processes.append(p)
        wait(processes)
    # phase the reference pops
    log('Phasing reference set' + '#'*50)
    if not os.path.exists('ref_phased_'+os.path.basename(options.ids)+'.recode.vcf'):
        p = Popen([
            'java', 
            '-jar', '/project/mccuelab/shared/bin/b4.r1274.jar',
            'gt=ref_'+os.path.basename(options.ids)+'.recode.vcf',
            'out=ref_phased_'+os.path.basename(options.ids)+'.recode'
        ])
        p.desc = 'ref phasing'
        processes.append(p)
        wait(processes)
        unzip('ref_phased_'+os.path.basename(options.ids)+'.recode.vcf.gz',processes)
    log('Imputing Tags' + '#'*50)
    if not os.path.exists('imputed_'+os.path.basename(options.ids)+'.recode.vcf'):
        p = Popen([
            'java', 
            '-jar', '/project/mccuelab/shared/bin/b4.r1274.jar',
            'gt=tags_'+os.path.basename(options.ids)+'.recode.vcf',
            'ref=ref_phased_'+os.path.basename(options.ids)+'.recode.vcf',
            'out=imputed_'+os.path.basename(options.ids)+'.recode',
        ])
        p.desc = 'tag imputation'
        processes.append(p)
        wait(processes)
        unzip('imputed_'+os.path.basename(options.ids)+'.recode.vcf.gz',processes)

    # Now do what you came here to do!
    tags = TagSet('tags_'+os.path.basename(options.ids)+'.recode.vcf')
    print(os.path.basename(options.ids),
        tags.misclassification(
            VCF("imputed_"+os.path.basename(options.ids)+".recode.vcf"),
            VCF("keep_"+os.path.basename(options.ids)+".recode.vcf"),
            call_thresh=options.call_thresh
        )
    )
Example #3
0
def unzip(file, processes):
    p = Popen(['gunzip', '-f', file])
    p.desc = 'unzipping'
    processes.append(p)
    wait(processes)
Example #4
0
def unzip(file,processes):
    p = Popen(['gunzip','-f',file])    
    p.desc = 'unzipping'
    processes.append(p)
    wait(processes)