Example #1
0
        # Write the problem as PLINK files under out_base_name. Only the genotype
        # output is enabled; node-type, haplotype and error outputs are switched off.
        io.write_plink(problem, out_base_name, verbose=True,
                       save_node_type=False, save_genotype=True, save_haplotype=False, save_error=False)
        
        # Save statistics and phasing metadata in a separate npz. Each object is
        # wrapped in a one-element array so it can be stored as an npz entry.
        # NOTE(review): np.savez appends '.npz' to a string file name that lacks it,
        # so this should land in out_base_name + '.stats.npz' -- confirm.
        np.savez(out_base_name + '.stats', 
                 stats=np.array([stats]), 
                 info=np.array([problem.info]), 
                 pedigree=np.array([problem.pedigree]))

        # Common prefix of the PLINK invocations below: the PLINK executable plus
        # the transposed file set (--tfile) rooted at out_base_name -- presumably
        # the files io.write_plink produced above.
        plink_cmd_base = '%s --tfile %s' % (bu.PLINK, out_base_name,)
        if options.recode:
            # First, compute allele frequencies with PLINK (output: out_base_name.frq)
            util.run_command('%s --nonfounders --freq --out %s' % (plink_cmd_base, out_base_name))
            # Convert that frequency file to a reference-allele recoding file
            # (a file containing the list of SNPs and their minor allele letter)
            bu.frq_to_minor_file(out_base_name + '.frq', out_base_name + '.mnr') 

            # Then convert the PLINK data set to a 12-recoded TPED, where 1=minor
            # allele for each SNP. The output goes to a separate '.recoded' base name
            # so that PLINK does not write over the input it is reading.
            out_recoded = out_base_name + '.recoded'                 
            util.run_command('%s --transpose --recode12 --reference-allele %s.mnr --out %s' % \
                           (plink_cmd_base, out_base_name, out_recoded))

            # Move the recoded outputs back onto the out_base_name file names, then
            # reload the genotypes from the recoded TPED.
            # NOTE(review): os.rename silently replaces an existing destination file
            # on POSIX but raises on Windows -- confirm the target platform.
            for ext in ('nof', 'tped', 'tfam'):
                os.rename(out_recoded + '.' + ext, out_base_name + '.' + ext)
            genotype = io_genotype.read('plink', 'genotype', tped=out_base_name + '.tped', load_ids=False)
        else:
            # No recoding requested: use the genotype object already held by problem
            genotype = problem.genotype
            
        # Write the problem to an npz file as well.
        # NOTE(review): `genotype` is not consumed within this excerpt -- presumably
        # it is used further down; confirm it is not meant to be saved here.
        io.write_npz(problem, out_base_name + '.npz')
Example #2
0
                                   num_parts=part_count[options.chrom])
         # Per-part output names for the options.out_gxn target (presumably one base
         # name per part of this chromosome -- confirm against bu.partnames).
         # NOTE(review): this is built even when options.out_gxn is falsy; it is only
         # read below under `if options.out_gxn`.
         part_names_gxn = bu.partnames(bu.chrnames(options.out_gxn, part=options.chrom),
                                       num_parts=part_count[options.chrom])
         # `endpoints` is rebound here: before this line it is indexed by chromosome
         # to get (start, stop); afterwards it holds the part boundaries for this
         # chromosome (presumably stepping by part_size and including `stop`).
         (start, stop) = endpoints[options.chrom]
         endpoints = util.brange(start, stop, part_size, endpoint=True, dtype=int)
         # Each part spans from one boundary to the next one.
         # NOTE(review): consecutive parts share a boundary position -- confirm
         # whether a SNP located exactly on a boundary should appear in both parts.
         for (part, part_start) in enumerate(endpoints[:-1]):
             out = part_names[part]
             # Base PLINK command for this part: read the binary file set base_name,
             # restrict it to this chromosome and base-pair window, write to `out`.
             plink_cmd_base = '%s --cm --bfile %s --chr %d --from-bp %s --to-bp %s --out %s' \
             % (bu.PLINK, base_name, options.chrom, part_start, endpoints[part + 1], out)
             
             if options.recode:
                 # First, compute allele frequencies with PLINK (output: out.frq)
                 util.run_command(plink_cmd_base + ' --nonfounders --freq')
                 # Convert that frequency file to a reference-allele recoding file
                 # (a file containing the list of SNPs and their minor allele letter)
                 bu.frq_to_minor_file(out + '.frq', out + '.mnr') 
                 if options.out_gxn:
                     # Copy FRQ to target output directory, creating it if needed
                     out_frq = part_names_gxn[part] + '.frq'
                     mkdir_if_not_exists(os.path.dirname(out_frq))
                     shutil.copy(out + '.frq', out_frq)
 
                 # Then convert binary PLINK to a 12-recoded TPED, where 1=minor allele for each SNP
                 cmd = '%s --transpose --recode12 --reference-allele %s.mnr' % (plink_cmd_base, out)
                 util.run_command(cmd)
             else:
                 # No recoding, just convert binary to 12-recoded TPED. PLINK assigns "1" to
                 # the first-encountered allele in the file for each SNP.
                 cmd = '%s --transpose --recode12' % (plink_cmd_base,)
                 util.run_command(cmd)