def __plink_to_npz(self, p, file_name):
    '''Round-trip problem p through a PLINK file set and an NPZ file.

    p is written as a PLINK file set under the prefix file_name, that file
    set is converted into the file file_name + '.npz', and the problem read
    back from the NPZ file is returned. No comparison against p is made
    here; that is left to the caller.'''
    npz_path = file_name + '.npz'

    # Step 1: dump the problem in PLINK format.
    io.write_plink(p, prefix=file_name)

    # Step 2: PLINK file set -> NPZ file.
    io.plink_to_npz(file_name, npz_path)

    # Step 3: hand back whatever the NPZ file loads as.
    return io.read_npz(npz_path)
def __save_and_load_problem_plink(self, problem):
    '''Save a problem to a temporary PLINK file set and load it back.

    A unique temporary path is reserved and used as the PLINK prefix. The
    problem is written with io.write_plink and re-read with io.read_plink;
    the re-read problem is returned. The temporary files are removed
    whether or not the round trip succeeds.

    Raises whatever io.write_plink / io.read_plink raise. Cleanup only
    re-raises OSError for reasons other than a file not existing.'''
    import errno

    # Reserve the temporary name BEFORE entering the try block: if this step
    # fails there is nothing to clean up, and the finally clause must never
    # run with file_name unbound (that would raise NameError and hide the
    # real error).
    f = tempfile.NamedTemporaryFile(delete=False)
    file_name = f.name
    f.close()

    try:
        io.write_plink(problem, file_name)
        return io.read_plink(prefix=file_name)
    finally:
        # Delete test files. A file may be absent (e.g. the write failed
        # part-way), so a missing file is not an error here; raising from
        # cleanup would mask the exception that actually caused the failure.
        for ext in ['', '.pdg.tfam', '.tfam', '.tped', '.hap.tped', '.info']:
            try:
                os.remove(file_name + ext)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
def __main(options):
    '''
    --------------------------------------------------
    Main program - accepts an options struct.

    Builds the phaser with build_phasing_pipeline(options), loads the
    problem with __load_problem(options), runs the phasing chain with a
    PhaseParam populated from options, and prints the run statistics.
    If options.output is set, the result is written to it: as NPZ (plus a
    separate '<prefix>.info.npz' file holding problem.info) when the
    extension is '.npz', otherwise as a PLINK file set.

    Returns the loaded problem object.
    --------------------------------------------------
    '''
    # Each print below emits one pre-formatted string, so the output is the
    # same whether print is a statement or a function.
    if options.debug:
        print('%s %s' % ('Input options', options))
        print('Building phaser (stage = %d) ...' % (options.stage, ))
    phaser = build_phasing_pipeline(options)

    if options.debug:
        print('Reading data ...')
    problem = __load_problem(options)

    if options.debug:
        print('Phasing ...')
    params = PhaseParam()
    params.update_from_struct(options)
    request = run_phasing_chain(phaser, problem, params)

    # Statistics report, framed by blank lines.
    print('')
    request.stats.pprint()
    print('')

    # Nothing to write: done.
    if options.output is None:
        return problem

    if options.min_output:
        print('Minimizing output size...')
        io.slim(problem)

    out_prefix, ext = os.path.splitext(options.output)
    if ext != '.npz':
        # Any extension other than .npz means PLINK output.
        print('Writing haplotype result to %s in PLINK format ...' % (options.output, ))
        io.write_plink(problem, options.output, verbose=options.debug)
        return problem

    # NPZ output: haplotypes first, then the problem info in its own file.
    print('Writing haplotype result to %s in NPZ format ...' % (options.output, ))
    io.write_npz(problem, options.output)
    output_info = out_prefix + '.info.npz'
    print('Writing problem info result to %s in NPZ format ...' % (output_info, ))
    io.write_info_npz(problem.info, output_info)
    return problem
def __main(options):
    '''
    --------------------------------------------------
    Main program - accepts an options struct.

    Steps: build the phasing pipeline, load the problem, run the phasing
    chain with parameters taken from the options struct, print the run
    statistics, and optionally write the result to options.output
    ('.npz' extension -> NPZ plus an '.info.npz' companion file;
    anything else -> PLINK). Returns the loaded problem object.
    --------------------------------------------------
    '''
    # Messages are formatted into a single string before printing so the
    # text is identical under both the print statement and print function.
    if options.debug:
        banner = '%s %s' % ('Input options', options)
        print(banner)
        stage_msg = 'Building phaser (stage = %d) ...' % (options.stage,)
        print(stage_msg)
    pipeline = build_phasing_pipeline(options)

    if options.debug:
        print('Reading data ...')
    problem = __load_problem(options)

    if options.debug:
        print('Phasing ...')
    phase_params = PhaseParam()
    phase_params.update_from_struct(options)
    result = run_phasing_chain(pipeline, problem, phase_params)

    print('')
    result.stats.pprint()
    print('')

    if options.output is not None:
        if options.min_output:
            print('Minimizing output size...')
            io.slim(problem)

        # The output extension selects the on-disk format.
        prefix, suffix = os.path.splitext(options.output)
        if suffix == '.npz':
            hap_msg = 'Writing haplotype result to %s in NPZ format ...' % (options.output,)
            print(hap_msg)
            io.write_npz(problem, options.output)

            # Problem info goes to a sibling file next to the haplotypes.
            info_path = prefix + '.info.npz'
            info_msg = 'Writing problem info result to %s in NPZ format ...' % (info_path,)
            print(info_msg)
            io.write_info_npz(problem.info, info_path)
        else:
            plink_msg = 'Writing haplotype result to %s in PLINK format ...' % (options.output,)
            print(plink_msg)
            io.write_plink(problem, options.output, verbose=options.debug)

    return problem
# Convert plink tped -> npz problem = io.read_plink(prefix=base_name, pedigree=pedigree_file, haplotype=None, verbose=options.debug) # Phase, impute, fill missing phaser = phase.build_phasing_pipeline(options) request = phase.run_phasing_chain(phaser, problem) stats = request.stats print '' stats.pprint() print '' # Convert phased npz -> plink tped. Save only genotypes (haplotypes may need to be saved in the stats # object as a hash table for 'coloring the pedigree' at a later stage. genotype_file = out_base_name + '.tped' io.write_plink(problem, out_base_name, verbose=True, save_node_type=False, save_genotype=True, save_haplotype=False, save_error=False) # Save statistics and phasing metadata in a separate npz np.savez(out_base_name + '.stats', stats=np.array([stats]), info=np.array([problem.info]), pedigree=np.array([problem.pedigree])) plink_cmd_base = '%s --tfile %s' % (bu.PLINK, out_base_name,) if options.recode: # First, compute allele frequencies with PLINK util.run_command('%s --nonfounders --freq --out %s' % (plink_cmd_base, out_base_name)) # Convert frequencies file that to a reference allele recoding # file (a file containing the list of SNPs and their minor allele letter) bu.frq_to_minor_file(out_base_name + '.frq', out_base_name + '.mnr')