def read_plink(**kwargs):
    '''Load a problem from the following PLINK files:

    Default          Override Option     Data                                Format
    ======================================================================================
    prefix.pdg.tfam  pedigree            Pedigree adjacency                  PLINK TFAM (genotyped+nongenotyped samples)
    prefix.tfam      pedigree_genotyped  Genotyped sample pedigree           PLINK TFAM corresponding to prefix.tped
                                         (sub-graph of the pedigree)
    prefix.tped      genotype            Genotype data                       PLINK TPED
    prefix.hap.tped  haplotype*          Haplotype data                      PLINK TPED
    prefix.err       error**             Genotype errors flagged             Integer array (snps x samples)
    prefix.info      info                Problem info                        pickle (binary)
    prefix.frm       frames              LD-independent SNP frames           text file
    prefix.lam       lam***              Haplotype est. recombination rate   text file

    *   - hap data not loaded if this option is None.
    **  - errors set to 0 if this file is not found or this option is set to None
          (this function passes error=None to Problem in that case).
    *** - data not loaded if this file is not found.

    Keyword arguments:
        verbose -- if true, print a progress line before each file is read
                   (default False).
        prefix  -- common file-name prefix used to derive every default path
                   in the table above. If empty, each path defaults to None
                   and must be supplied through its override option.
        pedigree, pedigree_genotyped, genotype, haplotype, error, info,
        frames, lam -- explicit paths overriding the prefix-derived defaults.

    Returns the Problem object built from the loaded data. The pickled info
    file is loaded into problem.info only when both a haplotype path and an
    info path are set.

    Raises ValueError if the pedigree, pedigree_genotyped or genotype path is
    missing (no prefix and no explicit override, or an override of None).
    '''
    # Read input options
    verbose = kwargs.get('verbose', False)
    prefix = kwargs.get('prefix', '')

    def option(name, suffix):
        # An explicit keyword always wins; otherwise derive the path from the
        # prefix. The concatenation happens only when a prefix is given, so a
        # None prefix behaves like an empty one instead of raising TypeError.
        return kwargs.get(name, (prefix + suffix) if prefix else None)

    pedigree = option('pedigree', '.pdg.tfam')
    pedigree_genotyped = option('pedigree_genotyped', '.tfam')
    genotype = option('genotype', '.tped')
    haplotype = option('haplotype', '.hap.tped')
    error_file = option('error', '.err')
    info = option('info', '.info')

    # Check every mandatory path individually. The error file is deliberately
    # not required: the docstring allows it to be None or absent on disk.
    required = (('pedigree', pedigree),
                ('pedigree_genotyped', pedigree_genotyped),
                ('genotype', genotype))
    missing = [name for name, value in required if value is None]
    if missing:
        raise ValueError('Must specify a prefix or pedigree, pedigree_genotyped, '
                         'genotype files (missing: %s)' % (', '.join(missing),))

    frames_file = option('frames', '.frm')
    lam_file = option('lam', '.lam')

    def location(path):
        # Placeholder shown in progress messages for unset paths.
        return path if path else '-'

    # Load data. Each print call takes one pre-formatted string, so the output
    # is the same under the Python 2 print statement and the print function.
    if verbose:
        print('Reading pedigree from %s, %s ...'
              % (location(pedigree), location(pedigree_genotyped),))
    p = io_pedigree.read(pedigree, genotyped_id_file=pedigree_genotyped)

    if verbose:
        print('Reading genotype data from %s ...' % (location(genotype),))
    g = io_genotype.read('plink', 'genotype', tped=genotype, load_ids=False)

    if verbose:
        print('Reading haplotype data from %s ...' % (location(haplotype),))
    h = (io_genotype.read('plink', 'haplotype', tped=haplotype, load_ids=False)
         if haplotype else None)

    if verbose:
        print('Reading error data from %s ...' % (location(error_file),))
    # A missing error file is not an error; Problem receives None instead.
    error = (np.loadtxt(error_file)
             if error_file and os.path.isfile(error_file) else None)

    if verbose:
        print('Reading frame data from %s ...' % (location(frames_file),))
    frames = db_gene.snp.ld_graph.read_frames(frames_file) if frames_file else None

    # As with the error file, a missing lam file simply yields None.
    lam = np.loadtxt(lam_file) if lam_file and os.path.isfile(lam_file) else None

    problem = Problem(p, g, haplotype=h, error=error, frames=frames, lam=lam)

    # Problem info is only restored together with haplotype data.
    if haplotype and info:
        if verbose:
            print('Reading problem info from %s ...' % (info,))
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only load info files that come from a trusted source.
        with open(info, 'rb') as info_in:
            problem.info = pickle.load(info_in)
    return problem
def read_plink(**kwargs):
    '''Load a problem from the following PLINK files:

    Default          Override Option     Data                                Format
    ======================================================================================
    prefix.pdg.tfam  pedigree            Pedigree adjacency                  PLINK TFAM (genotyped+nongenotyped samples)
    prefix.tfam      pedigree_genotyped  Genotyped sample pedigree           PLINK TFAM corresponding to prefix.tped
                                         (sub-graph of the pedigree)
    prefix.tped      genotype            Genotype data                       PLINK TPED
    prefix.hap.tped  haplotype*          Haplotype data                      PLINK TPED
    prefix.err       error**             Genotype errors flagged             Integer array (snps x samples)
    prefix.info      info                Problem info                        pickle (binary)
    prefix.frm       frames              LD-independent SNP frames           text file
    prefix.lam       lam***              Haplotype est. recombination rate   text file

    *   - hap data not loaded if this option is None.
    **  - errors set to 0 if this file is not found or this option is set to None
          (this function passes error=None to Problem in that case).
    *** - data not loaded if this file is not found.

    Keyword arguments:
        verbose -- if true, print a progress line before each file is read
                   (default False).
        prefix  -- common file-name prefix used to derive every default path
                   in the table above. If empty, each path defaults to None
                   and must be supplied through its override option.
        pedigree, pedigree_genotyped, genotype, haplotype, error, info,
        frames, lam -- explicit paths overriding the prefix-derived defaults.

    Returns the Problem object built from the loaded data. The pickled info
    file is loaded into problem.info only when both a haplotype path and an
    info path are set.

    Raises ValueError if the pedigree, pedigree_genotyped or genotype path is
    missing (no prefix and no explicit override, or an override of None).
    '''
    # Read input options
    verbose = kwargs.get('verbose', False)
    prefix = kwargs.get('prefix', '')

    def option(name, suffix):
        # An explicit keyword always wins; otherwise derive the path from the
        # prefix. The concatenation happens only when a prefix is given, so a
        # None prefix behaves like an empty one instead of raising TypeError.
        return kwargs.get(name, (prefix + suffix) if prefix else None)

    pedigree = option('pedigree', '.pdg.tfam')
    pedigree_genotyped = option('pedigree_genotyped', '.tfam')
    genotype = option('genotype', '.tped')
    haplotype = option('haplotype', '.hap.tped')
    error_file = option('error', '.err')
    info = option('info', '.info')

    # Check every mandatory path individually. The error file is deliberately
    # not required: the docstring allows it to be None or absent on disk.
    required = (('pedigree', pedigree),
                ('pedigree_genotyped', pedigree_genotyped),
                ('genotype', genotype))
    missing = [name for name, value in required if value is None]
    if missing:
        raise ValueError('Must specify a prefix or pedigree, pedigree_genotyped, '
                         'genotype files (missing: %s)' % (', '.join(missing),))

    frames_file = option('frames', '.frm')
    lam_file = option('lam', '.lam')

    def location(path):
        # Placeholder shown in progress messages for unset paths.
        return path if path else '-'

    # Load data. Each print call takes one pre-formatted string, so the output
    # is the same under the Python 2 print statement and the print function.
    if verbose:
        print('Reading pedigree from %s, %s ...'
              % (location(pedigree), location(pedigree_genotyped),))
    p = io_pedigree.read(pedigree, genotyped_id_file=pedigree_genotyped)

    if verbose:
        print('Reading genotype data from %s ...' % (location(genotype),))
    g = io_genotype.read('plink', 'genotype', tped=genotype, load_ids=False)

    if verbose:
        print('Reading haplotype data from %s ...' % (location(haplotype),))
    h = (io_genotype.read('plink', 'haplotype', tped=haplotype, load_ids=False)
         if haplotype else None)

    if verbose:
        print('Reading error data from %s ...' % (location(error_file),))
    # A missing error file is not an error; Problem receives None instead.
    error = (np.loadtxt(error_file)
             if error_file and os.path.isfile(error_file) else None)

    if verbose:
        print('Reading frame data from %s ...' % (location(frames_file),))
    frames = db_gene.snp.ld_graph.read_frames(frames_file) if frames_file else None

    # As with the error file, a missing lam file simply yields None.
    lam = np.loadtxt(lam_file) if lam_file and os.path.isfile(lam_file) else None

    problem = Problem(p, g, haplotype=h, error=error, frames=frames, lam=lam)

    # Problem info is only restored together with haplotype data.
    if haplotype and info:
        if verbose:
            print('Reading problem info from %s ...' % (info,))
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only load info files that come from a trusted source.
        with open(info, 'rb') as info_in:
            problem.info = pickle.load(info_in)
    return problem