def qc_problem(problem, index_file=DEFAULT_ID_INDEX, samples=None, snps=None, debug=0,
               input_dir=None, segment_location=None, ibd_sample_index=None, debug_sample=-1):
    '''Run the imputation QC routine on a phasing problem using the IBD segment index.

    Per the original description: the SNPs that have also been genotyped for all samples are
    imputed and compared with the imputation result. The QC work itself is delegated to qc().

    problem = npz file location or Problem instance.
    index_file = forwarded to problem_to_imputation_set().
    samples, debug, debug_sample = forwarded unchanged to qc().
    snps = optional SNP selection; when not None, the problem is first restricted with
    sub_problem_of_snps().
    input_dir = accepted but not used by this function.
    segment_location = IBD segment index location. When empty/None, defaults to
    '$OBER_OUT/index_segments' (raises KeyError if OBER_OUT is not set in the environment).
    ibd_sample_index = optional dictionary of IBD-segment-ID-problem-sample-ID. If None, the
    mapping between the two IDs is assumed to be the identity.

    Returns the tuple (phasing Problem, imputation set).'''
    # Read input data: accept a ready Problem, otherwise load it from the npz location
    if isinstance(problem, Problem):
        phasing_problem = problem
    else:
        phasing_problem = im.io.read_npz(problem)

    # Select SNPs
    if snps is not None:
        phasing_problem = phasing_problem.sub_problem_of_snps(snps)

    imputation_set = problem_to_imputation_set(phasing_problem, index_file=index_file)
    # Single-argument parenthesized print emits the same text whether print is a statement
    # or a function
    print(phasing_problem)
    print(imputation_set.genotype)

    # Load IBD index; the environment is consulted only when no location was supplied
    index_location = segment_location or '%s/index_segments' % (os.environ['OBER_OUT'],)
    segment_index = im.index.segment_index.SegmentIndex(index_location)

    # Run imputation QC
    qc(segment_index, imputation_set,
       samples=samples,
       debug=debug,
       genotype=phasing_problem.g,
       ibd_sample_index=ibd_sample_index,
       debug_sample=debug_sample)
    return phasing_problem, imputation_set
def impute_problem(problem, index_file=DEFAULT_ID_INDEX, samples=None, snps=None, debug=0,
                   input_dir=None, algorithm='index', segment_location=None,
                   ibd_sample_index=None, remove_partial_calls=False, debug_sample=-1,
                   genotypes_as_is=False):
    '''Impute the SNPs of a phasing problem from IBD segments.

    Per the original description: impute the SNPs that have also been genotyped for all
    samples, and compare with the imputation result.

    problem = npz file location or Problem instance.
    index_file, genotypes_as_is = forwarded to problem_to_imputation_set().
    samples, debug = forwarded to the selected imputation routine.
    snps = optional SNP selection; when not None, the problem is first restricted with
    sub_problem_of_snps().
    input_dir = directory holding 'segments.out'; only consulted for the 'interval_tree'
    algorithm when segment_location is not given.
    segment_location = IBD segment repository (segment file or segment index) location.
    When empty/None it defaults to '<input_dir>/segments.out' for 'interval_tree' and to
    '$OBER_OUT/index_segments' for 'index'.
    ibd_sample_index = optional dictionary of CGI-ID-problem-sample-ID. If None, the mapping
    between the two IDs is assumed to be the identity.
    remove_partial_calls, debug_sample = forwarded to the 'index' imputation routine only.

    algorithm options:
    'index' - Imputation V2 using IBD cliques at each SNP (default).
    'interval_tree' - Imputation V1 using interval tree queries on a SmartSegmentSet.

    Returns the tuple (phasing Problem, imputation set).

    Raises ValueError if the algorithm is unsupported, or if 'interval_tree' is requested
    with neither segment_location nor input_dir. Raises KeyError if the default index
    location is needed and OBER_OUT is not set in the environment.'''
    # NOTE(review): this file contains a second, later definition of impute_problem with the
    # same signature; that later definition rebinds the name and shadows this one.

    # Fail fast on a bad algorithm name, before any data is loaded or the environment is read
    if algorithm not in ('index', 'interval_tree'):
        raise ValueError("Unsupported imputation algorithm '%s'" % (algorithm,))

    # Resolve the default IBD segment location for the chosen algorithm
    if not segment_location:
        if algorithm == 'interval_tree':
            if input_dir is None:
                # Formatting None would silently yield the bogus path 'None/segments.out'
                raise ValueError("input_dir or segment_location is required for the "
                                 "'interval_tree' algorithm")
            segment_location = '%s/segments.out' % (input_dir,)
        else:
            segment_location = '%s/index_segments' % (os.environ['OBER_OUT'],)

    # Read input data
    p = problem if isinstance(problem, Problem) else im.io.read_npz(problem)
    # Select SNPs
    if snps is not None:
        p = p.sub_problem_of_snps(snps)
    t = problem_to_imputation_set(p, index_file=index_file, genotypes_as_is=genotypes_as_is)
    # Single pre-formatted argument per print: same output whether print is a statement or
    # a function
    print(p)
    print(t.genotype)

    # Run imputation
    print('IBD segment index at %s' % (segment_location,))
    print('Imputing, algorithm %s ... \n' % (algorithm,))
    if algorithm == 'interval_tree':
        ibd = im.smart_segment_set.SmartSegmentSet.load(p.pedigree.num_genotyped, segment_location)
        print('IBD segments %s' % (ibd.size,))
        im.imputation.iibd.impute(p.haplotype, ibd, t, samples=samples, debug=debug,
                                  ibd_sample_index=ibd_sample_index)
    else:
        # algorithm == 'index' (validated above)
        ibd = im.index.segment_index.SegmentIndex(segment_location)
        im.imputation.impute_ibd_index.impute(ibd, t, samples=samples, debug=debug, genotype=p.g,
                                              ibd_sample_index=ibd_sample_index,
                                              remove_partial_calls=remove_partial_calls,
                                              debug_sample=debug_sample)
    return p, t
def impute_problem(
    problem,
    index_file=DEFAULT_ID_INDEX,
    samples=None,
    snps=None,
    debug=0,
    input_dir=None,
    algorithm="index",
    segment_location=None,
    ibd_sample_index=None,
    remove_partial_calls=False,
    debug_sample=-1,
    genotypes_as_is=False,
):
    """Impute the SNPs of a phasing problem from IBD segments.

    Per the original description: impute the SNPs that have also been genotyped for all
    samples, and compare with the imputation result.

    problem = npz file location or Problem instance.
    index_file, genotypes_as_is = forwarded to problem_to_imputation_set().
    samples, debug = forwarded to the selected imputation routine.
    snps = optional SNP selection; when not None, the problem is first restricted with
    sub_problem_of_snps().
    input_dir = directory holding 'segments.out'; only consulted for the 'interval_tree'
    algorithm when segment_location is not given.
    segment_location = IBD segment repository (segment file or segment index) location.
    When empty/None it defaults to '<input_dir>/segments.out' for 'interval_tree' and to
    '$OBER_OUT/index_segments' for 'index'.
    ibd_sample_index = optional dictionary of CGI-ID-problem-sample-ID. If None, the mapping
    between the two IDs is assumed to be the identity.
    remove_partial_calls, debug_sample = forwarded to the 'index' imputation routine only.

    algorithm options:
    'index' - Imputation V2 using IBD cliques at each SNP (default).
    'interval_tree' - Imputation V1 using interval tree queries on a SmartSegmentSet.

    Returns the tuple (phasing Problem, imputation set).

    Raises ValueError if the algorithm is unsupported, or if 'interval_tree' is requested
    with neither segment_location nor input_dir. Raises KeyError if the default index
    location is needed and OBER_OUT is not set in the environment."""
    # NOTE(review): an earlier definition of impute_problem with the same signature precedes
    # this one in the file; this later definition is the one left bound to the name.

    # Fail fast on a bad algorithm name, before any data is loaded or the environment is read
    if algorithm not in ("index", "interval_tree"):
        raise ValueError("Unsupported imputation algorithm '%s'" % (algorithm,))

    # Resolve the default IBD segment location for the chosen algorithm
    if not segment_location:
        if algorithm == "interval_tree":
            if input_dir is None:
                # Formatting None would silently yield the bogus path 'None/segments.out'
                raise ValueError(
                    "input_dir or segment_location is required for the "
                    "'interval_tree' algorithm"
                )
            segment_location = "%s/segments.out" % (input_dir,)
        else:
            segment_location = "%s/index_segments" % (os.environ["OBER_OUT"],)

    # Read input data
    p = problem if isinstance(problem, Problem) else im.io.read_npz(problem)
    # Select SNPs
    if snps is not None:
        p = p.sub_problem_of_snps(snps)
    t = problem_to_imputation_set(p, index_file=index_file, genotypes_as_is=genotypes_as_is)
    # Single pre-formatted argument per print: same output whether print is a statement or
    # a function
    print(p)
    print(t.genotype)

    # Run imputation
    print("IBD segment index at %s" % (segment_location,))
    print("Imputing, algorithm %s ... \n" % (algorithm,))
    if algorithm == "interval_tree":
        ibd = im.smart_segment_set.SmartSegmentSet.load(p.pedigree.num_genotyped, segment_location)
        print("IBD segments %s" % (ibd.size,))
        im.imputation.iibd.impute(
            p.haplotype, ibd, t, samples=samples, debug=debug, ibd_sample_index=ibd_sample_index
        )
    else:
        # algorithm == "index" (validated above)
        ibd = im.index.segment_index.SegmentIndex(segment_location)
        im.imputation.impute_ibd_index.impute(
            ibd,
            t,
            samples=samples,
            debug=debug,
            genotype=p.g,
            ibd_sample_index=ibd_sample_index,
            remove_partial_calls=remove_partial_calls,
            debug_sample=debug_sample,
        )
    return p, t