Esempio n. 1
0
File: qc.py Progetto: orenlivne/ober
def qc_problem(problem, index_file=DEFAULT_ID_INDEX, samples=None, snps=None, debug=0,
               input_dir=None, segment_location=None, ibd_sample_index=None, debug_sample=-1):
    '''Impute the SNPs that have also been genotyped for all samples, and compare with the
    imputation result. Returns the phasing Problem and the imputed results.

    This entry point always works against an IBD segment index (SegmentIndex); unlike the
    imputation entry point there is no algorithm selection here.

    problem = npz file location or Problem instance.
    index_file = forwarded to problem_to_imputation_set().
    samples, debug, debug_sample = forwarded unchanged to qc().
    snps = optional SNP selection; if not None, only the corresponding sub-problem is processed.
    input_dir = currently unused by this function.
    segment_location = IBD segment index location. If empty/None, defaults to
    $OBER_OUT/index_segments (a KeyError is raised if OBER_OUT is not set in that case).
    ibd_sample_index = optional dictionary of IBD-segment-ID-problem-sample-ID. If None, the
    mapping between the two IDs is assumed to be the identity.

    Returns the tuple (p, t): the (possibly SNP-restricted) Problem and the imputation set
    built from it.'''
    # Read input data
    p = problem if isinstance(problem, Problem) else im.io.read_npz(problem)

    # Select SNPs
    if snps is not None:
        p = p.sub_problem_of_snps(snps)
    t = problem_to_imputation_set(p, index_file=index_file)
    # Single-argument, parenthesized print produces the same output under Python 2 and 3.
    print(p)
    print(t.genotype)

    # Load IBD index. The environment variable is only consulted when no location was given.
    if not segment_location:
        segment_location = '%s/index_segments' % (os.environ['OBER_OUT'],)
    ibd = im.index.segment_index.SegmentIndex(segment_location)

    # Run imputation
    qc(ibd, t, samples=samples, debug=debug, genotype=p.g, ibd_sample_index=ibd_sample_index,
       debug_sample=debug_sample)
    return p, t
Esempio n. 2
0
def impute_problem(problem, index_file=DEFAULT_ID_INDEX, samples=None, snps=None, debug=0,
                   input_dir=None, algorithm='index', segment_location=None, ibd_sample_index=None,
                   remove_partial_calls=False, debug_sample=-1, genotypes_as_is=False):
    '''Impute the SNPs that have also been genotyped for all samples, and compare with the
    imputation result. Returns the phasing Problem and the imputed results.

    problem = npz file location or Problem instance.
    index_file, genotypes_as_is = forwarded to problem_to_imputation_set().
    samples, debug = forwarded unchanged to the selected imputation routine.
    snps = optional SNP selection; if not None, only the corresponding sub-problem is processed.
    input_dir = directory holding segments.out; only used to build the default segment
    location for the 'interval_tree' algorithm, so it must be given in that case (with
    input_dir=None the default becomes the literal path None/segments.out).
    segment_location = IBD segment repository (segment file or segment index) location.
    If empty/None, defaults to <input_dir>/segments.out for 'interval_tree' and to
    $OBER_OUT/index_segments otherwise (KeyError if OBER_OUT is not set).
    ibd_sample_index = optional dictionary of CGI-ID-problem-sample-ID. If None, the mapping
    between the two IDs is assumed to be the identity.
    remove_partial_calls, debug_sample = only forwarded to the 'index' algorithm.

    algorithm options:
    'index' - Imputation V2 using IBD cliques at each SNP (default).
    'interval_tree' - Imputation V1 using interval tree queries on a SmartSegmentSet.

    Raises ValueError for any other algorithm name. Note that this check happens only after
    the input data has been read and the imputation set has been built.'''
    # Read input data
    p = problem if isinstance(problem, Problem) else im.io.read_npz(problem)

    # Select SNPs
    if snps is not None:
        p = p.sub_problem_of_snps(snps)
    t = problem_to_imputation_set(p, index_file=index_file, genotypes_as_is=genotypes_as_is)
    # Single-argument, parenthesized prints produce the same output under Python 2 and 3.
    print(p)
    print(t.genotype)

    # Resolve the default IBD segment location for the chosen algorithm
    if not segment_location:
        if algorithm == 'interval_tree':
            segment_location = '%s/segments.out' % (input_dir,)
        else:
            segment_location = '%s/index_segments' % (os.environ['OBER_OUT'],)
    print('IBD segment index at %s' % (segment_location,))

    # Run imputation
    print('Imputing, algorithm %s ... ' % (algorithm,))
    if algorithm == 'interval_tree':
        ibd = im.smart_segment_set.SmartSegmentSet.load(p.pedigree.num_genotyped, segment_location)
        print('IBD segments %s' % (ibd.size,))
        im.imputation.iibd.impute(p.haplotype, ibd, t, samples=samples, debug=debug,
                                  ibd_sample_index=ibd_sample_index)
    elif algorithm == 'index':
        ibd = im.index.segment_index.SegmentIndex(segment_location)
        im.imputation.impute_ibd_index.impute(ibd, t, samples=samples, debug=debug, genotype=p.g,
                                              ibd_sample_index=ibd_sample_index,
                                              remove_partial_calls=remove_partial_calls,
                                              debug_sample=debug_sample)
    else:
        # The original doubled single quotes were parsed as adjacent string literals, which
        # dropped the quotes around the name; quote it explicitly instead.
        raise ValueError("Unsupported imputation algorithm '%s'" % (algorithm,))
    return p, t
Esempio n. 3
0
def impute_problem(
    problem,
    index_file=DEFAULT_ID_INDEX,
    samples=None,
    snps=None,
    debug=0,
    input_dir=None,
    algorithm="index",
    segment_location=None,
    ibd_sample_index=None,
    remove_partial_calls=False,
    debug_sample=-1,
    genotypes_as_is=False,
):
    """Impute the SNPs that have also been genotyped for all samples, and compare with the
    imputation result. Returns the phasing Problem and the imputed results.

    problem = npz file location or Problem instance.
    index_file, genotypes_as_is = forwarded to problem_to_imputation_set().
    samples, debug = forwarded unchanged to the selected imputation routine.
    snps = optional SNP selection; if not None, only the corresponding sub-problem is processed.
    input_dir = directory holding segments.out; only used to build the default segment
    location for the 'interval_tree' algorithm, so it must be given in that case (with
    input_dir=None the default becomes the literal path None/segments.out).
    segment_location = IBD segment repository (segment file or segment index) location.
    If empty/None, defaults to <input_dir>/segments.out for 'interval_tree' and to
    $OBER_OUT/index_segments otherwise (KeyError if OBER_OUT is not set).
    ibd_sample_index = optional dictionary of CGI-ID-problem-sample-ID. If None, the mapping
    between the two IDs is assumed to be the identity.
    remove_partial_calls, debug_sample = only forwarded to the 'index' algorithm.

    algorithm options:
    'index' - Imputation V2 using IBD cliques at each SNP (default).
    'interval_tree' - Imputation V1 using interval tree queries on a SmartSegmentSet.

    Raises ValueError for any other algorithm name. Note that this check happens only after
    the input data has been read and the imputation set has been built.
    """
    # Read input data
    p = problem if isinstance(problem, Problem) else im.io.read_npz(problem)

    # Select SNPs
    if snps is not None:
        p = p.sub_problem_of_snps(snps)
    t = problem_to_imputation_set(p, index_file=index_file, genotypes_as_is=genotypes_as_is)
    # Single-argument, parenthesized prints produce the same output under Python 2 and 3.
    print(p)
    print(t.genotype)

    # Resolve the default IBD segment location for the chosen algorithm
    if not segment_location:
        if algorithm == "interval_tree":
            segment_location = "%s/segments.out" % (input_dir,)
        else:
            segment_location = "%s/index_segments" % (os.environ["OBER_OUT"],)
    print("IBD segment index at %s" % (segment_location,))

    # Run imputation
    print("Imputing, algorithm %s ... " % (algorithm,))
    if algorithm == "interval_tree":
        ibd = im.smart_segment_set.SmartSegmentSet.load(p.pedigree.num_genotyped, segment_location)
        print("IBD segments %s" % (ibd.size,))
        im.imputation.iibd.impute(
            p.haplotype,
            ibd,
            t,
            samples=samples,
            debug=debug,
            ibd_sample_index=ibd_sample_index,
        )
    elif algorithm == "index":
        ibd = im.index.segment_index.SegmentIndex(segment_location)
        im.imputation.impute_ibd_index.impute(
            ibd,
            t,
            samples=samples,
            debug=debug,
            genotype=p.g,
            ibd_sample_index=ibd_sample_index,
            remove_partial_calls=remove_partial_calls,
            debug_sample=debug_sample,
        )
    else:
        # The adjacent literals "... " "%s" "" were concatenated, so the quotes meant to
        # surround the algorithm name never reached the message; quote it explicitly.
        raise ValueError("Unsupported imputation algorithm '%s'" % (algorithm,))
    return p, t