コード例 #1
0
ファイル: impute_test_util.py プロジェクト: orenlivne/ober
 def haplotype_hut():
     '''Return the partially-phased haplotypes of the hutterites sample problem.

     The haplotype set is read from the PLINK TPED file GENOTYPE_SAMPLE + '.hap.tped'
     (with load_ids=False) the first time it is needed and stored in
     Templates.HAPLOTYPE_HUT. While that attribute is truthy, later calls return
     it without reading the file again.'''
     cached = Templates.HAPLOTYPE_HUT
     if cached:
         return cached
     tped_file = GENOTYPE_SAMPLE + '.hap.tped'
     Templates.HAPLOTYPE_HUT = io_genotype.read('plink', 'haplotype', tped=tped_file, load_ids=False)
     return Templates.HAPLOTYPE_HUT
コード例 #2
0
ファイル: TestGenotype.py プロジェクト: orenlivne/ober
 def test_nearest_snp(self):
     '''Check that nearest-SNP lookups map base-pair locations back to SNP indices.'''
     genotype = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_SAMPLE)
     message = 'Wrong nearest SNP location'

     # Every SNP's own base-pair position must map back to that SNP.
     nearest_all = list(genotype.nearest_snp_multiple(genotype.base_pair))
     assert_equal(nearest_all, genotype.snp_range, message)

     # A location slightly above or below SNP 5 must still resolve to SNP 5.
     index = 5
     for offset in (0.01, -0.01):
         assert_equal(genotype.nearest_snp(genotype.base_pair[index] + offset), index, message)
コード例 #3
0
ファイル: impute_test_util.py プロジェクト: orenlivne/ober
 def problem_hut():
     '''Return the hutterites problem (pedigree + genotypes).

     The problem is built on first use and stored in Templates.PROBLEM_HUT.
     While that attribute is truthy, later calls return it directly.'''
     if Templates.PROBLEM_HUT:
         return Templates.PROBLEM_HUT
     # The pedigree is obtained first, then the genotype set is read from the
     # PLINK files that share the GENOTYPE_SAMPLE prefix.
     hut_pedigree = Templates.pedigree_hut()
     hut_genotype = io_genotype.read('plink', 'genotype', prefix=GENOTYPE_SAMPLE, load_ids=False)
     Templates.PROBLEM_HUT = Problem(hut_pedigree, hut_genotype)
     return Templates.PROBLEM_HUT
コード例 #4
0
ファイル: TestPhaseTrivialDuo.py プロジェクト: orenlivne/ober
 def setUp(self):
     '''Prepare the duo problem, the phaser under test and the expected solution.'''
     unittest.TestCase.setUp(self)

     # A pedigree is loaded in conjunction with a genotype set so that its
     # sample IDs are recoded to consecutive ones for easier access by phasers.
     duo_prefix = itu.GENOTYPE_DUO
     self.problem = io.read_plink(prefix=duo_prefix, haplotype=None, pedigree=duo_prefix + '.tfam')
     self.phaser = trivial_phaser()

     # Expected results: same pedigree, genotypes read from the solution files.
     pedigree = self.problem.pedigree
     expected_genotype = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_DUO_SOLUTION)
     self.solution = Problem(pedigree, expected_genotype)
コード例 #5
0
ファイル: TestGenotype.py プロジェクト: orenlivne/ober
 def test_nearest_snp(self):
     '''Verify the nearest-SNP lookup for exact and slightly perturbed base-pair locations.'''
     error_message = 'Wrong nearest SNP location'
     sample = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_SAMPLE)

     # Looking up all SNP positions at once must reproduce the SNP index range.
     found = list(sample.nearest_snp_multiple(sample.base_pair))
     assert_equal(found, sample.snp_range, error_message)

     # Nudging the position of SNP 5 by +/-0.01 must not change the answer.
     snp = 5
     above = sample.base_pair[snp] + 0.01
     assert_equal(sample.nearest_snp(above), snp, error_message)
     below = sample.base_pair[snp] - 0.01
     assert_equal(sample.nearest_snp(below), snp, error_message)
コード例 #6
0
ファイル: impute_test_util.py プロジェクト: orenlivne/ober
 def haplotype_hut():
     '''Load (once) the partially-phased haplotypes for the hutterites sample problem.

     The result is kept in Templates.HAPLOTYPE_HUT. The TPED file
     GENOTYPE_SAMPLE + '.hap.tped' is only read when that attribute is falsy.'''
     if Templates.HAPLOTYPE_HUT:
         return Templates.HAPLOTYPE_HUT
     read_options = dict(tped=GENOTYPE_SAMPLE + '.hap.tped', load_ids=False)
     Templates.HAPLOTYPE_HUT = io_genotype.read('plink', 'haplotype', **read_options)
     return Templates.HAPLOTYPE_HUT
コード例 #7
0
ファイル: impute_test_util.py プロジェクト: orenlivne/ober
 def problem_hut():
     '''Load (once) the hutterites data set as a Problem.

     The Problem is kept in Templates.PROBLEM_HUT. It is only rebuilt from the
     pedigree and the GENOTYPE_SAMPLE genotype files when that attribute is falsy.'''
     cached = Templates.PROBLEM_HUT
     if cached:
         return cached
     ped = Templates.pedigree_hut()
     read_options = dict(prefix=GENOTYPE_SAMPLE, load_ids=False)
     gen = io_genotype.read('plink', 'genotype', **read_options)
     Templates.PROBLEM_HUT = Problem(ped, gen)
     return Templates.PROBLEM_HUT
コード例 #8
0
 def setUp(self):
     '''Prepare the trio problem, the phaser under test and the expected solution.'''
     unittest.TestCase.setUp(self)

     # A pedigree is loaded in conjunction with a genotype set so that its
     # sample IDs are recoded to consecutive ones for easier access by phasers.
     trio_prefix = itu.GENOTYPE_TRIO
     self.problem = io.read_plink(prefix=trio_prefix, haplotype=None, pedigree=trio_prefix + '.tfam')
     self.phaser = trivial_phaser()

     # Expected results: same pedigree, genotypes read from the solution files.
     pedigree = self.problem.pedigree
     expected_genotype = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_TRIO_SOLUTION)
     self.solution = Problem(pedigree, expected_genotype)
コード例 #9
0
ファイル: io.py プロジェクト: orenlivne/ober
def read_plink(**kwargs):
    '''Load a problem from the following PLINK files:

        Default          Override Option    Data                                Format
        ======================================================================================
        prefix.pdg.tfam  pedigree           Pedigree adjacency                  PLINK TFAM
                                            (genotyped+nongenotyped samples)
        prefix.tfam      pedigree_genotyped Genotyped sample pedigree
                                            (sub-graph of the pedigree)         PLINK TFAM
                                            corresponding to prefix.tped
        prefix.tped      genotype           Genotype data                       PLINK TPED
        prefix.hap.tped  haplotype*         Haplotype data                      PLINK TPED
        prefix.err       error**            Genotype errors flagged             Integer array (snps x samples)
        prefix.info      info               Problem info                        pickle (binary)
        prefix.frm       frames             LD-independent SNP frames           text file
        prefix.lam       lam***             Haplotype est. recombination rate   text file

        * - hap data not loaded if this option is None.
        ** - errors set to 0 if this file is not found or this option is set to None.
        *** - data not loaded if this file is not found.

    Other keyword options:
        prefix  - common prefix of the default file names (default ''). Without a
                  prefix, every option that is not passed explicitly is None.
        verbose - if true, print a progress message before each read (default False).

    Returns the Problem built from the loaded pedigree and genotype data, with the
    optional haplotype, error, frames and lam data passed along. If both the
    haplotype and info options are set, problem.info is unpickled from the info file.

    Raises ValueError if the pedigree, pedigree_genotyped or genotype option is None.
    '''

    # Read input options
    verbose = kwargs.get('verbose', False)
    prefix = kwargs.get('prefix', '')

    def overrideable_option(name, default):
        # An explicitly passed option wins; the prefix-based default only
        # applies when a prefix was given.
        return kwargs.get(name, default if prefix else None)

    pedigree = overrideable_option('pedigree', prefix + '.pdg.tfam')
    pedigree_genotyped = overrideable_option('pedigree_genotyped', prefix + '.tfam')
    genotype = overrideable_option('genotype', prefix + '.tped')
    haplotype = overrideable_option('haplotype', prefix + '.hap.tped')
    error_file = overrideable_option('error', prefix + '.err')
    info = overrideable_option('info', prefix + '.info')
    frames_file = overrideable_option('frames', prefix + '.frm')
    lam_file = overrideable_option('lam', prefix + '.lam')

    # Check every mandatory option individually. Comparing the list of options
    # itself against None is always true and so can never detect a missing file.
    # The error option is not mandatory: None is a documented value for it.
    required = (('pedigree', pedigree),
                ('pedigree_genotyped', pedigree_genotyped),
                ('genotype', genotype))
    missing = [name for name, value in required if value is None]
    if missing:
        raise ValueError('Must specify a prefix or pedigree, pedigree_genotyped, genotype files '
                         '(missing: %s)' % (', '.join(missing),))

    # Load data
    def print_location(location):
        # Placeholder shown in progress messages for options that are not set.
        return location if location else '-'

    # Single-argument parenthesised prints behave the same whether print is a
    # statement or a function.
    if verbose:
        print('Reading pedigree from %s, %s ...' % (print_location(pedigree), print_location(pedigree_genotyped),))
    p = io_pedigree.read(pedigree, genotyped_id_file=pedigree_genotyped)

    if verbose:
        print('Reading genotype data from %s ...' % (print_location(genotype),))
    g = io_genotype.read('plink', 'genotype', tped=genotype, load_ids=False)

    if verbose:
        print('Reading haplotype data from %s ...' % (print_location(haplotype),))
    h = io_genotype.read('plink', 'haplotype', tped=haplotype, load_ids=False) if haplotype else None

    if verbose:
        print('Reading error data from %s ...' % (print_location(error_file),))
    # A missing error file is tolerated: no error array is passed on in that case.
    error = np.loadtxt(error_file) if error_file and os.path.isfile(error_file) else None

    if verbose:
        print('Reading frame data from %s ...' % (print_location(frames_file),))
    # Unlike error and lam, the frames file is read whenever the option is set,
    # without checking that the file exists.
    frames = db_gene.snp.ld_graph.read_frames(frames_file) if frames_file else None

    lam = np.loadtxt(lam_file) if lam_file and os.path.isfile(lam_file) else None

    problem = Problem(p, g, haplotype=h, error=error, frames=frames, lam=lam)
    if haplotype and info:
        if verbose:
            print('Reading problem info from %s ...' % (info,))
        # NOTE: unpickling can execute arbitrary code; only load info files
        # that come from a trusted source.
        with open(info, 'rb') as fin:
            problem.info = pickle.load(fin)
    return problem
コード例 #10
0
ファイル: map.py プロジェクト: orenlivne/ober
        if options.recode:
            # First, compute allele frequencies with PLINK  
            util.run_command('%s --nonfounders --freq --out %s' % (plink_cmd_base, out_base_name))
            # Convert frequencies file that to a reference allele recoding
            # file (a file containing the list of SNPs and their minor allele letter)
            bu.frq_to_minor_file(out_base_name + '.frq', out_base_name + '.mnr') 

            # Then convert binary PLINK to a recoded 12-recoded TPED, where 1=minor allele for each SNP
            out_recoded = out_base_name + '.recoded'                 
            util.run_command('%s --transpose --recode12 --reference-allele %s.mnr --out %s' % \
                           (plink_cmd_base, out_base_name, out_recoded))

            # Reload the recoded problem
            for ext in ('nof', 'tped', 'tfam'):
                os.rename(out_recoded + '.' + ext, out_base_name + '.' + ext)
            genotype = io_genotype.read('plink', 'genotype', tped=out_base_name + '.tped', load_ids=False)
        else:
            genotype = problem.genotype
            
        # Write problem to file in our (npz)
        io.write_npz(problem, out_base_name + '.npz')
        # Write genotypes Gaixin formats; she uses those separate files
        io_genotype.write('gaixin', genotype, options.out_gxn + '.gxn',
                          sample_id=problem.pedigree.sample_id_genotyped)
             
        # Convert plink tped to bed; delete the tped set
        util.run_command('%s --make-bed --out %s' % (plink_cmd_base, out_base_name))
        for ext in ('nof', 'pdg.tfam', 'tped', 'tfam', 'info'):
            os.remove(out_base_name + '.' + ext)
    except:
        traceback.print_exc(file=sys.stdout)
コード例 #11
0
ファイル: TestGenotype.py プロジェクト: orenlivne/ober
 def test_create_from_file_no_sample_ids(self):
     '''Read the sample genotype set from its TPED file with load_ids=False
     and run the shared genotype-set statistics checks on it.'''
     tped_file = itu.GENOTYPE_SAMPLE + '.tped'
     genotype = io_genotype.read('plink', 'genotype', tped=tped_file, load_ids=False)
     self.__assert_genotype_set_stats_correct(genotype)
コード例 #12
0
ファイル: TestGenotype.py プロジェクト: orenlivne/ober
 def test_create_from_file(self):
     '''Read the sample genotype set by file prefix, run the shared statistics
     checks and compare the SNP record at index 7 with its expected value.'''
     genotype = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_SAMPLE)
     self.__assert_genotype_set_stats_correct(genotype)
     expected_snp = (22, 'rs1654', 0, 17596388)
     self.__assert_snp_equals(expected_snp, genotype.snp[7])
コード例 #13
0
ファイル: TestGenotype.py プロジェクト: orenlivne/ober
 def test_create_from_file_no_sample_ids(self):
     '''Load the sample TPED file without sample IDs (load_ids=False) and run
     the shared genotype-set statistics checks on the result.'''
     read_options = dict(tped=itu.GENOTYPE_SAMPLE + '.tped', load_ids=False)
     loaded = io_genotype.read('plink', 'genotype', **read_options)
     self.__assert_genotype_set_stats_correct(loaded)
コード例 #14
0
ファイル: TestGenotype.py プロジェクト: orenlivne/ober
 def test_create_from_file(self):
     '''Load the sample genotype set by prefix, then check its statistics and
     the SNP record stored at index 7.'''
     loaded = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_SAMPLE)
     self.__assert_genotype_set_stats_correct(loaded)
     snp_index = 7
     self.__assert_snp_equals((22, 'rs1654', 0, 17596388), loaded.snp[snp_index])
コード例 #15
0
ファイル: io.py プロジェクト: orenlivne/ober
def read_plink(**kwargs):
    '''Load a problem from the following PLINK files:

        Default          Override Option    Data                                Format
        ======================================================================================
        prefix.pdg.tfam  pedigree           Pedigree adjacency                  PLINK TFAM
                                            (genotyped+nongenotyped samples)
        prefix.tfam      pedigree_genotyped Genotyped sample pedigree
                                            (sub-graph of the pedigree)         PLINK TFAM
                                            corresponding to prefix.tped
        prefix.tped      genotype           Genotype data                       PLINK TPED
        prefix.hap.tped  haplotype*         Haplotype data                      PLINK TPED
        prefix.err       error**            Genotype errors flagged             Integer array (snps x samples)
        prefix.info      info               Problem info                        pickle (binary)
        prefix.frm       frames             LD-independent SNP frames           text file
        prefix.lam       lam***             Haplotype est. recombination rate   text file

        * - hap data not loaded if this option is None.
        ** - errors set to 0 if this file is not found or this option is set to None.
        *** - data not loaded if this file is not found.

    Other keyword options:
        prefix  - common prefix of the default file names (default ''). Without a
                  prefix, every option that is not passed explicitly is None.
        verbose - if true, print a progress message before each read (default False).

    Returns the Problem built from the loaded pedigree and genotype data, with the
    optional haplotype, error, frames and lam data passed along. If both the
    haplotype and info options are set, problem.info is unpickled from the info file.

    Raises ValueError if the pedigree, pedigree_genotyped or genotype option is None.
    '''

    # Read input options
    verbose = kwargs.get('verbose', False)
    prefix = kwargs.get('prefix', '')

    def overrideable_option(name, default):
        # An explicitly passed option wins; the prefix-based default only
        # applies when a prefix was given.
        return kwargs.get(name, default if prefix else None)

    pedigree = overrideable_option('pedigree', prefix + '.pdg.tfam')
    pedigree_genotyped = overrideable_option('pedigree_genotyped',
                                             prefix + '.tfam')
    genotype = overrideable_option('genotype', prefix + '.tped')
    haplotype = overrideable_option('haplotype', prefix + '.hap.tped')
    error_file = overrideable_option('error', prefix + '.err')
    info = overrideable_option('info', prefix + '.info')
    frames_file = overrideable_option('frames', prefix + '.frm')
    lam_file = overrideable_option('lam', prefix + '.lam')

    # Check every mandatory option individually. Comparing the list of options
    # itself against None is always true and so can never detect a missing file.
    # The error option is not mandatory: None is a documented value for it.
    required = (
        ('pedigree', pedigree),
        ('pedigree_genotyped', pedigree_genotyped),
        ('genotype', genotype),
    )
    missing = [name for name, value in required if value is None]
    if missing:
        raise ValueError(
            'Must specify a prefix or pedigree, pedigree_genotyped, genotype files '
            '(missing: %s)' % (', '.join(missing), ))

    # Load data
    def print_location(location):
        # Placeholder shown in progress messages for options that are not set.
        return location if location else '-'

    # Single-argument parenthesised prints behave the same whether print is a
    # statement or a function.
    if verbose:
        print('Reading pedigree from %s, %s ...' % (
            print_location(pedigree),
            print_location(pedigree_genotyped),
        ))
    p = io_pedigree.read(pedigree, genotyped_id_file=pedigree_genotyped)

    if verbose:
        print('Reading genotype data from %s ...' % (
            print_location(genotype), ))
    g = io_genotype.read('plink', 'genotype', tped=genotype, load_ids=False)

    if verbose:
        print('Reading haplotype data from %s ...' % (
            print_location(haplotype), ))
    if haplotype:
        h = io_genotype.read('plink', 'haplotype', tped=haplotype,
                             load_ids=False)
    else:
        h = None

    if verbose:
        print('Reading error data from %s ...' % (
            print_location(error_file), ))
    # A missing error file is tolerated: no error array is passed on in that case.
    if error_file and os.path.isfile(error_file):
        error = np.loadtxt(error_file)
    else:
        error = None

    if verbose:
        print('Reading frame data from %s ...' % (
            print_location(frames_file), ))
    # Unlike error and lam, the frames file is read whenever the option is set,
    # without checking that the file exists.
    if frames_file:
        frames = db_gene.snp.ld_graph.read_frames(frames_file)
    else:
        frames = None

    if lam_file and os.path.isfile(lam_file):
        lam = np.loadtxt(lam_file)
    else:
        lam = None

    problem = Problem(p, g, haplotype=h, error=error, frames=frames, lam=lam)
    if haplotype and info:
        if verbose:
            print('Reading problem info from %s ...' % (info, ))
        # NOTE: unpickling can execute arbitrary code; only load info files
        # that come from a trusted source.
        with open(info, 'rb') as fin:
            problem.info = pickle.load(fin)
    return problem