コード例 #1
0
ファイル: io.py プロジェクト: orenlivne/ober
def read_plink(**kwargs):
    '''Load a problem from the following PLINK files:
    
        Default          Override Option    Data                                Format
        ======================================================================================
        prefix.pdg.tfam  pedigree           Pedigree adjacency                  PLINK TFAM
                                            (genotyped+nongenotyped samples)
        prefix.tfam      pedigree_genotyped Genotyped sample pedigree
                                            (sub-graph of the pedigree)         PLINK TFAM
                                            corresponding to prefix.tped
        prefix.tped      genotype           Genotype data                       PLINK TPED
        prefix.hap.tped  haplotype*         Haplotype data                      PLINK TPED 
        prefix.err       error**            Genotype errors flagged             Integer array (snps x samples) 
        prefix.info      info****           Problem info                        pickle (binary)
        prefix.frm       frames             LD-independent SNP frames           text file
        prefix.lam       lam***             Haplotype est. recombination rate   text file
        
        * - hap data not loaded if this option is None (or otherwise falsy).
        ** - error=None is passed to Problem if this file is not found or this option is
             set to None (presumably Problem then treats all errors as 0 - confirm).
        *** - data not loaded if this file is not found.
        **** - only loaded when a haplotype location is also set.

    Keyword arguments:
        prefix  - common file name prefix from which the defaults above are derived.
                  Without a prefix, every location that is not explicitly overridden
                  is None.
        verbose - if true, print each location before it is read (default: False).
        plus any of the override options listed in the table.

    Returns the Problem built from the loaded data.

    Raises ValueError if any of the pedigree, pedigree_genotyped or genotype locations
    is None, i.e. neither a prefix nor an explicit override supplies it.
    '''
    
    # Read input options
    verbose = kwargs.get('verbose', False)
    prefix = kwargs.get('prefix', '')

    def overrideable_option(name, default):
        # The prefix-derived default is only meaningful when a prefix was given;
        # otherwise an option that is not overridden resolves to None.
        return kwargs.get(name, default if prefix else None)

    pedigree = overrideable_option('pedigree', prefix + '.pdg.tfam')
    pedigree_genotyped = overrideable_option('pedigree_genotyped', prefix + '.tfam')
    genotype = overrideable_option('genotype', prefix + '.tped')
    haplotype = overrideable_option('haplotype', prefix + '.hap.tped')
    error_file = overrideable_option('error', prefix + '.err')
    info = overrideable_option('info', prefix + '.info')
    frames_file = overrideable_option('frames', prefix + '.frm')
    lam_file = overrideable_option('lam', prefix + '.lam')

    # Check every mandatory location individually. (Testing the list object itself
    # against None is always true and would never reject anything.) The error file is
    # deliberately not mandatory: a None/missing error file is handled below.
    if any(location is None for location in (pedigree, pedigree_genotyped, genotype)):
        raise ValueError('Must specify a prefix or pedigree, pedigree_genotyped, genotype files')
    
    # Load data
    def print_location(x):
        # Show unset locations as '-' in the progress messages.
        return x if x else '-'
    
    if verbose:
        print('Reading pedigree from %s, %s ...' % (print_location(pedigree), print_location(pedigree_genotyped)))
    p = io_pedigree.read(pedigree, genotyped_id_file=pedigree_genotyped)
    
    if verbose:
        print('Reading genotype data from %s ...' % (print_location(genotype),))
    g = io_genotype.read('plink', 'genotype', tped=genotype, load_ids=False)
    
    if verbose:
        print('Reading haplotype data from %s ...' % (print_location(haplotype),))
    h = io_genotype.read('plink', 'haplotype', tped=haplotype, load_ids=False) if haplotype else None
    
    if verbose:
        print('Reading error data from %s ...' % (print_location(error_file),))
    # A missing error file is not an error; Problem simply receives error=None.
    error = np.loadtxt(error_file) if error_file and os.path.isfile(error_file) else None
    
    if verbose:
        print('Reading frame data from %s ...' % (print_location(frames_file),))
    # Unlike the error and lam files, no existence check is made for the frames file.
    frames = db_gene.snp.ld_graph.read_frames(frames_file) if frames_file else None
    
    lam = np.loadtxt(lam_file) if lam_file and os.path.isfile(lam_file) else None
    
    problem = Problem(p, g, haplotype=h, error=error, frames=frames, lam=lam)
    if haplotype and info:
        if verbose:
            print('Reading problem info from %s ...' % (info,))
        # NOTE: pickle.load executes arbitrary code embedded in the file; only point
        # 'info' at files from a trusted source.
        with open(info, 'rb') as info_in:
            problem.info = pickle.load(info_in)
    return problem
コード例 #2
0
ファイル: io.py プロジェクト: orenlivne/ober
def read_plink(**kwargs):
    '''Load a problem from the following PLINK files:
    
        Default          Override Option    Data                                Format
        ======================================================================================
        prefix.pdg.tfam  pedigree           Pedigree adjacency                  PLINK TFAM
                                            (genotyped+nongenotyped samples)
        prefix.tfam      pedigree_genotyped Genotyped sample pedigree
                                            (sub-graph of the pedigree)         PLINK TFAM
                                            corresponding to prefix.tped
        prefix.tped      genotype           Genotype data                       PLINK TPED
        prefix.hap.tped  haplotype*         Haplotype data                      PLINK TPED 
        prefix.err       error**            Genotype errors flagged             Integer array (snps x samples) 
        prefix.info      info****           Problem info                        pickle (binary)
        prefix.frm       frames             LD-independent SNP frames           text file
        prefix.lam       lam***             Haplotype est. recombination rate   text file
        
        * - hap data not loaded if this option is None (or otherwise falsy).
        ** - error=None is passed to Problem if this file is not found or this option is
             set to None (presumably Problem then treats all errors as 0 - confirm).
        *** - data not loaded if this file is not found.
        **** - only loaded when a haplotype location is also set.

    Keyword arguments:
        prefix  - common file name prefix from which the defaults above are derived.
                  Without a prefix, every location that is not explicitly overridden
                  is None.
        verbose - if true, print each location before it is read (default: False).
        plus any of the override options listed in the table.

    Returns the Problem built from the loaded data.

    Raises ValueError if any of the pedigree, pedigree_genotyped or genotype locations
    is None, i.e. neither a prefix nor an explicit override supplies it.
    '''

    # Read input options
    verbose = kwargs.get('verbose', False)
    prefix = kwargs.get('prefix', '')

    def overrideable_option(name, default):
        # Fall back to the prefix-derived default only when a prefix exists;
        # with no prefix an option that is not overridden becomes None.
        fallback = default if prefix else None
        return kwargs.get(name, fallback)

    pedigree = overrideable_option('pedigree', prefix + '.pdg.tfam')
    pedigree_genotyped = overrideable_option('pedigree_genotyped',
                                             prefix + '.tfam')
    genotype = overrideable_option('genotype', prefix + '.tped')
    haplotype = overrideable_option('haplotype', prefix + '.hap.tped')
    error_file = overrideable_option('error', prefix + '.err')
    info = overrideable_option('info', prefix + '.info')
    frames_file = overrideable_option('frames', prefix + '.frm')
    lam_file = overrideable_option('lam', prefix + '.lam')

    # Validate each mandatory location on its own. Comparing the enclosing list to
    # None can never fail, so the check has to look at the individual elements. The
    # error file stays optional because a None/missing error file is handled below.
    for required_location in (pedigree, pedigree_genotyped, genotype):
        if required_location is None:
            raise ValueError(
                'Must specify a prefix or pedigree, pedigree_genotyped, genotype files'
            )

    # Load data
    def print_location(x):
        # Unset locations are displayed as '-' in the progress messages.
        return x if x else '-'

    if verbose:
        print('Reading pedigree from %s, %s ...' % (
            print_location(pedigree),
            print_location(pedigree_genotyped),
        ))
    p = io_pedigree.read(pedigree, genotyped_id_file=pedigree_genotyped)

    if verbose:
        print('Reading genotype data from %s ...' % (
            print_location(genotype), ))
    g = io_genotype.read('plink', 'genotype', tped=genotype, load_ids=False)

    if verbose:
        print('Reading haplotype data from %s ...' % (
            print_location(haplotype), ))
    if haplotype:
        h = io_genotype.read('plink', 'haplotype', tped=haplotype,
                             load_ids=False)
    else:
        h = None

    if verbose:
        print('Reading error data from %s ...' % (
            print_location(error_file), ))
    # A missing error file is tolerated; Problem then receives error=None.
    if error_file and os.path.isfile(error_file):
        error = np.loadtxt(error_file)
    else:
        error = None

    if verbose:
        print('Reading frame data from %s ...' % (
            print_location(frames_file), ))
    # No existence check here: a configured frames file is always read.
    frames = db_gene.snp.ld_graph.read_frames(
        frames_file) if frames_file else None

    if lam_file and os.path.isfile(lam_file):
        lam = np.loadtxt(lam_file)
    else:
        lam = None

    problem = Problem(p, g, haplotype=h, error=error, frames=frames, lam=lam)
    if haplotype and info:
        if verbose:
            print('Reading problem info from %s ...' % (info, ))
        # NOTE: unpickling can execute arbitrary code; 'info' must come from a
        # trusted source.
        with open(info, 'rb') as info_in:
            problem.info = pickle.load(info_in)
    return problem