コード例 #1
0
ファイル: KDE_parse.py プロジェクト: nick-youngblut/SIPSim
def main(args=None):
    """Filter a KDE object down to a set of taxa and serialize it to STDOUT.

    Parameters
    ----------
    args : dict
        Parsed command-line arguments. ``args['<taxa>']`` is handed to
        ``load_taxa`` and ``args['<kde>']`` to ``Utils.load_kde``.

    Raises
    ------
    TypeError
        If ``Utils.KDE_type`` reports a type other than 1, 2, 3 or 4.
    """
    # loading taxa names
    taxa = load_taxa(args['<taxa>'])

    # loading KDEs
    KDEs = Utils.load_kde(args['<kde>'])

    # the KDE type decides how the object has to be traversed
    kde_type = Utils.KDE_type(KDEs)

    if kde_type == 1:
        # iterable of (taxon, kde) pairs
        KDEs_p = [[t, k] for t, k in KDEs if t in taxa]
    elif kde_type == 2:
        # {taxon: kde}
        KDEs_p = {t: k for t, k in KDEs.items() if t in taxa}
    elif kde_type == 3:
        # {libID: {taxon: kde}}
        KDEs_p = {}
        # Iterate over the *input* object; looping over the freshly created
        # (empty) output dict would silently drop every library.
        for libID, by_taxon in KDEs.items():
            KDEs_p[libID] = {t: k for t, k in by_taxon.items() if t in taxa}
    elif kde_type == 4:
        # {libID: filename}; each file is loaded and filtered in turn
        KDEs_p = {}
        for libID, filename in KDEs.items():
            KDE_bylib = Utils.load_kde(filename)
            KDEs_p[libID] = {t: k for t, k in KDE_bylib.items() if t in taxa}
            # drop the reference before the next library is loaded
            KDE_bylib = None
    else:
        raise TypeError('KDE object type not recognized')

    # writing
    dill.dump(KDEs_p, sys.stdout)
コード例 #2
0
def main(args=None):
    """Print a tab-delimited table with one row per KDE.

    The columns are ``libID``, ``taxon`` and ``bandwidth``; the last column
    holds whatever ``kde_factor`` returns for the KDE. KDE objects without
    library information (types 1 and 2) are reported under library ``'1'``.

    Parameters
    ----------
    args : dict
        Parsed command-line arguments; ``args['<kde>']`` is handed to
        ``Utils.load_kde``.

    Raises
    ------
    TypeError
        If ``Utils.KDE_type`` reports a type other than 1, 2, 3 or 4.
    """
    kde_obj = Utils.load_kde(args['<kde>'])
    obj_type = Utils.KDE_type(kde_obj)

    def emit(lib, taxon, kde):
        # one table row
        print('\t'.join([lib, taxon, kde_factor(kde)]))

    # header goes out before the type is inspected
    print('\t'.join(['libID', 'taxon', 'bandwidth']))

    if obj_type == 1:
        for taxon, kde in kde_obj:
            emit('1', taxon, kde)
    elif obj_type == 2:
        for taxon, kde in kde_obj.items():
            emit('1', taxon, kde)
    elif obj_type == 3:
        for lib, by_taxon in kde_obj.items():
            for taxon, kde in by_taxon.items():
                emit(lib, taxon, kde)
    elif obj_type == 4:
        for lib, path in kde_obj.items():
            # per-library KDEs live in separate files
            by_taxon = Utils.load_kde(path)
            for taxon, kde in by_taxon.items():
                emit(lib, taxon, kde)
    else:
        raise TypeError('KDE object type not recognized')
コード例 #3
0
def main(args=None):
    """Report the contents of a KDE object on STDOUT.

    Depending on the flags, this prints the taxon names (default), the
    number of entries (``-n``) or hands every KDE to ``KDE_dataset_stats``
    (``-s``, preceded by a header line).

    Parameters
    ----------
    args : dict
        Parsed command-line arguments. Reads ``args['<kde>']`` (handed to
        ``Utils.load_kde``), ``args['-s']`` and ``args['-n']``.

    Raises
    ------
    TypeError
        If ``Utils.KDE_type`` reports a type other than 1, 2, 3 or 4.
    SystemExit
        Via ``sys.exit()`` once the counts requested with ``-n`` are printed.
    """
    # load KDEs object
    sys.stderr.write('Loading KDEs...\n')
    KDEs = Utils.load_kde(args['<kde>'])

    # header
    if args['-s']:
        print('\t'.join(['lib_ID', 'taxon_ID', 'KDE_ID', 'min',
                         'percentile_5', 'percentile_25', 'mean', 'median',
                         'percentile_75', 'percentile_95', 'max', 'stdev']))

    # KDE type
    kde_type = Utils.KDE_type(KDEs)

    # parsing KDE
    if kde_type == 1:
        # iterable of (taxon, kde) pairs
        if args['-n']:
            print(len(KDEs))
            sys.exit()
        for x in KDEs:
            if args['-s']:
                KDE_dataset_stats(x[1], x[0])
            else:
                print(x[0])
    elif kde_type == 2:
        # {taxon: kde}
        if args['-n']:
            print(len(KDEs))
            sys.exit()
        for taxon, kde in KDEs.items():
            if args['-s']:
                KDE_dataset_stats(kde, taxon)
            else:
                print(taxon)
    elif kde_type == 3:
        # {libID: {taxon: kde}}
        if args['-n']:
            print('\t'.join(['library', 'N']))
            for libID, by_taxon in KDEs.items():
                print('\t'.join([str(libID), str(len(by_taxon))]))
            sys.exit()
        for libID, by_taxon in KDEs.items():
            for taxon, kde in by_taxon.items():
                if args['-s']:
                    KDE_dataset_stats(kde, taxon, libID=libID)
                else:
                    print('\t'.join([libID, taxon]))
    elif kde_type == 4:
        # {libID: filename}; each library is loaded from its own file
        if args['-n']:
            # Same per-library table as for type 3. Exiting inside the loop
            # would report only the first library, without its name.
            print('\t'.join(['library', 'N']))
        for libID, filename in KDEs.items():
            KDE_bylib = Utils.load_kde(filename)
            if args['-n']:
                print('\t'.join([str(libID), str(len(KDE_bylib))]))
                continue
            for taxon, kde in KDE_bylib.items():
                if args['-s']:
                    # keep the library ID with the stats, as for type 3
                    KDE_dataset_stats(kde, taxon, libID=libID)
                else:
                    print(taxon)
        if args['-n']:
            sys.exit()
    else:
        raise TypeError('KDE object type not recognized')
コード例 #4
0
def main(args=None):
    """Sample values from every KDE and write them to STDOUT as a TSV table.

    For each non-None KDE, ``kde.resample(n)[0, ]`` is collected per library
    and taxon; each library's samples are converted with ``to_df`` and the
    resulting frames are concatenated. KDE objects without library
    information (types 1 and 2) are stored under library ``'1'``.

    Parameters
    ----------
    args : dict
        Parsed command-line arguments. ``args['<kde>']`` is handed to
        ``Utils.load_kde``; ``args['-n']`` is converted to ``int`` and used
        as the ``resample`` argument.

    Raises
    ------
    TypeError
        If ``Utils.KDE_type`` reports a type other than 1, 2, 3 or 4.
    """
    # input
    kde_obj = Utils.load_kde(args['<kde>'])
    n_draws = int(args['-n'])

    obj_type = Utils.KDE_type(kde_obj)

    def draw(pairs):
        # {taxon: sampled values} for every pair whose KDE is present
        drawn = {}
        for taxon, kde in pairs:
            if kde is None:
                continue
            drawn[taxon] = kde.resample(n_draws)[0, ]
        return drawn

    sampled = {}
    if obj_type == 1:
        sampled['1'] = draw(kde_obj)
    elif obj_type == 2:
        sampled['1'] = draw(kde_obj.items())
    elif obj_type == 3:
        for lib, by_taxon in kde_obj.items():
            sampled[lib] = draw(by_taxon.items())
    elif obj_type == 4:
        for lib, path in kde_obj.items():
            # per-library KDEs are stored in separate files
            by_taxon = Utils.load_kde(path)
            sampled[lib] = draw(by_taxon.items())
    else:
        raise TypeError('KDE object type not recognized')

    # writing out results
    frames = [to_df(by_taxon, lib) for lib, by_taxon in sampled.items()]
    combined = pd.concat(frames)
    combined.to_csv(sys.stdout, sep='\t', index=False)
コード例 #5
0
def test_kde():
    """The example KDE pickle loads as a dict and is classified as type 2."""
    pkl_path = os.path.join(data_dir, 'ampFrag_skewN90-25-n5-nS_kde.pkl')

    loaded = Utils.load_kde(pkl_path)
    assert isinstance(loaded, dict)

    assert Utils.KDE_type(loaded) == 2
コード例 #6
0
def test_file_status():
    """File checks pass for an existing data file and raise for a missing one."""
    existing = os.path.join(data_dir, 'ampFrag_skewN90-25-n5-nS_kde.pkl')

    # real file: all three checks are expected to return without raising
    Utils.is_file(existing)
    Utils.checkExists(existing)
    Utils.checkEmpty(existing)

    # fake file: both existence checks must raise IOError
    missing = os.path.join(data_dir, 'DOES_NOT_EXIST')
    with pytest.raises(IOError):
        Utils.is_file(missing)
    with pytest.raises(IOError):
        Utils.checkExists(missing)
コード例 #7
0
def main(args=None):
    """Subsample an OTU table and write the result to STDOUT as TSV.

    ``args`` is updated in place: ``--walk`` is converted to ``int`` and
    ``--dist_params`` is replaced by the parsed key/value mapping (or by a
    ``low``/``high`` pair when ``--samp_min`` is set).

    Parameters
    ----------
    args : dict
        Parsed command-line arguments. Reads ``<OTU_table_file>``,
        ``--walk``, ``--dist``, ``--dist_params``, ``--samp_min``,
        ``--no-replace``, ``--min_size``, ``--max_size`` and ``--base``.

    Raises
    ------
    AssertionError
        If ``--samp_min`` is set and the first value returned by
        ``get_comm_size_stats()`` is not greater than zero.
    """
    # normalise the raw CLI values
    args['--walk'] = int(args['--walk'])
    args['--dist_params'] = Utils.parseKeyValueString(args['--dist_params'])

    table = OTU_table.from_csv(args['<OTU_table_file>'], sep='\t')

    # with --samp_min, pin the sampling-size distribution to a single value
    # by giving it the same low and high bound
    if args['--samp_min']:
        smallest = table.get_comm_size_stats()[0]
        assert smallest > 0, '--samp min is < 1. Nothing to sample!'
        args['--dist_params'] = dict(low=smallest, high=smallest)

    # setting subsampling size distribution
    table.set_samp_dist(
        samp_dist=args['--dist'],
        samp_dist_params=args['--dist_params'],
    )

    # subsampling
    subsample_kwargs = {
        'no_replace': args['--no-replace'],
        'walk': args['--walk'],
        'min_size': args['--min_size'],
        'max_size': args['--max_size'],
        'base': args['--base'],
    }
    subsampled = table.subsample(**subsample_kwargs)

    # writing out table
    subsampled.to_csv(sys.stdout, sep='\t', index=False)
コード例 #8
0
ファイル: Tree_sim.py プロジェクト: nick-youngblut/SIPSim
def main(args=None):
    """Simulate a tree over a list of taxa and write it to STDOUT.

    The taxa come from ``<genome_list>`` if given, otherwise from
    ``<comm_file>``. With ``--star`` a star tree is built via ``star_tree``;
    otherwise ``birth_death`` is called with the birth/death parameters.

    ``args`` is updated in place: the four rate options are converted to
    ``float``.

    Parameters
    ----------
    args : dict
        Parsed command-line arguments. Reads ``<genome_list>``,
        ``<comm_file>``, ``--birth_rate``, ``--death_rate``,
        ``--birth_rate_sd``, ``--death_rate_sd``, ``--star`` and
        ``--outfmt``.

    Raises
    ------
    AssertionError
        If ``--outfmt`` is neither ``newick`` nor ``nexus``
        (case-insensitive).
    ValueError
        If neither ``<genome_list>`` nor ``<comm_file>`` is provided, or a
        rate option cannot be converted to ``float``.
    """
    for param in ('--birth_rate', '--death_rate',
                  '--birth_rate_sd', '--death_rate_sd'):
        args[param] = float(args[param])

    # Validate the output format up front so that a typo is reported before
    # any input is read or a tree is simulated.
    outfmt = args['--outfmt'].lower()
    psbl_fmts = ['newick', 'nexus']
    assert outfmt in psbl_fmts, 'output file format not recognized.' +\
        ' Possible formats: {}'.format(', '.join(psbl_fmts))

    # loading taxon list
    if args['<genome_list>'] is not None:
        taxa = Utils.parseGenomeList(args['<genome_list>'], check_exists=False)
        # only the first field of each entry is used as the taxon name
        taxa = [x[0] for x in taxa]
    elif args['<comm_file>'] is not None:
        comm = CommTable.from_csv(args['<comm_file>'], sep='\t')
        taxa = comm.get_unique_taxon_names()
    else:
        # Without this branch `taxa` would be unbound below and the failure
        # would surface as an obscure UnboundLocalError.
        raise ValueError('No taxa provided: either <genome_list> or '
                         '<comm_file> is required')

    # init dendropy taxon namespace
    taxa = dendropy.TaxonNamespace(taxa, label='taxa')

    # simulating tree
    if args['--star']:
        tree = star_tree(taxon_set=taxa)
    else:
        tree = birth_death(args['--birth_rate'],
                           args['--death_rate'],
                           birth_rate_sd=args['--birth_rate_sd'],
                           death_rate_sd=args['--death_rate_sd'],
                           num_extant_tips=len(taxa))

    # writing tree
    tree.write_to_stream(sys.stdout, outfmt)
コード例 #9
0
def get_taxa(KDEs, kde_type, all_taxa=None):
    """Getting taxa names from KDE object.

    Parameters
    ----------
    KDEs : list or dict
        KDE object. Its layout depends on `kde_type`:
        1 = iterable of (taxon, kde) entries, 2 = {taxon: kde},
        3 = {libID: {taxon: kde}}, 4 = {libID: filename}, where each file
        is loaded with ``Utils.load_kde``.
    kde_type : int
        KDE object type (1-4).
    all_taxa : object, optional
        If not None, taxa whose KDE is None are included as well.
        NOTE(review): the check is `is not None`, so passing `False` also
        includes every taxon -- confirm this is what callers expect.

    Returns
    -------
    list
        Taxon names. For the multi-library types (3 and 4) each name occurs
        once, in first-seen order.

    Raises
    ------
    TypeError
        If `kde_type` is not 1, 2, 3 or 4.
    """
    def _unique(names):
        # order-preserving de-duplication
        seen = set()
        unique = []
        for name in names:
            if name not in seen:
                seen.add(name)
                unique.append(name)
        return unique

    include_all = all_taxa is not None

    # parsing KDE
    if kde_type == 1:
        taxa = [k[0] for k in KDEs if include_all or k[1] is not None]
    elif kde_type == 2:
        # always a real list, so callers can take len() of it or index it
        taxa = [t for t, kde in KDEs.items()
                if include_all or kde is not None]
    elif kde_type == 3:
        taxa = []
        for libID, by_taxon in KDEs.items():
            # test the per-taxon KDE, not the per-library dict
            taxa += [t for t, kde in by_taxon.items()
                     if include_all or kde is not None]
        taxa = _unique(taxa)
    elif kde_type == 4:
        taxa = []
        for libID, filename in KDEs.items():
            KDE_bylib = Utils.load_kde(filename)
            taxa += [t for t, kde in KDE_bylib.items()
                     if include_all or kde is not None]
        # a taxon present in several libraries is reported once, as for type 3
        taxa = _unique(taxa)
    else:
        raise TypeError('KDE object type not recognized')

    return taxa
コード例 #10
0
def main(args=None):
    """Print the taxon names of a KDE object, optionally a random subsample.

    The subsample size is taken from the first option that is set, in the
    order ``-s`` (absolute number), ``-f`` (fraction of the taxa) and ``-p``
    (percentage of the taxa). If the requested size exceeds the number of
    taxa, ``args['-r']`` is set to True so that sampling happens with
    replacement, and a warning is written to STDERR.

    Parameters
    ----------
    args : dict
        Parsed command-line arguments. Reads ``<kde>``, ``-a`` (forwarded to
        ``get_taxa``), ``-s``, ``-f``, ``-p`` and ``-r`` (forwarded as the
        ``replace`` argument of ``np.random.choice``).
    """
    # load KDEs object
    sys.stderr.write('Loading KDEs...\n')
    kde_obj = Utils.load_kde(args['<kde>'])
    obj_type = Utils.KDE_type(kde_obj)

    # all taxa names in KDEs
    taxa = get_taxa(kde_obj, obj_type, args['-a'])
    n_taxa = len(taxa)

    # number to subsample; stays None when no size option was given
    if args['-s'] is not None:
        n_sub = int(args['-s'])
    elif args['-f'] is not None:
        n_sub = int(float(args['-f']) * n_taxa)
    elif args['-p'] is not None:
        n_sub = int(float(args['-p']) / 100 * n_taxa)
    else:
        n_sub = None

    # subsampling (if needed)
    if n_sub is not None:
        if n_sub > n_taxa:
            # cannot draw more unique taxa than exist
            args['-r'] = True
            warning = 'WARNING: nsub > ntaxa, sub-sampling with replacement!'
            sys.stderr.write(warning + '\n')
        taxa = np.random.choice(taxa, size=n_sub, replace=args['-r'])
        sys.stderr.write('Subsampled {} taxa'.format(n_sub) + '\n')

    # writing to STDOUT
    for name in taxa:
        print(name)