예제 #1
0
def main(args=None):
    """Filter a KDE object down to the requested taxa and dump it to STDOUT.

    Args:
        args: mapping of command-line options. Reads ``'<taxa>'`` (passed to
            ``load_taxa``) and ``'<kde>'`` (passed to ``Utils.load_kde``).

    Raises:
        TypeError: if ``Utils.KDE_type`` returns a value other than 1-4.

    The shape of the output follows the input type:
      * type 1 -- iterable of ``(taxon, kde)`` pairs -> list of ``[taxon, kde]``
      * type 2 -- ``{taxon: kde}`` -> ``{taxon: kde}``
      * type 3 -- ``{libID: {taxon: kde}}`` -> ``{libID: {taxon: kde}}``
      * type 4 -- ``{libID: filename}``; each file is loaded and filtered, so
        the result is an in-memory ``{libID: {taxon: kde}}``.
    """
    # taxa names to retain
    taxa = load_taxa(args['<taxa>'])

    # loading KDEs and determining how the object is organized
    KDEs = Utils.load_kde(args['<kde>'])
    kde_type = Utils.KDE_type(KDEs)

    # keeping only the KDEs whose taxon is in `taxa`
    if kde_type == 1:
        KDEs_p = [[t, k] for t, k in KDEs if t in taxa]
    elif kde_type == 2:
        KDEs_p = {t: k for t, k in KDEs.items() if t in taxa}
    elif kde_type == 3:
        KDEs_p = {}
        # Iterate the *input* object: looping over the freshly created
        # (empty) KDEs_p would never execute and always yield {}.
        for libID, v in KDEs.items():
            KDEs_p[libID] = {t: k for t, k in v.items() if t in taxa}
    elif kde_type == 4:
        KDEs_p = {}
        for libID, filename in KDEs.items():
            KDE_bylib = Utils.load_kde(filename)
            KDEs_p[libID] = {t: k for t, k in KDE_bylib.items() if t in taxa}
    else:
        raise TypeError('KDE object type not recognized')

    # writing
    dill.dump(KDEs_p, sys.stdout)
예제 #2
0
def main(args=None):
    """List the contents of a KDE object on STDOUT.

    Args:
        args: mapping of command-line options. Reads ``'<kde>'`` (passed to
            ``Utils.load_kde``) and the flags ``'-s'`` and ``'-n'``.

    Behavior:
      * ``-s``: a tab-delimited stats header is printed first and each KDE is
        passed to ``KDE_dataset_stats`` instead of having its name printed.
      * ``-n``: only counts are printed and the process exits via
        ``sys.exit()``. Single-library objects (types 1 and 2) print one
        number; multi-library objects (types 3 and 4) print a
        ``library``/``N`` table with one row per library.
      * neither: taxon names are printed, prefixed with the library ID for
        multi-library objects.

    Raises:
        TypeError: if ``Utils.KDE_type`` returns a value other than 1-4.
    """
    # load KDEs object
    sys.stderr.write('Loading KDEs...\n')
    KDEs = Utils.load_kde(args['<kde>'])

    # header
    if args['-s']:
        print('\t'.join(['lib_ID', 'taxon_ID', 'KDE_ID', 'min',
                         'percentile_5', 'percentile_25', 'mean', 'median',
                         'percentile_75', 'percentile_95', 'max', 'stdev']))

    # KDE type
    kde_type = Utils.KDE_type(KDEs)

    # parsing KDE
    if kde_type == 1:
        if args['-n']:
            print(len(KDEs))
            sys.exit()
        for taxon, kde in KDEs:
            if args['-s']:
                KDE_dataset_stats(kde, taxon)
            else:
                print(taxon)
    elif kde_type == 2:
        if args['-n']:
            print(len(KDEs))
            sys.exit()
        for taxon, kde in KDEs.items():
            if args['-s']:
                KDE_dataset_stats(kde, taxon)
            else:
                print(taxon)
    elif kde_type == 3:
        if args['-n']:
            print('\t'.join(['library', 'N']))
            for libID, v in KDEs.items():
                print('\t'.join([str(libID), str(len(v))]))
            sys.exit()
        for libID, v in KDEs.items():
            for taxon, kde in v.items():
                if args['-s']:
                    KDE_dataset_stats(kde, taxon, libID=libID)
                else:
                    print('\t'.join([libID, taxon]))
    elif kde_type == 4:
        # Type 4 maps libID -> file name, so it is handled like type 3 once
        # each file is loaded. Previously `sys.exit()` sat inside the loop
        # (only the first library visited was counted) and the library ID
        # was dropped from both the stats call and the name listing.
        if args['-n']:
            print('\t'.join(['library', 'N']))
        for libID, filename in KDEs.items():
            KDE_bylib = Utils.load_kde(filename)
            if args['-n']:
                print('\t'.join([str(libID), str(len(KDE_bylib))]))
                continue
            for taxon, kde in KDE_bylib.items():
                if args['-s']:
                    KDE_dataset_stats(kde, taxon, libID=libID)
                else:
                    print('\t'.join([libID, taxon]))
        if args['-n']:
            # exit only after every library has been reported
            sys.exit()
    else:
        raise TypeError('KDE object type not recognized')
예제 #3
0
def main(args=None):
    """Print a tab-delimited ``libID``/``taxon``/``bandwidth`` table.

    Args:
        args: mapping of command-line options; only ``'<kde>'`` is read and
            handed to ``Utils.load_kde``.

    Raises:
        TypeError: if ``Utils.KDE_type`` returns a value other than 1-4
            (raised after the header line has been printed).

    The third column is whatever ``kde_factor`` returns for each KDE.
    Objects without library information (types 1 and 2) are reported under
    the library ID ``'1'``.
    """
    KDEs = Utils.load_kde(args['<kde>'])
    kde_type = Utils.KDE_type(KDEs)

    def _records():
        # Lazily yield (libID, taxon, kde) for every KDE in the object.
        if kde_type == 1:
            for taxon, kde in KDEs:
                yield '1', taxon, kde
        elif kde_type == 2:
            for taxon, kde in KDEs.items():
                yield '1', taxon, kde
        elif kde_type == 3:
            for lib, by_taxon in KDEs.items():
                for taxon, kde in by_taxon.items():
                    yield lib, taxon, kde
        elif kde_type == 4:
            # values are file names; each library is loaded when reached
            for lib, path in KDEs.items():
                for taxon, kde in Utils.load_kde(path).items():
                    yield lib, taxon, kde
        else:
            raise TypeError('KDE object type not recognized')

    # header first, then one row per KDE
    print('\t'.join(['libID', 'taxon', 'bandwidth']))
    for lib, taxon, kde in _records():
        print('\t'.join([lib, taxon, kde_factor(kde)]))
예제 #4
0
def main(args=None):
    """Draw values from every KDE and write them to STDOUT as a TSV table.

    Args:
        args: mapping of command-line options. Reads ``'<kde>'`` (passed to
            ``Utils.load_kde``) and ``'-n'`` (converted with ``int`` and
            passed to each KDE's ``resample``).

    Raises:
        TypeError: if ``Utils.KDE_type`` returns a value other than 1-4.

    Entries whose KDE is ``None`` are skipped. Objects without library
    information (types 1 and 2) are stored under the library key ``'1'``.
    """
    # input
    KDEs = Utils.load_kde(args['<kde>'])
    n = int(args['-n'])

    # KDE object type
    kde_type = Utils.KDE_type(KDEs)

    def _draw(pairs):
        # {taxon: resample(n)[0,]} for each (taxon, kde) pair with a KDE
        return {name: dist.resample(n)[0, ]
                for name, dist in pairs if dist is not None}

    # sampling from KDEs, grouped by library
    vals = {}
    if kde_type == 1:
        vals['1'] = _draw(KDEs)
    elif kde_type == 2:
        vals['1'] = _draw(KDEs.items())
    elif kde_type == 3:
        for libID, by_taxon in KDEs.items():
            vals[libID] = _draw(by_taxon.items())
    elif kde_type == 4:
        # values are file names; load each library before sampling
        for libID, filename in KDEs.items():
            vals[libID] = _draw(Utils.load_kde(filename).items())
    else:
        raise TypeError('KDE object type not recognized')

    # writing out results: one data frame per library, concatenated
    frames = [to_df(samples, libID) for libID, samples in vals.items()]
    tbl = pd.concat(frames)
    tbl.to_csv(sys.stdout, sep='\t', index=False)
예제 #5
0
def test_kde():
    """The example KDE file loads as a dict that is classified as type 2."""
    pkl_path = os.path.join(data_dir, 'ampFrag_skewN90-25-n5-nS_kde.pkl')
    loaded = Utils.load_kde(pkl_path)
    # the loaded object must be a plain mapping ...
    assert isinstance(loaded, dict)
    # ... and Utils must recognize it as KDE type 2
    assert Utils.KDE_type(loaded) == 2
예제 #6
0
def main(args=None):
    """Print the taxon names in a KDE object, optionally sub-sampled.

    Args:
        args: mapping of command-line options. Reads ``'<kde>'`` (passed to
            ``Utils.load_kde``), ``'-a'`` (passed to ``get_taxa``), the
            sub-sample size options ``'-s'`` (absolute number), ``'-f'``
            (fraction of taxa) and ``'-p'`` (percentage of taxa), checked in
            that order, and ``'-r'`` (the ``replace`` value given to
            ``np.random.choice``).

    If none of ``-s``/``-f``/``-p`` is set, every taxon is printed. If the
    requested number exceeds the number of taxa, sampling is done with
    replacement regardless of ``-r`` and a warning is written to STDERR.
    The ``args`` mapping is not modified.
    """
    # load KDEs object
    sys.stderr.write('Loading KDEs...\n')
    KDEs = Utils.load_kde(args['<kde>'])

    # KDE type
    kde_type = Utils.KDE_type(KDEs)

    # all taxa names in KDEs
    taxa = get_taxa(KDEs, kde_type, args['-a'])
    ntaxa = len(taxa)

    # number to subsample (None => no sub-sampling); int() truncates
    nsub = None
    if args['-s'] is not None:
        nsub = int(args['-s'])
    elif args['-f'] is not None:
        nsub = int(float(args['-f']) * ntaxa)
    elif args['-p'] is not None:
        nsub = int(float(args['-p']) / 100 * ntaxa)

    # subsampling
    if nsub is not None:
        if nsub > ntaxa:
            # Cannot draw more unique taxa than exist. Use a local flag
            # rather than overwriting the caller's args['-r'].
            replace = True
            msg = 'WARNING: nsub > ntaxa, sub-sampling with replacement!'
            sys.stderr.write(msg + '\n')
        else:
            replace = args['-r']
        taxa = np.random.choice(taxa, size=nsub, replace=replace)
        msg = 'Subsampled {} taxa'
        sys.stderr.write(msg.format(nsub) + '\n')

    # writing to STDOUT
    for x in taxa:
        print(x)