def main(args=None):
    """Subset a KDE object to a set of taxa and dump the result to STDOUT.

    The KDE object is loaded with ``Utils.load_kde`` and classified with
    ``Utils.KDE_type``.  The layouts handled here are:

    * 1 -- iterable of ``(taxon, kde)`` pairs; output is a list of
      ``[taxon, kde]`` lists.
    * 2 -- ``{taxon: kde}``; output has the same layout.
    * 3 -- ``{libID: {taxon: kde}}``; output has the same layout.
    * 4 -- ``{libID: filename}``; each file is loaded with
      ``Utils.load_kde`` and the output is ``{libID: {taxon: kde}}``.

    Only entries whose taxon is contained in the object returned by
    ``load_taxa`` are kept.

    Args:
        args: option mapping providing ``<taxa>`` and ``<kde>``.

    Raises:
        TypeError: if the KDE object type is not one of 1-4.
    """
    # loading taxa names
    taxa = load_taxa(args['<taxa>'])

    # loading KDEs
    KDEs = Utils.load_kde(args['<kde>'])
    kde_type = Utils.KDE_type(KDEs)

    # filtering KDEs by taxon
    if kde_type == 1:
        KDEs_p = [[t, k] for t, k in KDEs if t in taxa]
    elif kde_type == 2:
        KDEs_p = {t: k for t, k in KDEs.items() if t in taxa}
    elif kde_type == 3:
        KDEs_p = {}
        # Iterate the *input* object; looping over the (empty) output dict
        # would silently produce an empty result.
        for libID, lib_kdes in KDEs.items():
            KDEs_p[libID] = {t: k for t, k in lib_kdes.items() if t in taxa}
    elif kde_type == 4:
        KDEs_p = {}
        for libID, filename in KDEs.items():
            KDE_bylib = Utils.load_kde(filename)
            KDEs_p[libID] = {t: k for t, k in KDE_bylib.items() if t in taxa}
            # drop the reference so the per-library object can be freed
            KDE_bylib = None
    else:
        raise TypeError('KDE object type not recognized')

    # writing
    dill.dump(KDEs_p, sys.stdout)
def main(args=None):
    """Print a tab-delimited table of ``kde_factor`` values for every KDE.

    The table has the columns ``libID``, ``taxon`` and ``bandwidth``.  For
    KDE layouts without library information (types 1 and 2) the library
    column is the literal ``'1'``.

    Args:
        args: option mapping providing ``<kde>``.

    Raises:
        TypeError: if the KDE object type is not one of 1-4 (raised after
            the header line has been printed).
    """
    KDEs = Utils.load_kde(args['<kde>'])
    kde_type = Utils.KDE_type(KDEs)

    def _records():
        # Lazily yield (libID, taxon, kde) so that per-library files
        # (type 4) are loaded only when their rows are about to be printed.
        if kde_type == 1:
            for taxon, kde in KDEs:
                yield '1', taxon, kde
        elif kde_type == 2:
            for taxon, kde in KDEs.items():
                yield '1', taxon, kde
        elif kde_type == 3:
            for lib_id, lib_kdes in KDEs.items():
                for taxon, kde in lib_kdes.items():
                    yield lib_id, taxon, kde
        elif kde_type == 4:
            for lib_id, filename in KDEs.items():
                for taxon, kde in Utils.load_kde(filename).items():
                    yield lib_id, taxon, kde
        else:
            raise TypeError('KDE object type not recognized')

    # header
    print('\t'.join(['libID', 'taxon', 'bandwidth']))
    # one row per KDE
    for lib_id, taxon, kde in _records():
        print('\t'.join([lib_id, taxon, kde_factor(kde)]))
def main(args=None):
    """Report the contents of a KDE object on STDOUT.

    Modes (selected through ``args``):

    * ``-n`` -- print the number of KDEs and exit.  For the per-library
      layouts (types 3 and 4) a ``library``/``N`` table with one row per
      library is printed.
    * ``-s`` -- print a header line and call ``KDE_dataset_stats`` for
      every KDE.
    * neither -- print the taxon names (prefixed by the library ID for
      the per-library layouts).

    Type 4 objects (``{libID: filename}``) are reported the same way as
    type 3 objects (``{libID: {taxon: kde}}``): every library is visited
    and the library ID is carried through to the output.

    Args:
        args: option mapping providing ``<kde>``, ``-s`` and ``-n``.

    Raises:
        TypeError: if the KDE object type is not one of 1-4.
    """
    # load KDEs object
    sys.stderr.write('Loading KDEs...\n')
    KDEs = Utils.load_kde(args['<kde>'])

    # header
    if args['-s']:
        print('\t'.join(['lib_ID', 'taxon_ID', 'KDE_ID', 'min',
                         'percentile_5', 'percentile_25', 'mean', 'median',
                         'percentile_75', 'percentile_95', 'max', 'stdev']))

    # KDE type
    kde_type = Utils.KDE_type(KDEs)

    # parsing KDE
    if kde_type == 1:
        if args['-n']:
            print(len(KDEs))
            sys.exit()
        for x in KDEs:
            if args['-s']:
                KDE_dataset_stats(x[1], x[0])
            else:
                print(x[0])
    elif kde_type == 2:
        if args['-n']:
            print(len(KDEs.keys()))
            sys.exit()
        for taxon, kde in KDEs.items():
            if args['-s']:
                KDE_dataset_stats(kde, taxon)
            else:
                print(taxon)
    elif kde_type == 3:
        if args['-n']:
            print('\t'.join(['library', 'N']))
            for libID, lib_kdes in KDEs.items():
                print('\t'.join([str(x) for x in [libID, len(lib_kdes.keys())]]))
            sys.exit()
        for libID, lib_kdes in KDEs.items():
            for taxon, kde in lib_kdes.items():
                if args['-s']:
                    KDE_dataset_stats(kde, taxon, libID=libID)
                else:
                    print('\t'.join([libID, taxon]))
    elif kde_type == 4:
        if args['-n']:
            # Count every library before exiting; exiting inside the loop
            # would report the first library only.
            print('\t'.join(['library', 'N']))
            for libID, filename in KDEs.items():
                KDE_bylib = Utils.load_kde(filename)
                print('\t'.join([str(x) for x in [libID, len(KDE_bylib.keys())]]))
            sys.exit()
        for libID, filename in KDEs.items():
            KDE_bylib = Utils.load_kde(filename)
            for taxon, kde in KDE_bylib.items():
                if args['-s']:
                    KDE_dataset_stats(kde, taxon, libID=libID)
                else:
                    print('\t'.join([libID, taxon]))
    else:
        raise TypeError('KDE object type not recognized')
def main(args=None):
    """Draw ``-n`` values from every KDE and write them to STDOUT as TSV.

    Samples are grouped per library; layouts without library information
    (types 1 and 2) are stored under the library key ``'1'``.  Entries
    whose KDE is ``None`` are skipped.  Each library's samples are turned
    into a table with ``to_df`` and the tables are concatenated.

    Args:
        args: option mapping providing ``<kde>`` and ``-n``.

    Raises:
        TypeError: if the KDE object type is not one of 1-4.
    """
    # input
    KDEs = Utils.load_kde(args['<kde>'])
    n = int(args['-n'])
    kde_type = Utils.KDE_type(KDEs)

    def _draw(taxon_kde_pairs):
        # element 0 along the first axis of each resample() result
        return {taxon: kde.resample(n)[0, ]
                for taxon, kde in taxon_kde_pairs
                if kde is not None}

    # sampling from KDEs
    vals = {}
    if kde_type == 1:
        vals['1'] = _draw(KDEs)
    elif kde_type == 2:
        vals['1'] = _draw(KDEs.items())
    elif kde_type == 3:
        for lib_id, lib_kdes in KDEs.items():
            vals[lib_id] = _draw(lib_kdes.items())
    elif kde_type == 4:
        for lib_id, filename in KDEs.items():
            vals[lib_id] = _draw(Utils.load_kde(filename).items())
    else:
        raise TypeError('KDE object type not recognized')

    # writing out results
    frames = [to_df(samples, lib_id) for lib_id, samples in vals.items()]
    tbl = pd.concat(frames)
    tbl.to_csv(sys.stdout, sep='\t', index=False)
def test_kde():
    """The example KDE pickle loads as a dict and is classified as type 2."""
    kde_path = os.path.join(data_dir, 'ampFrag_skewN90-25-n5-nS_kde.pkl')

    # the loaded object must be a plain dict ...
    loaded = Utils.load_kde(kde_path)
    assert isinstance(loaded, dict)

    # ... which Utils.KDE_type reports as type 2
    assert Utils.KDE_type(loaded) == 2
def test_file_status():
    """File checks pass for an existing file and raise IOError for a missing one."""
    # real file: none of the checks may raise
    existing = os.path.join(data_dir, 'ampFrag_skewN90-25-n5-nS_kde.pkl')
    for check in (Utils.is_file, Utils.checkExists, Utils.checkEmpty):
        check(existing)

    # fake file: both existence checks must raise IOError
    missing = os.path.join(data_dir, 'DOES_NOT_EXIST')
    for check in (Utils.is_file, Utils.checkExists):
        with pytest.raises(IOError):
            check(missing)
def main(args=None):
    """Subsample an OTU table and write the result to STDOUT as TSV.

    ``args`` is modified in place: ``--walk`` is cast to ``int`` and
    ``--dist_params`` is replaced by the dict produced by
    ``Utils.parseKeyValueString`` (or by fixed ``low``/``high`` bounds when
    ``--samp_min`` is set).

    Args:
        args: option mapping providing ``<OTU_table_file>``, ``--walk``,
            ``--dist``, ``--dist_params``, ``--samp_min``, ``--no-replace``,
            ``--min_size``, ``--max_size`` and ``--base``.

    Raises:
        AssertionError: if ``--samp_min`` is set and the minimum community
            size is not positive.
    """
    args['--walk'] = int(args['--walk'])
    # distribution parameters as a dict
    args['--dist_params'] = Utils.parseKeyValueString(args['--dist_params'])

    otu_tbl = OTU_table.from_csv(args['<OTU_table_file>'], sep='\t')

    if args['--samp_min']:
        # Every community is subsampled to the smallest community size
        # (first element of the size stats): low == high pins the draw.
        # NOTE(review): only the parameters are overridden here; --dist is
        # left as given -- confirm it is always uniform in this mode.
        smallest = otu_tbl.get_comm_size_stats()[0]
        assert smallest > 0, '--samp min is < 1. Nothing to sample!'
        args['--dist_params'] = dict(low=smallest, high=smallest)

    # setting subsampling size distribution
    otu_tbl.set_samp_dist(samp_dist=args['--dist'],
                          samp_dist_params=args['--dist_params'])

    # subsampling
    subsampled = otu_tbl.subsample(no_replace=args['--no-replace'],
                                   walk=args['--walk'],
                                   min_size=args['--min_size'],
                                   max_size=args['--max_size'],
                                   base=args['--base'])

    # writing out table
    subsampled.to_csv(sys.stdout, sep='\t', index=False)
def main(args=None):
    """Simulate a tree for a set of taxa and write it to STDOUT.

    Taxon names are taken from ``<genome_list>`` when given, otherwise from
    the unique taxon names of ``<comm_file>``.  With ``--star`` a star tree
    is built; otherwise a birth-death tree with as many extant tips as
    there are taxa is simulated.

    ``args`` is modified in place: the four rate options are cast to
    ``float``.

    Args:
        args: option mapping providing ``<genome_list>``, ``<comm_file>``,
            ``--star``, ``--outfmt``, ``--birth_rate``, ``--death_rate``,
            ``--birth_rate_sd`` and ``--death_rate_sd``.

    Raises:
        AssertionError: if ``--outfmt`` is not ``newick`` or ``nexus``
            (case-insensitive).
        ValueError: if neither ``<genome_list>`` nor ``<comm_file>`` is
            provided, or a rate option cannot be converted to ``float``.
    """
    for param in ('birth_rate', 'death_rate',
                  'birth_rate_sd', 'death_rate_sd'):
        key = '--' + param
        args[key] = float(args[key])

    # Validate the output format up front so that a bad value fails before
    # any file is read or any tree is simulated.
    outfmt = args['--outfmt'].lower()
    psbl_fmts = ['newick', 'nexus']
    assert outfmt in psbl_fmts, 'output file format not recognized.' +\
        ' Possible formats: {}'.format(', '.join(psbl_fmts))

    # loading taxon list
    if args['<genome_list>'] is not None:
        taxa = Utils.parseGenomeList(args['<genome_list>'],
                                     check_exists=False)
        taxa = [x[0] for x in taxa]
    elif args['<comm_file>'] is not None:
        comm = CommTable.from_csv(args['<comm_file>'], sep='\t')
        taxa = comm.get_unique_taxon_names()
    else:
        # Without this branch `taxa` would be unbound further down.
        raise ValueError('No taxon source provided: '
                         'either <genome_list> or <comm_file> is required')

    # init dendropy taxon namespace
    taxa = dendropy.TaxonNamespace(taxa, label='taxa')

    # simulating tree
    if args['--star']:
        tree = star_tree(taxon_set=taxa)
    else:
        tree = birth_death(args['--birth_rate'],
                           args['--death_rate'],
                           birth_rate_sd=args['--birth_rate_sd'],
                           death_rate_sd=args['--death_rate_sd'],
                           num_extant_tips=len(taxa))

    # writing tree
    tree.write_to_stream(sys.stdout, outfmt)
def _lib_taxa(lib_kdes, include_all):
    """Return the taxon names of one ``{taxon: kde}`` mapping.

    Taxa whose KDE is ``None`` are dropped unless ``include_all`` is true.
    """
    if include_all:
        return list(lib_kdes.keys())
    return [taxon for taxon, kde in lib_kdes.items() if kde is not None]


def get_taxa(KDEs, kde_type, all_taxa=None):
    """Getting taxa names from KDE object.

    Args:
        KDEs: the KDE object; its layout is given by ``kde_type``:
            1 -- iterable of ``(taxon, kde)`` pairs,
            2 -- ``{taxon: kde}``,
            3 -- ``{libID: {taxon: kde}}``,
            4 -- ``{libID: filename}`` (each file is loaded with
            ``Utils.load_kde``).
        kde_type: layout code (1-4).
        all_taxa: when truthy, taxa whose KDE is ``None`` are included;
            when ``None`` (default) or any other falsy value they are
            skipped.

    Returns:
        list of taxon names.  For the per-library layouts (3 and 4) names
        occurring in several libraries are reported once, in arbitrary
        order.

    Raises:
        TypeError: if ``kde_type`` is not one of 1-4.
    """
    include_all = bool(all_taxa)

    if kde_type == 1:
        if include_all:
            return [k[0] for k in KDEs]
        return [k[0] for k in KDEs if k[1] is not None]
    if kde_type == 2:
        return _lib_taxa(KDEs, include_all)

    if kde_type == 3:
        libraries = KDEs.values()
    elif kde_type == 4:
        # load one library at a time instead of all files up front
        libraries = (Utils.load_kde(filename) for filename in KDEs.values())
    else:
        raise TypeError('KDE object type not recognized')

    taxa = []
    for lib_kdes in libraries:
        taxa += _lib_taxa(lib_kdes, include_all)
    # a taxon may be present in several libraries
    return list(set(taxa))
def main(args=None):
    """Print the taxon names of a KDE object, optionally subsampled.

    The number of taxa to draw is taken from the first of ``-s`` (absolute
    count), ``-f`` (fraction of all taxa) or ``-p`` (percent of all taxa)
    that is not ``None``; fractional results are truncated to ``int``.
    If more taxa are requested than are available, sampling switches to
    "with replacement" (``args['-r']`` is set to ``True``) and a warning is
    written to STDERR.  Without any of the three options every taxon is
    printed.

    Args:
        args: option mapping providing ``<kde>``, ``-a``, ``-s``, ``-f``,
            ``-p`` and ``-r``.
    """
    # load KDEs object
    sys.stderr.write('Loading KDEs...\n')
    KDEs = Utils.load_kde(args['<kde>'])
    kde_type = Utils.KDE_type(KDEs)

    # all taxa names in KDEs
    # Pass None (get_taxa's default) when -a is falsy, so that an unset
    # flag behaves exactly like an omitted argument.
    # NOTE(review): assumes -a is a boolean flag, which is reported as
    # False rather than None when absent -- confirm against the usage text.
    taxa = get_taxa(KDEs, kde_type, args['-a'] or None)
    ntaxa = len(taxa)

    # number to subsample (None: no subsampling)
    nsub = None
    if args['-s'] is not None:
        nsub = int(args['-s'])
    elif args['-f'] is not None:
        nsub = int(float(args['-f']) * ntaxa)
    elif args['-p'] is not None:
        nsub = int(float(args['-p']) / 100 * ntaxa)

    # subsampling (if needed)
    if nsub is not None:
        if nsub > ntaxa:
            # cannot draw more unique taxa than exist
            args['-r'] = True
            msg = 'WARNING: nsub > ntaxa, sub-sampling with replacement!'
            sys.stderr.write(msg + '\n')
        taxa = np.random.choice(taxa, size=nsub, replace=args['-r'])
        msg = 'Subsampled {} taxa'
        sys.stderr.write(msg.format(nsub) + '\n')

    # writing to STDOUT
    for taxon in taxa:
        print(taxon)