nargs='?', default=None, type=str, help="the output summary file\n" "[stdout if not present]") return vars(p.parse_args()) if __name__ == "__main__": args = read_params(sys.argv) cscores = collections.defaultdict(set) fwmarkers = {} maps = collections.defaultdict(set) tree = ppa.PpaTree(args['taxonomy']) clades2terms = ppa.clades2terms(tree.tree) clades2taxa = dict([(clade.full_name, set([-int(taxon.name[4:]) for taxon in taxa])) for clade, taxa in clades2terms.items()]) for l in open(args['cscores']): gene_seed, clade, n, n_tot, coreness = l.strip().split('\t') gene_seed, clade, n, n_tot, coreness = int(gene_seed), clade, int( n), int(n_tot), float(coreness) cscores[gene_seed].add((clade, n, n_tot, coreness)) for i, l in enumerate(open(args['fwmarkers'])): taxa_id, gene_seed, clade, n, n_tot, coreness, n_ext_seeds, n_ext_taxa, uniqueness, ext_taxa = l.strip( ).split('\t')
default=None, type=str, help="the input tree") p.add_argument('out_file', nargs='?', default=None, type=str, help="the output file (b2zipped if ending with '.bz2')\n" "[stdout if not present]") p.add_argument( '-n', action='store_true', help="Distances normalized with respect to the total branch length") return vars(p.parse_args()) if __name__ == "__main__": args = read_params(sys.argv) ppatree = ppa.PpaTree(args['intree']) dists = ppa.dist_matrix(ppatree.tree) tbl = ppatree.tree.total_branch_length() if args['n'] else 1.0 #tbl = ppatree.tree.total_branch_length()-1.0 if args['n'] else 1.0 with utils.openw(args['out_file']) as out: for k1, v1 in dists.items(): for k2, v2 in v1.items(): if k1 < k2: out.write("\t".join([k1, k2, str(v2 / tbl)]) + "\n")
def read_params(args):
    """Parse the command-line options of the marker-selection script.

    ``args`` is accepted so existing callers keep working, but it is not
    consulted: ``parse_args()`` is called without arguments, so argparse
    reads ``sys.argv[1:]`` on its own.

    Returns:
        A dict with the keys ``infile``, ``outfile``, ``n`` and ``th``.
        ``infile`` and ``outfile`` are ``None`` when omitted, ``n`` is an
        int (default 100) and ``th`` is kept as a string (default ``None``).
    """
    p = ap.ArgumentParser(description='Select top markers')
    p.add_argument('infile', nargs='?', default=None, type=str,
                   help="the input marker file [stdin if not present]")
    p.add_argument('outfile', nargs='?', default=None, type=str,
                   help="the output core file [stdout if not present]")
    # The two descriptions below used to be passed as ``metavar``, which put
    # a whole sentence into the usage line and left the options without any
    # help text; they are help strings, so pass them as ``help``.
    p.add_argument('-n', default=100, type=int,
                   help="Maximum number of markers to be selected "
                        "for each clade")
    p.add_argument('--th', default=None, type=str,
                   help="Threshold on markerness")
    return vars(p.parse_args())


def _write_markers(markers, out):
    """Write every entry of every clade in ``markers`` to the stream ``out``.

    Each entry is an iterable of strings; its items are joined with
    newlines and terminated by one more newline.
    """
    for clade in markers:
        for m in clade:
            out.write("\n".join(m) + "\n")


if __name__ == "__main__":
    args = read_params(sys.argv)
    # The tree object is built with None as its only argument; it is used
    # here solely to call select_markers on the marker file.
    tree = ppa.PpaTree(None)
    markers = tree.select_markers(args['infile'],
                                  markerness_th=args['th'],
                                  max_markers=args['n'])
    if args['outfile'] is None:
        # Honour the documented "[stdout if not present]" behaviour instead
        # of failing inside open(None, "w").
        _write_markers(markers, sys.stdout)
    else:
        with open(args['outfile'], "w") as outf:
            _write_markers(markers, outf)
p = ap.ArgumentParser(description='Profile ChocoPhlAn genes\n') p.add_argument( '--markers', required = True, default=None, type=str ) p.add_argument( '--euk_taxonomy', required = True, default=None, type=str ) #p.add_argument( '--mic_taxonomy', required = True, default=None, type=str ) p.add_argument('out', nargs='?', default=None, type=str, help= "the output summary file\n" "[stdout if not present]") return vars( p.parse_args() ) if __name__ == "__main__": args = read_params( sys.argv ) cen2data = {} etree = ppa.PpaTree( args['euk_taxonomy'] ) #vtree = ppa.PpaTree( args['vir_taxonomy'] ) #mtree = ppa.PpaTree( args['mic_taxonomy'] ) #vall = set([a.name for a in vtree.tree.get_terminals()]) #mall = set([int(a.name[3:]) for a in mtree.tree.get_terminals()]) eall = set([a.name for a in etree.tree.get_terminals()]) lin = (l.split('\t') for l in open(args['markers'])) for d1,cen,taxa,tax,d2,d3,coreness,d4,d5,d6,d7,tin,tout,d8,tsin,tsout in lin: tsin,tsout = set(["t__"+a for a in tsin.strip().split(":") if a]),set(["t__"+b for b in tsout.strip().split(":") if b]) #tsin,tsout = set([int(a) for a in tsin.strip().split(":") if a]),set([int(b) for b in tsout.strip().split(":") if b]) cen2data[int(cen)] = {'taxa':int(taxa),'tax':tax,'coreness':float(coreness),'tsin':tsin,'tsout':tsout} tax2cen = collections.defaultdict( set ) for k,v in cen2data.items():
p.add_argument('--mic_taxonomy', required=True, default=None, type=str) p.add_argument('out', nargs='?', default=None, type=str, help="the output summary file\n" "[stdout if not present]") return vars(p.parse_args()) if __name__ == "__main__": args = read_params(sys.argv) cen2data = {} vtree = ppa.PpaTree(args['vir_taxonomy']) mtree = ppa.PpaTree(args['mic_taxonomy']) vall = set([a.name for a in vtree.tree.get_terminals()]) #mall = set([int(a.name[3:]) for a in mtree.tree.get_terminals()]) mall = set([a.name for a in mtree.tree.get_terminals()]) lin = (l.split('\t') for l in open(args['markers'])) for d1, cen, taxa, tax, d2, d3, coreness, d4, d5, d6, d7, tin, tout, d8, tsin, tsout in lin: tsin, tsout = set( ["t__" + a for a in tsin.strip().split(":") if a]), set(["t__" + b for b in tsout.strip().split(":") if b]) #tsin,tsout = set([int(a) for a in tsin.strip().split(":") if a]),set([int(b) for b in tsout.strip().split(":") if b]) cen2data[int(cen)] = { 'taxa': int(taxa), 'tax': tax, 'coreness': float(coreness),