def main():
    """Print per-column values computed from a tree model.

    Command-line arguments (read from ``sys.argv``):

    1. Path to a tree model file, loaded with
       ``bx.phylo.phast.TreeModel.from_file``.
    2. Comma-separated names; the i-th name is paired with the i-th
       character of each column.
    3. (optional) Path to a whitespace-delimited file whose first field is a
       column string and whose second field is an integer.

    When the third argument is given, one output line is written per
    non-blank input line: the original fields, then the space-joined result
    of ``lik_to_prob``, all tab-separated.  Otherwise every combination of
    the model's alphabet symbols over the names (as produced by
    ``cookbook.cross_lists``) is evaluated and printed with ``"?"`` in place
    of the second field.

    Raises:
        IndexError: a non-blank input line has fewer than two fields.
        ValueError: the second field of an input line is not an integer.
    """
    # Close the model file as soon as it has been parsed.
    # NOTE(review): assumes from_file reads the file eagerly -- confirm.
    with open(sys.argv[1]) as model_file:
        tm = bx.phylo.phast.TreeModel.from_file(model_file)
    t = bx.phylo.newick.newick_parser.parse_string(tm.tree)
    names = sys.argv[2].split(',')
    nucs = tm.alphabet
    background = tm.background
    # Single-argument parenthesised prints produce identical output under
    # the Python 2 print statement and the Python 3 print function.
    print("# " + ','.join(names))
    if len(sys.argv) > 3:
        with open(sys.argv[3]) as column_file:
            for line in column_file:
                fields = line.rstrip("\r\n").split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                # The count is not used below, but malformed lines should
                # still fail loudly, exactly as they did before.
                int(fields[1])
                # HACK: treat "N" as the wildcard symbol "*".
                col = fields[0].replace("N", "*")
                a = dict(zip(names, col))
                lik = felsen(t, a, tm)
                prob = lik_to_prob(lik, background, tm)
                print("\t".join(fields + [' '.join(map(str, prob))]))
    else:
        for rows in cookbook.cross_lists(*([nucs] * len(names))):
            a = dict(zip(names, rows))
            lik = felsen(t, a, tm)
            prob = lik_to_prob(lik, background, tm)
            print("\t".join([''.join(rows), "?", ' '.join(map(str, prob))]))
# NOTE(review): the statement below is the tail of a construct that begins
# before the visible source (presumably the loop filling `counts`); its
# original indentation cannot be recovered from here -- confirm.
counts[col] = 1

# Disabled alternative report: list the tallied columns ordered by count.
# counts = [ ( value, key ) for key, value in counts.iteritems() ]
# counts.sort()
# counts.reverse()
# for count, col in counts:
#     print "".join(col), count

# Options are parsed from the module docstring.
options, args = cookbook.doc_optparse.parse(__doc__)

# Either option switches wildcard mode on; an explicit maximum takes
# precedence over the default of "all but one species".
wildcard = bool(options.wildcard or options.maxwildcards)
if options.maxwildcards:
    max_wildcard = int(options.maxwildcards)
elif options.wildcard:
    max_wildcard = nspecies - 1

nucs = "ACGT-" + ("*" if wildcard else "")

# Emit a count (0 when absent from `counts`) for every possible column.
for symbols in cookbook.cross_lists(*([nucs] * nspecies)):
    col = "".join(symbols)
    too_many_wildcards = wildcard and col.count("*") > max_wildcard
    only_gaps = col.count("-") == nspecies
    if too_many_wildcards or only_gaps:
        continue
    # Single-argument form prints the same text as `print col, count`.
    print(" ".join((col, str(counts.get(col, 0)))))