def main():
    """Print per-column probabilities computed under a phast tree model.

    Command-line arguments (read from ``sys.argv``):

    1. Path of a tree model file, loaded with ``TreeModel.from_file``.
    2. Comma-separated species names; they are paired positionally with the
       symbols of each column.
    3. Optional path of a column file. Each non-blank line is split on
       whitespace; the first field is the column string and the second
       must parse as an integer (presumably an observation count --
       confirm against whatever produces the file).

    Output (stdout):
        A header line ``# <names>`` is printed first.  With a column file,
        every input row is echoed tab-separated with the space-joined
        values returned by ``lik_to_prob`` appended.  Without one, every
        combination of the model alphabet across the named species is
        enumerated and printed with ``?`` in the second column.

    Raises:
        IndexError: fewer than two command-line arguments, or a non-blank
            row with fewer than two fields.
        ValueError: the second field of a row is not an integer.
    """
    # Close the model file deterministically instead of leaking the handle.
    # NOTE(review): assumes from_file consumes the file eagerly -- confirm.
    with open(sys.argv[1]) as model_file:
        tm = bx.phylo.phast.TreeModel.from_file(model_file)
    t = bx.phylo.newick.newick_parser.parse_string(tm.tree)
    names = sys.argv[2].split(',')
    nucs = tm.alphabet
    background = tm.background

    # Single-argument print(...) emits the same text whether print is a
    # statement (Python 2) or a function.
    print("# " + ','.join(names))

    if len(sys.argv) > 3:
        with open(sys.argv[3]) as column_file:
            for line in column_file:
                fields = line.rstrip("\r\n").split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no column.
                    continue
                # Rows were previously collected into a list that was never
                # read; only the integer check on the second field is kept
                # so malformed rows still fail loudly.
                int(fields[1])
                # HACK: "N" in the input is rewritten to the "*" symbol
                # before the column is handed to felsen.
                col = fields[0].replace("N", "*")
                a = dict(zip(names, col))
                lik = felsen(t, a, tm)
                prob = lik_to_prob(lik, background, tm)
                print("\t".join(fields + [' '.join(map(str, prob))]))
    else:
        # No column file: enumerate every assignment of alphabet symbols
        # to the named species.
        for rows in cookbook.cross_lists(*([nucs] * len(names))):
            a = dict(zip(names, rows))
            lik = felsen(t, a, tm)
            prob = lik_to_prob(lik, background, tm)
            print("\t".join([''.join(rows), "?", ' '.join(map(str, prob))]))
except: counts[ col ] = 1 # counts = [ ( value, key ) for key, value in counts.iteritems() ] # counts.sort() # counts.reverse() ## for count, col in counts: ## print "".join(col), count options, args = doc_optparse.parse( __doc__ ) wildcard = False if options.wildcard: wildcard = True max_wildcard = nspecies - 1 if options.maxwildcards: wildcard = True max_wildcard = int( options.maxwildcards ) nucs = "ACGT-" if wildcard: nucs += "*" for col in cross_lists( *( [ nucs ] * nspecies ) ): col = ''.join( col ) if wildcard and col.count( "*" ) > max_wildcard: continue if col.count( "-" ) == nspecies: continue print col, counts.get( col, 0 )
counts[col] = 1 # counts = [ ( value, key ) for key, value in counts.iteritems() ] # counts.sort() # counts.reverse() ## for count, col in counts: ## print "".join(col), count options, args = doc_optparse.parse(__doc__) wildcard = False if options.wildcard: wildcard = True max_wildcard = nspecies - 1 if options.maxwildcards: wildcard = True max_wildcard = int(options.maxwildcards) nucs = "ACGT-" if wildcard: nucs += "*" for col in cross_lists(*([nucs] * nspecies)): col = ''.join(col) if wildcard and col.count("*") > max_wildcard: continue if col.count("-") == nspecies: continue print col, counts.get(col, 0)