def analyze_consensus_clusters():
    """
    Print the gene clusters that two trees have in common.

    First arg is data file, second and third args are tree files
    (presumably newick, as for analyze_clusters -- both are read
    with get_tree).

    A cluster is kept only when the same set of members appears among
    the clusters of both trees.  Each kept cluster is printed on its own
    line as a comma-separated list of gene names, shortest clusters
    first.  Raises IndexError if fewer than three args were given.
    """
    logging.debug('parse the gene names')
    data_filename = sys.argv[1]
    names = get_gene_names(data_filename)
    # Both trees go through exactly the same pipeline, so run it in a loop
    # instead of repeating the code.  sys.argv is indexed inside the loop
    # so that a missing third arg is only noticed after the first tree has
    # been processed, as before.
    tree_steps = (
        ('get clusters from the first tree', 2),
        ('get clusters from the second tree', 3),
    )
    clusters_per_tree = []
    for message, arg_index in tree_steps:
        logging.debug(message)
        tree_filename = sys.argv[arg_index]
        root = get_tree(tree_filename)
        root = treebuilder.center_and_sort_tree(root)
        clusters_per_tree.append(treebuilder.get_clusters(root))
    first_clusters, second_clusters = clusters_per_tree
    logging.debug('get consensus clusters')
    # frozensets are hashable and ignore member order, so two clusters
    # match whenever they contain the same members.
    consensus = (set(frozenset(c) for c in first_clusters) &
                 set(frozenset(c) for c in second_clusters))
    logging.debug('show the corresponding genes')
    # Sort by cluster size; equal sizes are ordered by the member lists.
    length_cluster_pairs = sorted((len(c), list(c)) for c in consensus)
    for _length, members in length_cluster_pairs:
        # The single-argument parenthesised form prints the same text
        # under both Python 2 and Python 3.
        print(', '.join(names[x] for x in members))
def analyze_clusters():
    """
    Print the gene clusters of a single tree.

    First arg is data file, second arg is newick file.

    The tree is centered and sorted before its clusters are extracted.
    Each cluster is printed on its own line as a comma-separated list of
    gene names, shortest clusters first.  Raises IndexError if fewer than
    two args were given.
    """
    logging.debug('parse the gene names')
    data_filename = sys.argv[1]
    names = get_gene_names(data_filename)
    logging.debug('parse the tree')
    tree_filename = sys.argv[2]
    root = get_tree(tree_filename)
    logging.debug('center and sort the tree')
    root = treebuilder.center_and_sort_tree(root)
    logging.debug('get the clusters')
    clusters = treebuilder.get_clusters(root)
    logging.debug('show the corresponding genes')
    # Sort by cluster size; equal sizes are ordered by the member lists.
    length_cluster_pairs = sorted((len(c), list(c)) for c in clusters)
    for _length, members in length_cluster_pairs:
        # The single-argument parenthesised form prints the same text
        # under both Python 2 and Python 3.
        print(', '.join(names[x] for x in members))