seen_elms = defaultdict(dict) for host in ls_of_hosts: host_elmCounts[host] = defaultdict(utils.init_zero) with open(results_dir + 'elmdict_' + host + '.redo') as f: for line in f: (elm, seq, count, fq) = line.strip().split('\t') if elm in use_elms: host_elmCounts[host][elm] += int(count) seen_elms[elm][host] = True #use_elms = {} #for elm in seen_elms: # if len(seen_elms[elm]) == len(ls_of_hosts): # use_elms[elm] = True # print len(use_elms) return (host_elmCounts, use_elms) use_elms = {} with open(use_elms_file) as f: for line in f: (elm, stuff) = line.strip().split('\t') use_elms[elm] = True hosts = global_settings.GENOMES if use_freqs == 'T': host_elmCounts, elms = get_host_freqs(hosts, use_elms) else: host_elmCounts, elms = get_host_counts(hosts, use_elms) host_vecs = utils.mk_count_vecs(host_elmCounts, elms) host_dists = utils.mk_count_dists(host_vecs) utils_plot.phylogeny_js(out_file, host_dists)
use_elms[elm] = True do_clustering = True if distance_file == 'NA': do_clustering = False if do_clustering: dis_file = os.path.join(results_dir, distance_file) mapping = utils.get_clusters(dis_file, dis_cutoff_init, dis_cutoff_meta) else: mapping = {} counts = utils.count_host_elmSeqs(global_settings.TEST_GENOMES, do_clustering, mapping, results_dir, use_elms, suffix) ls = [] for host in counts: ls.append(counts[host]) all_elmSeqs = {} #all_elmSeqs = utils_graph.intersectLists(ls) for host in counts: for elmSeq in counts[host]: all_elmSeqs[elmSeq] = True host_vecs = utils.mk_count_vecs(counts, all_elmSeqs) host_dists = utils.mk_count_dists(host_vecs) utils_plot.phylogeny_js(os.path.join(results_dir, out_file), host_dists)