Exemple #1
0
    seen_elms = defaultdict(dict)
    for host in ls_of_hosts:
        host_elmCounts[host] = defaultdict(utils.init_zero)
        with open(results_dir + 'elmdict_' + host + '.redo') as f:
            for line in f:
                (elm, seq, count, fq) = line.strip().split('\t')
                if elm in use_elms:
                    host_elmCounts[host][elm] += int(count)
                    seen_elms[elm][host] = True
    #use_elms = {}
    #for elm in seen_elms:
#        if len(seen_elms[elm]) == len(ls_of_hosts):
    #    use_elms[elm] = True
   # print len(use_elms)
    return (host_elmCounts, use_elms)

# Build the lookup of ELMs to use: each line of use_elms_file must hold
# exactly two tab-separated fields, and only the first one is kept as a key
# (any other field count raises ValueError on the unpack below).
use_elms = {}
with open(use_elms_file) as elm_file:
    for row in elm_file:
        elm, _ignored = row.strip().split('\t')
        use_elms[elm] = True

hosts = global_settings.GENOMES

# use_freqs == 'T' selects get_host_freqs; any other value selects
# get_host_counts.  Both are called with the same arguments and both are
# unpacked into a (per-host data, elms) pair.
load_host_elms = get_host_freqs if use_freqs == 'T' else get_host_counts
host_elmCounts, elms = load_host_elms(hosts, use_elms)

# Per-host data -> vectors -> distances -> handed to utils_plot.phylogeny_js
# together with out_file.
host_vecs = utils.mk_count_vecs(host_elmCounts, elms)
host_dists = utils.mk_count_dists(host_vecs)
utils_plot.phylogeny_js(out_file, host_dists)
        use_elms[elm] = True

# A distance_file of 'NA' switches clustering off.
do_clustering = distance_file != 'NA'

# Without clustering the mapping stays empty; otherwise it is obtained from
# utils.get_clusters using the distance file located under results_dir.
mapping = {}
if do_clustering:
    dis_file = os.path.join(results_dir, distance_file)
    mapping = utils.get_clusters(dis_file, dis_cutoff_init, dis_cutoff_meta)

counts = utils.count_host_elmSeqs(
    global_settings.TEST_GENOMES,
    do_clustering,
    mapping,
    results_dir,
    use_elms,
    suffix,
)

# One entry per host, in iteration order of counts.  Not consumed below; it
# was the input of a disabled alternative, utils_graph.intersectLists(ls).
ls = [counts[host] for host in counts]

# Union of every elmSeq seen in any host, kept as the keys of a dict whose
# values are all True.
all_elmSeqs = {}
for host in counts:
    all_elmSeqs.update(dict.fromkeys(counts[host], True))

host_vecs = utils.mk_count_vecs(counts, all_elmSeqs)
host_dists = utils.mk_count_dists(host_vecs)
plot_path = os.path.join(results_dir, out_file)
utils_plot.phylogeny_js(plot_path, host_dists)