Example #1
0
def main(args):
    """Plot per-ELM host/virus bar charts for ELMs that pass the filters.

    Args:
        args: argv-style list laid out as
            ``[prog, file_1, species_1, ..., file_n, species_n, cutoff,
            plot_dir]``.  A file whose path contains ``'flu'`` is loaded
            with ``utils.get_seq2count_dict``; every other file is loaded
            with ``utils.get_seq2count_dict_for_seqs``, which also receives
            the virus dictionaries.

    Raises:
        IndexError: if ``args`` has fewer than two entries.
        ValueError: if the cutoff argument cannot be converted to float.
        KeyError: if an ELM passes ``utils.check_ones`` but one of the
            hard-coded species ('Sus_scrofa', 'H_sapiens',
            'Gallus_gallus') needed for the comparison is missing.
    """
    # args[0] is the program name and the last two entries are cutoff and
    # plot_dir; everything in between is consumed as (file, species) pairs.
    file_species_pairs = [[args[i], args[i + 1]]
                          for i in range(1, len(args) - 2, 2)]

    # Read the trailing options from `args` (not sys.argv) so the function
    # honours the argument list it was actually called with.
    cutoff = float(args[-2])
    plot_dir = args[-1]

    # First grab the virus ELMs; remember which file belongs to each host
    # species so it can be loaded once all virus dictionaries are known.
    virus2elms = {}
    host_files = {}
    for path, species in file_species_pairs:
        if 'flu' in path:
            virus2elms[species] = utils.get_seq2count_dict(path, cutoff)
        else:
            host_files[species] = path

    # Only ELMs seen in at least one virus dictionary are considered.
    elms = set()
    for virus_elms in virus2elms.values():
        elms.update(virus_elms)

    # Load every host from its OWN file.  The previous code reused the loop
    # variable left over from the parsing loop, so all hosts were read from
    # whichever file happened to be last on the command line.
    species2elms = {}
    for species, path in host_files.items():
        species2elms[species] = utils.get_seq2count_dict_for_seqs(path,
                                                                  cutoff,
                                                                  virus2elms)
    species2elms.update(virus2elms)

    for elm in elms:
        if not utils.check_ones(species2elms, elm):
            continue
        swine = species2elms['Sus_scrofa'][elm]
        # NOTE(review): the -1 threshold is kept from the original; if
        # distance_elms never returns a value <= -1 the first test is always
        # true and the chicken comparison is never reached -- confirm intent.
        if (utils_distance.distance_elms(
                swine, species2elms['H_sapiens'][elm]) > -1.0
                or utils_distance.distance_elms(
                    swine, species2elms['Gallus_gallus'][elm]) > 0.0):
            utils_plot.elm_host_barplot(
                species2elms, elm,
                os.path.join(plot_dir, elm + '.virus_hosts.png'))
Example #2
0
#                                       mouse)
# print utils_distance.distance_species(monkey,
#                                       mouse)

# Load one dictionary per flu strain via utils.get_seq2count_dict, all with
# the same 0.4 cutoff.
virus2dict = {}
for strain, elm_file in (('swineFlu', 'results/flu_elmdict_swine'),
                         ('chickenFlu', 'results/flu_elmdict_chicken'),
                         ('humanFlu', 'results/flu_elmdict_human')):
    virus2dict[strain] = utils.get_seq2count_dict(elm_file, 0.4)

# Load one dictionary per host genome (cutoff 0), passing the virus
# dictionaries along, then merge the virus entries into the same mapping.
species2dict = {}
for genome in ('H_sapiens', 'Gallus_gallus', 'Sus_scrofa'):
    host_file = 'results/elmdict_' + genome + '.txt'
    species2dict[genome] = utils.get_seq2count_dict_for_seqs(host_file,
                                                             0.0,
                                                             virus2dict)
species2dict.update(virus2dict)

d = utils_distance.distance_matrix(species2dict)
elm_d = utils_distance.elm_distance_matrix(species2dict)
# Disabled debug dump of the per-ELM distances:
#for elm in elm_d:
#    for species_pair in elm_d[elm]:
#        print elm + '\t' + species_pair + '\t' + str(elm_d[elm][species_pair])

# Emit one tab-separated line per unordered pair of keys in the matrix.
# A single parenthesised argument prints the same text under the Python 2
# print statement as under the print function.
for first, second in itertools.combinations(d, 2):
    print('\t'.join((first, second, str(d[first][second]))))
utils_plot.distance_heatmap(elm_d, 'test.png')