def main(args):
    """Plot per-ELM host/virus bar charts for the ELMs that pass the filters.

    Args:
        args: sys.argv-style list laid out as
            [program, file_1, species_1, ..., file_n, species_n,
             cutoff, plot_dir].
            A pair whose file path contains 'flu' is treated as a virus
            entry; every other pair is treated as a host species.
            The distance checks below index the species names
            'Sus_scrofa', 'H_sapiens' and 'Gallus_gallus', so those must be
            among the supplied species (a KeyError is raised otherwise).

    Side effects:
        Calls utils_plot.elm_host_barplot once per selected ELM, targeting
        '<plot_dir>/<elm>.virus_hosts.png'.
    """
    # Read the trailing options from the argument list we were handed rather
    # than from the global sys.argv, so the function honours its parameter.
    cutoff = float(args[-2])
    plot_dir = args[-1]

    # first grab virus ELMs; for hosts only remember which file belongs to
    # which species, because loading them needs the complete virus table.
    virus2elms = {}
    host2file = {}
    for idx in range(1, len(args) - 2, 2):
        path, species = args[idx], args[idx + 1]
        if 'flu' in path:
            virus2elms[species] = utils.get_seq2count_dict(path, cutoff)
        else:
            host2file[species] = path

    # Every ELM that appears in at least one virus dictionary.
    elms = set()
    for virus in virus2elms:
        elms.update(virus2elms[virus])

    # Load each host from its OWN file (previously the last file of the
    # argument list was reused for every host).
    species2elms = {}
    for species in host2file:
        species2elms[species] = utils.get_seq2count_dict_for_seqs(
            host2file[species], cutoff, virus2elms)
    species2elms.update(virus2elms)

    for elm in elms:
        if not utils.check_ones(species2elms, elm):
            continue
        pig = species2elms['Sus_scrofa'][elm]
        # NOTE(review): the -1 threshold presumably lets every ELM through
        # if distances are non-negative -- confirm the intended cutoffs.
        if (utils_distance.distance_elms(
                pig, species2elms['H_sapiens'][elm]) > float(-1)
                or utils_distance.distance_elms(
                    pig, species2elms['Gallus_gallus'][elm]) > float(0)):
            utils_plot.elm_host_barplot(
                species2elms, elm,
                os.path.join(plot_dir, elm + '.virus_hosts.png'))
#                                       mouse)
# print utils_distance.distance_species(monkey,
#                                       mouse)

# Load the three flu ELM dictionaries with a 0.4 cutoff.
virus2dict = {}
for virus_name, elmdict_path in (
        ('swineFlu', 'results/flu_elmdict_swine'),
        ('chickenFlu', 'results/flu_elmdict_chicken'),
        ('humanFlu', 'results/flu_elmdict_human')):
    virus2dict[virus_name] = utils.get_seq2count_dict(elmdict_path, float(.4))

# Load each host with a 0 cutoff, passing the virus dictionaries along,
# then fold the virus entries into the same species table.
species2dict = {}
for g in ('H_sapiens', 'Gallus_gallus', 'Sus_scrofa'):
    host_path = 'results/elmdict_' + g + '.txt'
    species2dict[g] = utils.get_seq2count_dict_for_seqs(
        host_path, float(0), virus2dict)
species2dict.update(virus2dict)

d = utils_distance.distance_matrix(species2dict)
elm_d = utils_distance.elm_distance_matrix(species2dict)

# for elm in elm_d:
#     for species_pair in elm_d[elm]:
#         print elm + '\t' + species_pair + '\t' + str(elm_d[elm][species_pair])

# One tab-separated line per unordered pair of keys in the distance matrix.
# A single parenthesised argument prints identically under Python 2.
for s1, s2 in itertools.combinations(d, 2):
    print(s1 + '\t' + s2 + '\t' + str(d[s1][s2]))

utils_plot.distance_heatmap(elm_d, 'test.png')