def output_groups_by_species(min_group_size=2):
    """Write a species-by-group presence table to ``groups_by_species.csv``.

    Species names come from ``kv.get_species_collections()`` and the groups
    from ``get_groups()``.  The list of groups is sorted in place, largest
    first.  The table has one column per species and one row per group; rows
    are labelled with the strings ``"1"`` .. ``"N"`` in that sorted order and
    every cell starts at 0.  For each group holding at least
    ``min_group_size`` members, the cell of every species appearing in the
    group (taken from the first element of each member) is set to 1.

    Groups smaller than ``min_group_size`` keep their row, filled with zeros.

    :param min_group_size: minimum number of members a group needs for its
        species to be marked in the table.
    :raises KeyError: if a group member names a species that is not one of
        the table's columns.
    """
    all_species = kv.get_species_collections()
    groups_list = get_groups()
    groups_list.sort(key=len, reverse=True)

    groups_df = pd.DataFrame(
        data={name: 0 for name in all_species},
        index=[str(number + 1) for number in range(len(groups_list))],
    )

    # Resolve columns by position so each write is one direct .iloc
    # assignment.  Chained indexing (df[col][row] = 1) may write to a
    # temporary copy and leave the table untouched.  get_loc also raises
    # KeyError for an unknown species rather than adding a new column.
    column_position = groups_df.columns.get_loc

    for row, group in enumerate(groups_list):
        if len(group) < min_group_size:
            # Groups are in descending size order, so no later group can
            # qualify; their rows stay all zero.
            break
        for member in group:
            groups_df.iloc[row, column_position(member[0])] = 1

    groups_df.to_csv('groups_by_species.csv')
# NOTE(review): the statements down to the commented-out plt calls are the
# tail of a function whose ``def`` line is outside this view (presumably
# ``plot_many``, which the script block below calls -- confirm).  ``fig`` and
# ``ypos`` are defined in the part that is not visible.  The nesting below
# was reconstructed from whitespace-collapsed source; verify it against the
# original file.
    for pair in list_of_species_pairs:
        # NOTE(review): this compares against ``pairs``, which is not the
        # loop's own ``list_of_species_pairs``; it appears to rely on the
        # module-level ``pairs`` created in the __main__ block -- confirm
        # this is intended.
        if pair[0] == pairs[0][0]:
            ax = fig.add_axes([1,ypos,1,1])
            output_loc_hist(pair[0], pair[1], ax)
    plot_url = py.plot_mpl(fig)
    print plot_url
    # plt.xlabel("Position")
    # plt.ylabel("percent identity")
    # plt.savefig('/Users/KBLaptop/Desktop/try.pdf')


# Script entry point: initialise the 'more_genomes' database, move into its
# output directory, and plot every pairwise combination of species.
if __name__ == '__main__':
    kv.mongo_init('more_genomes')
    # Hard-coded, machine-specific output directory.
    os.chdir('/Users/KBLaptop/computation/kvasir/data/output/more_genomes/')
    ls = kv.get_species_collections()
    print ls
    # Drop this one species before pairing; list.remove raises ValueError
    # if the name is not present.
    ls.remove('Arthrobacter_arilaitensis_Re117')
    # Build every unordered pair of the remaining species as (first, second).
    pairs = []
    for pair in combinations(ls, 2):
        pairs.append((pair[0], pair[1]))
    plot_many(pairs)
    # Disabled alternative kept from the original: compare each pair
    # individually, skipping pairs whose PDF already exists.
    # if os.path.isfile('{}_{}.pdf'.format(pair[0], pair[1])):
    #     continue
    # try:
    #     output_loc_hist(pair[0], pair[1])
    # except RuntimeError:
    #     print "Couldn't compare {} and {}".format(pair[0], pair[1])