def output_groups_by_species(min_group_size=2):
    """Write a species-by-group presence table to 'groups_by_species.csv'.

    The groups returned by ``get_groups()`` are ordered from largest to
    smallest and each one gets a row, labelled '1', '2', ... in that order.
    Every species returned by ``kv.get_species_collections()`` gets a column.
    A cell is 1 when the species occurs in the group (the species name is
    read from the first element of each group member) and 0 otherwise.

    Groups with fewer than ``min_group_size`` members still get a row, but
    that row is left as all zeros.

    :param min_group_size: smallest group size that is marked in the table.
    :raises KeyError: if a group names a species that is not one of the
        table's columns.
    """
    all_species = kv.get_species_collections()
    # Sort a copy so the list handed back by get_groups() is not reordered.
    groups_list = sorted(get_groups(), key=len, reverse=True)

    groups_df = pd.DataFrame(
        data={n: 0 for n in all_species},
        index=[str(i) for i in range(1, len(groups_list) + 1)],
    )
    # Column positions are looked up once so each cell can be written with a
    # single positional setter instead of chained indexing
    # (groups_df[species][i] = 1), which may assign to a temporary copy.
    column_of = {name: pos for pos, name in enumerate(groups_df.columns)}

    for row, group in enumerate(groups_list):
        if len(group) < min_group_size:
            # Sorted by descending size: every remaining group is too small.
            break
        for member in group:
            groups_df.iat[row, column_of[member[0]]] = 1
    groups_df.to_csv('groups_by_species.csv')

    # NOTE(review): list_of_species_pairs, fig and ypos are not assigned
    # anywhere in this function, and pairs is only assigned under the
    # __main__ guard. Unless they exist as module globals, the code below
    # raises NameError. It looks like the tail of a separate plotting
    # routine -- confirm before relying on it.
    for pair in list_of_species_pairs:
        # Only pairs whose first species matches the first species of the
        # first entry in pairs are plotted.
        if pair[0] == pairs[0][0]:
            ax = fig.add_axes([1,ypos,1,1])
            output_loc_hist(pair[0], pair[1], ax)

    plot_url = py.plot_mpl(fig)
    print(plot_url)

    # plt.xlabel("Position")
    # plt.ylabel("percent identity")
    # plt.savefig('/Users/KBLaptop/Desktop/try.pdf')

if __name__ == '__main__':
    # Point kvasir at 'more_genomes' (presumably the database name -- confirm)
    # and work from that run's output directory.
    kv.mongo_init('more_genomes')
    os.chdir('/Users/KBLaptop/computation/kvasir/data/output/more_genomes/')

    ls = kv.get_species_collections()
    print(ls)
    # This species is left out of the pairwise comparison.
    ls.remove('Arthrobacter_arilaitensis_Re117')

    # Every unordered pair of the remaining species, as 2-tuples.
    pairs = list(combinations(ls, 2))
    plot_many(pairs)


    #     if os.path.isfile('{}_{}.pdf'.format(pair[0], pair[1])):
    #         continue
    #     try:
    #         output_loc_hist(pair[0], pair[1])
    #     except RuntimeError:
    #         print "Couldn't compare {} and {}".format(pair[0], pair[1])