# Read the species records for `dataset` and collect, for every location
# (lat/lon rounded to 3 decimals), the set of species names that are also
# present in `all_species`.
with open('data/%s.csv' % dataset) as data_file:
    reader = csv.reader(data_file)
    # Skip the header row. The builtin next() with a default works on both
    # Python 2.6+ and 3, and does not raise StopIteration on an empty file.
    next(reader, None)
    # The first column is discarded: locations are keyed by their rounded
    # coordinates, not by the value stored in the file.
    for _route_id, lat, lon, genus, species, count in reader:
        route = (round(float(lat), 3), round(float(lon), 3))
        spname = '%s %s' % (genus, species)
        if spname not in all_species:
            continue
        count_data_spp.add(spname)
        # Parsed so that a malformed count still raises; the value itself is
        # not used any further in this block.
        count = int(count)
        routes.setdefault(route, set()).add(spname)

# Run the clustering once at threshold 0 before reading `_med_distance` off
# the clades (presumably color_clusters is what sets that attribute --
# TODO confirm).
color_clusters(tree,
               threshold=0,
               draw=False,
               all_colors=xrange(len(tips)),
               color_attr='group',
               min_clade_size=1)

# Candidate thresholds: 0 followed by the distinct values that
# s.percentileofscore returns for each clade's `_med_distance` against
# `tree._distances`, in ascending order. Clades without the attribute are
# ignored.
clade_percentiles = {
    s.percentileofscore(tree._distances, clade._med_distance)
    for clade in tree.find_elements()
    if hasattr(clade, '_med_distance')
}
thresholds = [0] + sorted(clade_percentiles)
print(thresholds)

# Container for the per-threshold results gathered by the code that follows.
data = {}
for threshold in thresholds:
    if threshold > 50: break
    sys.stdout.write(str(threshold) + '...')
    sys.stdout.flush()
    color_clusters(tree,
                   threshold=threshold,
tips = tree.get_terminals()
# Map each tip's name to the tip object so species can be looked up by name.
all_species = {t.name: t for t in tips}

# Species sets keyed by (lat, lon), each coordinate rounded to the nearest
# whole number.
routes = {}
with open('fia.csv') as data_file:
    reader = csv.reader(data_file)
    # Skip the header row. The builtin next() with a default works on both
    # Python 2.6+ and 3, and does not raise StopIteration on an empty file.
    next(reader, None)
    for lat, lon, genus, species, count in reader:
        spname = '%s %s' % (genus, species)
        # Rows for species that are not tips of the tree are ignored before
        # any numeric parsing happens.
        if spname not in all_species:
            continue
        lat, lon = round(float(lat)), round(float(lon))
        # Parsed so that a malformed count still raises; the value itself is
        # not used any further in this block.
        count = int(count)
        route = lat, lon
        routes.setdefault(route, set()).add(spname)

# For every threshold in 0, 5, ..., 35: re-run the clustering, then record
# for each route the set of `group` values found on its species' tips.
# Result layout: results[threshold][route] -> set of groups.
results = {}
for threshold in np.arange(0, 40, 5):
    print(threshold)
    # Register the (initially empty) per-route mapping before clustering.
    groups_by_route = results[threshold] = {}
    color_clusters(tree,
                   threshold=threshold,
                   draw=False,
                   all_colors=xrange(len(tips)),
                   color_attr='group',
                   min_clade_size=1)
    for route, spp in routes.iteritems():
        # Species unknown to the tree, and tips that carry no `group`
        # attribute, contribute nothing to the route's set.
        known_tips = (all_species[sp] for sp in spp if sp in all_species)
        groups_by_route[route] = set(
            tip.group for tip in known_tips if hasattr(tip, 'group'))

# Protocol -1 selects the highest available pickle protocol, which is a
# binary format, so the file has to be opened in binary mode: text mode can
# corrupt the stream on platforms that translate newlines and is rejected
# outright on Python 3.
with open('evolutionary_scale.pkl', 'wb') as pickle_file:
    pkl.dump(results, pickle_file, -1)
import Bio.Phylo as bp
import geophy.cluster as g
import sys

# The tree file is taken from the first command-line argument; when none is
# given, fall back to the default file name. An explicit length check
# replaces a bare `except:`, which would also have swallowed unrelated
# errors such as KeyboardInterrupt.
tree_file = sys.argv[1] if len(sys.argv) > 1 else 'bbs_granivores.new'

# Parse the tree from Newick format.
t = bp.read(tree_file, 'newick')

# Cluster the tree at a threshold of 10 (see geophy.cluster.color_clusters
# for what the threshold means and whether the result is drawn).
g.color_clusters(t, threshold=10)