with open('data/%s.csv' % dataset) as data_file: reader = csv.reader(data_file) reader.next() for route, lat, lon, genus, species, count in reader: route = (round(float(lat), 3), round(float(lon), 3)) spname = '%s %s' % (genus, species) if not spname in all_species: continue count_data_spp.add(spname) count = int(count) if not route in routes: routes[route] = set() routes[route].add(spname) data = {} color_clusters(tree, threshold=0, draw=False, all_colors=xrange(len(tips)), color_attr='group', min_clade_size=1) thresholds = [0] + sorted( set([ s.percentileofscore(tree._distances, clade._med_distance) for clade in tree.find_elements() if hasattr(clade, '_med_distance') ])) print thresholds data = {} for threshold in thresholds: if threshold > 50: break sys.stdout.write(str(threshold) + '...') sys.stdout.flush() color_clusters(tree, threshold=threshold,
# Collect, for each clustering threshold, the set of tree cluster groups
# present at every location listed in fia.csv, then pickle the result.
# NOTE: this script targets Python 2 (xrange, dict.iteritems).

# Look-up from tip name to the tip object of the tree.
tips = tree.get_terminals()
all_species = {t.name: t for t in tips}

# routes maps a (lat, lon) pair, rounded to whole numbers, to the set of
# species names recorded at that location.
routes = {}
with open('fia.csv') as data_file:
    reader = csv.reader(data_file)
    next(reader)  # skip the first row (presumably a header -- confirm)
    for lat, lon, genus, species, count in reader:
        spname = '%s %s' % (genus, species)
        # Ignore species that are not tips of the tree.
        if spname not in all_species:
            continue
        lat, lon = round(float(lat)), round(float(lon))
        # The count value is not used further; the conversion is kept so
        # that a non-integer count still raises, as it did before.
        int(count)
        route = lat, lon
        routes.setdefault(route, set()).add(spname)

# results[threshold][route] is the set of `group` values of the species
# found at that route after clustering the tree at that threshold.
results = {}
for threshold in np.arange(0, 40, 5):
    print(threshold)
    results[threshold] = {}
    color_clusters(tree, threshold=threshold, draw=False,
                   all_colors=xrange(len(tips)), color_attr='group',
                   min_clade_size=1)
    for route, spp in routes.iteritems():
        # Only tips that carry a `group` attribute after the
        # color_clusters call contribute to the route's set.
        results[threshold][route] = set(
            all_species[sp].group
            for sp in spp
            if sp in all_species and hasattr(all_species[sp], 'group')
        )

# Protocol -1 selects the highest pickle protocol, which is a binary format,
# so the output file must be opened in binary mode.
with open('evolutionary_scale.pkl', 'wb') as pickle_file:
    pkl.dump(results, pickle_file, -1)
# Read a Newick tree and run geophy's color_clusters on it with threshold 10.
import Bio.Phylo as bp
import geophy.cluster as g
import sys

# The tree file is taken from the first command-line argument; when no
# argument is given, fall back to the default file name.  Only IndexError is
# caught so that unrelated errors (and KeyboardInterrupt/SystemExit) are not
# silently swallowed.
try:
    tree_file = sys.argv[1]
except IndexError:
    tree_file = 'bbs_granivores.new'

t = bp.read(tree_file, 'newick')
g.color_clusters(t, threshold=10)