def plot(cluster, funcs, labels, filename="fingerprints.pdf",
         plot_title='Scores by Cluster', cmap='Paired',
         filter=lambda a: True, plot_pop=False, run_dir="../run/"):
    """Save a grouped bar chart of mean per-cluster scores with error bars.

    Only clusters with more than 700 members are charted. For every function
    in ``funcs`` one bar is drawn per remaining cluster: its height is the
    mean of ``func(agent)`` over the members accepted by ``filter`` and its
    error bar is the standard error (std / sqrt(n)) of that same sample.

    Args:
        cluster: Cluster file handed to ``rc.load`` and ``rc.load_clusters``.
        funcs: Sequence of callables, each mapping an agent to a number.
        labels: Legend labels, one per entry of ``funcs``.
        filename: Path the figure is saved to.
        plot_title: Title drawn above the chart.
        cmap: Accepted for interface compatibility; bar colours are always
            taken from ``cm.Paired``.
        filter: Predicate selecting the agents that contribute to a score.
        plot_pop: Accepted for interface compatibility; unused.
        run_dir: Accepted for interface compatibility; unused.

    Raises:
        AssertionError: If ``labels`` and ``funcs`` differ in length.
        ValueError: If no cluster has more than 700 members (the ``zip``
            unpacking below then has nothing to unpack).
    """
    assert len(labels) == len(funcs), "must have a label for every func"

    # load in cluster data
    # NOTE(review): ``ac`` is never read in this function; the call is kept
    # in case rc.load is relied on for validation - confirm and drop.
    ac = rc.load(cluster)
    clusters = rc.load_clusters(cluster)

    # compress clusters
    # TODO: replace with call to agent.cluster.compress(700)
    clusters = [[i, clust] for i, clust in enumerate(clusters)
                if len(clust) > 700]
    ids, clusters = zip(*clusters)
    num_clusters = len(clusters)

    # build up cluster data: score each accepted agent exactly once per
    # function and derive both the mean and the standard error from that
    # single sample.
    data = []
    err = []
    for func in funcs:
        means = []
        errors = []
        for clust in clusters:
            scores = [func(a) for a in clust if filter(a)]
            means.append(np.average(scores))
            errors.append(np.std(scores) / np.sqrt(len(scores)))
        data.append(means)
        err.append(errors)

    # plot actual bars for each function; one extra slot of width is left
    # empty so neighbouring cluster groups do not touch.
    ind = np.arange(num_clusters)
    width = 1.0 / (len(funcs) + 1)
    bars = []
    for offset, datum in enumerate(data):
        b = bar(ind + (offset * width), datum, width,
                color=cm.Paired(float(offset) / len(funcs)),
                yerr=err[offset], capsize=1.5)
        bars.append(b)

    # generate final plot
    title(plot_title, weight='black')
    ylabel('Normalized Value', weight='bold')
    ylim(ymin=0, ymax=1.0)
    xlabel('Cluster', weight='bold')
    # tick labels are the original (pre-compression) cluster indices
    xticks(ind + (width * len(funcs) / 2), ["%d" % cid for cid in ids])
    legend([b[0] for b in bars], labels, loc='upper left')

    savefig(filename, dpi=200)
def plot(cluster_file, run_dir='../run/', anim_path=None, frame_path=None,
         min_size=None):
    """Render one frame every third time step and encode them into a movie.

    Frames are produced by ``plot_step`` into ``frame_path`` and then
    combined by the external ``mencoder`` program into
    ``<anim_path>/output.avi``.

    Args:
        cluster_file: Cluster file passed to ``rc.load_clusters``.
        run_dir: Accepted for interface compatibility; unused.
        anim_path: Output directory for the movie; defaults to ``'anim'``.
        frame_path: Directory for the PNG frames; defaults to
            ``<anim_path>/frames``.
        min_size: When not None, clusters are first passed through
            ``compress_clusters(clusters, min_size)``.
    """
    # configure default animation and frame path
    if anim_path is None:
        anim_path = 'anim'
    if frame_path is None:
        frame_path = os.path.join(anim_path, 'frames')

    # ensure paths exist; makedirs (unlike mkdir) also creates any missing
    # parent directories of a caller-supplied nested path.
    for path in (anim_path, frame_path):
        if not os.path.isdir(path):
            os.makedirs(path)

    # load clusters
    clusters = rc.load_clusters(cluster_file, sort='random')

    # compress, if a threshold is set
    if min_size is not None:
        clusters = compress_clusters(clusters, min_size)

    # calculate number of clusters
    n_clusters = len(clusters)

    agent_cluster = rc.agent_cluster(clusters)

    # establish cluster object as a shared memory object.
    #manager = Manager()
    #cluster = manager.dict(agent_cluster)

    start = 1
    stop = 300

    # grouping to keep memory footprint reasonably small.
    # TODO: Fix this using iterators and multiprocessing.Pool(). Running into
    # major issues with serialization of Agent class that is preventing
    # plot_step from being picklable.
    for begin in xrange(start, stop, 1500):
        end = min(begin + 1499, stop)
        p = get_population_during_time(begin, end)

        for t in xrange(begin, end, 3):
            plot_step(t, p, agent_cluster, n_clusters, frame_path,
                      cmap='Paired')

    # Pass the arguments as a list rather than splitting a formatted string,
    # so directories containing whitespace stay a single argument.
    subprocess.call([
        "mencoder",
        "mf://%s/*.png" % frame_path,
        "-o", "%s/output.avi" % anim_path,
        "-mf", "type=png:w=600:h=800:fps=30",
        "-ovc", "x264",
        "-x264encopts", "qp=20",
    ])
def plot(cluster, filename="plot.png", func=lambda a: a.id, plot_title='',
         cmap='Paired', filter=lambda a: True, draw_legend=False,
         radius='2.25', sym=None):
    """Plot each cluster's population over time and print a timeline table.

    For every cluster the number of living members is counted for each of
    30000 time steps (an agent counts from ``birth`` up to, but excluding,
    ``death``). One line per cluster is drawn and the figure is saved; then
    one row per cluster is printed with: cluster index, member count, first
    occupied step, step of peak population, last occupied step + 1, and the
    peak population.

    Args:
        cluster: Cluster file passed to ``rc.load`` / ``rc.load_clusters``.
        filename: Path the figure is saved to.
        func, plot_title, cmap, filter: Accepted for interface
            compatibility; unused.
        draw_legend: When true, add a figure legend listing every cluster.
        radius: Value shown as epsilon in the plot title.
        sym: Optional tag drawn in the top-left corner as ``(sym)``.
    """
    # NOTE(review): neither ``ac`` nor ``p`` is read below; both calls are
    # kept in case Agent() depends on state they set up - confirm and drop.
    ac = rc.load(cluster)
    clusters = rc.load_clusters(cluster)
    p = get_population()

    n_steps = 30000

    cluster_pops = []
    # One record per cluster:
    #   [max population, step of max, first occupied step, last occupied step]
    # A first occupied step of -1 means "nothing seen yet".
    cluster_pop_max = []
    for agents in clusters:
        counts = [0] * n_steps
        stats = [0, 0, -1, 0]
        for agent in agents:
            a = Agent(agent)
            for i in range(a.birth, a.death):
                counts[i] += 1
                # Track the earliest step over all members, not merely the
                # first one visited, so the result is independent of the
                # order in which agents are listed.
                if stats[2] == -1 or i < stats[2]:
                    stats[2] = i
                if i > stats[3]:
                    stats[3] = i
                if counts[i] > stats[0]:
                    stats[0] = counts[i]
                    stats[1] = i
        cluster_pops.append(counts)
        cluster_pop_max.append(stats)

    lines = []
    for i, counts in enumerate(cluster_pops):
        lines.append(
            pylab.plot(range(n_steps), counts,
                       label=("%d: k=%d" % (i, len(clusters[i]))),
                       color=pylab.cm.Paired(float(i) / len(clusters))))

    if draw_legend:
        # one legend column per started group of 35 clusters
        pylab.figlegend(
            lines,
            ["%d: k=%d" % (i, len(clust))
             for i, clust in enumerate(clusters)],
            'center right',
            ncol=((len(clusters) // 35) + 1),
            prop=dict(size=6))

    heading = r"Cluster Population ($\epsilon$ = %s, %d clusters)" % (
        radius, len(clusters))
    pylab.title(heading, weight='black')
    pylab.xlabel("Time", weight='bold')
    pylab.ylabel("Population Size", weight='bold')
    if sym is not None:
        pylab.figtext(0, .954, '(%s)' % sym, size=6, weight='black')

    pylab.savefig(filename, dpi=300)

    print('cluster, totalPop, start, peak, stop, maxPop')
    for clust, agents in enumerate(clusters):
        max_pop, peak, first, last = cluster_pop_max[clust]
        print("%s %s %s %s %s %s" % (
            clust, len(agents), first, peak, last + 1, max_pop))
def plot(cluster_file, filename="plot.png", func=lambda a: a.id,
         plot_title='', cmap='Paired', filter=lambda a: True,
         draw_legend=False, radius='2.25', sym=None, run_dir='../run/'):
    """Create a line plot showing population per cluster over time.

    For every cluster the number of living members is counted for each of
    30000 time steps (an agent counts from ``birth`` up to, but excluding,
    ``death``) and drawn as one line. The figure is saved to ``filename``.

    Args:
        cluster_file: Cluster file passed to ``rc.load_clusters``.
        filename: Path the figure is saved to.
        func, plot_title, cmap, filter: Accepted for interface
            compatibility; unused.
        draw_legend: When true, add a figure legend listing every cluster.
        radius: Value shown as epsilon in the plot title.
        sym: Optional tag drawn in the top-left corner as ``(sym)``.
        run_dir: Forwarded to ``get_population``.
    """
    # retrieve cluster data from cluster file
    clusters = rc.load_clusters(cluster_file)

    # grab cluster population
    # NOTE(review): ``p`` is not read below; the call is kept in case
    # Agent() depends on state it sets up - confirm and drop.
    p = get_population(run_dir=run_dir)

    n_steps = 30000

    lines = []
    for cluster, agents in enumerate(clusters):
        pop_by_time = [0] * n_steps

        for agent in agents:
            a = Agent(agent)
            for i in range(a.birth, a.death):
                pop_by_time[i] += 1

        # Label with the cluster index (not the leftover time-step variable
        # of the counting loop above).
        lines.append(
            pylab.plot(range(n_steps), pop_by_time,
                       label=("%d: k=%d" % (cluster, len(agents))),
                       color=pylab.cm.Paired(float(cluster) / len(clusters))))

    if draw_legend:
        # one legend column per started group of 35 clusters
        pylab.figlegend(
            lines,
            ["%d: k=%d" % (idx, len(members))
             for idx, members in enumerate(clusters)],
            'center right',
            ncol=((len(clusters) // 35) + 1),
            prop=dict(size=6))

    heading = r"Cluster Population ($\epsilon$ = %s, %d clusters)" % (
        radius, len(clusters))
    pylab.title(heading, weight='black')
    pylab.xlabel("Time", weight='bold')
    pylab.ylabel("Population Size", weight='bold')
    if sym is not None:
        pylab.figtext(0, .954, '(%s)' % sym, size=6, weight='black')

    pylab.savefig(filename, dpi=300)
def plot(cluster, filename="plot.png", plot_title='', cmap='RdBu_r'):
    """Save a hexbin plot of genetic versus geographic pairwise distance.

    Takes the slice ``[400:4000]`` of the population, drops agents whose
    ``birth`` equals 30000, z-scores the genomes, and for every unordered
    pair of remaining agents computes two values with ``dists_wrapper``:
    one from the normalised genomes and one from the positions recorded at
    ``birth + 1``. The pairs are binned with ``hexbin`` and saved.

    Side effects on the agent objects: ``genome`` is deleted from every
    agent and ``ngenome`` / ``positions`` are deleted once an agent has
    been compared with all later ones.

    Args:
        cluster: Cluster file passed to ``rc.load`` / ``rc.load_clusters``.
        filename: Path the figure is saved to.
        plot_title: Title drawn above the plot.
        cmap: Colormap name handed to ``hexbin``.
    """
    # NOTE(review): ``ac`` and ``clusters`` are not read below; the loads
    # are kept in case they are relied on for validation - confirm and drop.
    ac = rc.load(cluster)
    clusters = rc.load_clusters(cluster)
    p = get_population()[400:4000]
    p = [a for a in p if a.birth != 30000]

    # normalize genomes
    print("normalizing genomes...")
    gs = zscore([a.genome[:] for a in p], axis=1)
    for i, agent in enumerate(p):
        agent.ngenome = gs[i]
        del agent.genome

    print("calculating distances...")
    gene_results = []
    geo_results = []
    pool = Pool()
    try:
        for i, agent1 in enumerate(p):
            print("%s to everything else" % (agent1.id,))
            # Only later agents: each unordered pair is visited once.
            others = p[i + 1:]

            x = [(agent1.ngenome, agent2.ngenome) for agent2 in others]
            gene_results.extend(pool.map(dists_wrapper, x))

            y = [(agent1.positions[agent1.birth + 1],
                  agent2.positions[agent2.birth + 1]) for agent2 in others]
            geo_results.extend(pool.map(dists_wrapper, y))

            # agent1 is never needed again; release its bulky attributes.
            del agent1.ngenome
            del agent1.positions
    finally:
        # Always shut the worker processes down, even if a map call fails.
        pool.close()
        pool.join()

    xs = geo_results
    ys = gene_results

    print("plotting...")
    hexbin(xs, ys, cmap=cmap, alpha=0.6, edgecolors='none', mincnt=25)
    title(plot_title)
    xlabel('Geographic Distance')
    ylabel('Genetic Distance')
    xlim((0, 142))
    ylim((1000, 6000))
    colorbar()

    savefig(filename, dpi=200)
def plot(cluster, filename="plot.png", plot_title='', cmap='RdBu_r'):
    """Save a hexbin plot of genetic versus geographic pairwise distance.

    Takes the slice ``[400:4000]`` of the population, drops agents whose
    ``birth`` equals 30000, z-scores the genomes, and for every unordered
    pair of remaining agents computes two values with ``dists_wrapper``:
    one from the normalised genomes and one from the positions recorded at
    ``birth + 1``. The pairs are binned with ``hexbin`` and saved.

    Side effects on the agent objects: ``genome`` is deleted from every
    agent and ``ngenome`` / ``positions`` are deleted once an agent has
    been compared with all later ones.

    Args:
        cluster: Cluster file passed to ``rc.load`` / ``rc.load_clusters``.
        filename: Path the figure is saved to.
        plot_title: Title drawn above the plot.
        cmap: Colormap name handed to ``hexbin``.
    """
    # NOTE(review): ``ac`` and ``clusters`` are not read below; the loads
    # are kept in case they are relied on for validation - confirm and drop.
    ac = rc.load(cluster)
    clusters = rc.load_clusters(cluster)
    p = get_population()[400:4000]
    p = [a for a in p if a.birth != 30000]

    # normalize genomes
    print("normalizing genomes...")
    gs = zscore([a.genome[:] for a in p], axis=1)
    for i, agent in enumerate(p):
        agent.ngenome = gs[i]
        del agent.genome

    print("calculating distances...")
    gene_results = []
    geo_results = []
    pool = Pool()
    try:
        for i, agent1 in enumerate(p):
            print("%s to everything else" % (agent1.id,))
            # Only later agents: each unordered pair is visited once.
            others = p[i + 1:]

            x = [(agent1.ngenome, agent2.ngenome) for agent2 in others]
            gene_results.extend(pool.map(dists_wrapper, x))

            y = [(agent1.positions[agent1.birth + 1],
                  agent2.positions[agent2.birth + 1]) for agent2 in others]
            geo_results.extend(pool.map(dists_wrapper, y))

            # agent1 is never needed again; release its bulky attributes.
            del agent1.ngenome
            del agent1.positions
    finally:
        # Always shut the worker processes down, even if a map call fails.
        pool.close()
        pool.join()

    xs = geo_results
    ys = gene_results

    print("plotting...")
    hexbin(xs, ys, cmap=cmap, alpha=0.6, edgecolors='none', mincnt=25)
    title(plot_title)
    xlabel('Geographic Distance')
    ylabel('Genetic Distance')
    xlim((0, 142))
    ylim((1000, 6000))
    colorbar()

    savefig(filename, dpi=200)
def plot(cluster, filename="plot-filter.eps", funcs=(lambda a: a.id,),
         plot_title='Scores by Cluster', cmap='Paired',
         filter=lambda a: True, labels=('id',), plot_pop=False):
    """Save a grouped bar chart of mean per-cluster scores.

    Only clusters with more than 700 members are charted. For every function
    in ``funcs`` one bar is drawn per remaining cluster, its height being the
    mean of ``func(agent)`` over the members accepted by ``filter``. With
    ``plot_pop`` an additional bar series shows each cluster's size divided
    by 25346.0.

    Args:
        cluster: Cluster file passed to ``rc.load`` / ``rc.load_clusters``.
        filename: Path the figure is saved to.
        funcs: Sequence of callables, each mapping an agent to a number.
        plot_title: Title drawn above the chart.
        cmap: Accepted for interface compatibility; bar colours are always
            taken from ``cm.Paired``.
        filter: Predicate selecting the agents that contribute to a score.
        labels: Legend labels, one per entry of ``funcs``. The sequence is
            copied, never modified.
        plot_pop: When true, append the relative-population bar series.

    Raises:
        AssertionError: If ``labels`` and ``funcs`` differ in length.
        ValueError: If no cluster has more than 700 members (the ``zip``
            unpacking below then has nothing to unpack).
    """
    assert len(labels) == len(funcs), "must have a label for every func"

    # NOTE(review): ``ac`` is never read in this function; the call is kept
    # in case rc.load is relied on for validation - confirm and drop.
    ac = rc.load(cluster)
    clusters = rc.load_clusters(cluster)
    clusters = [[i, clust] for i, clust in enumerate(clusters)
                if len(clust) > 700]
    ids, clusters = zip(*clusters)
    num_clusters = len(clusters)

    ind = np.arange(num_clusters)
    # one slot per bar series plus one empty slot as a gap between groups
    n_series = len(funcs) + 1 if plot_pop else len(funcs)
    width = 1.0 / (n_series + 1)

    data = [[np.average([func(a) for a in clust if filter(a)])
             for clust in clusters]
            for func in funcs]

    # Work on a private list so a tuple default works and the caller's
    # sequence is left untouched.
    labels = list(labels)
    if plot_pop:
        data.append([len(clust) / 25346.0 for clust in clusters])
        labels.append('population')

    bars = []
    for offset, datum in enumerate(data):
        b = bar(ind + (offset * width), datum, width,
                color=cm.Paired(float(offset) / len(funcs)))
        bars.append(b)

    ylabel('normalized value')
    title(plot_title)
    ylim(ymin=0)
    # tick labels show the original cluster index and the cluster size
    xticks(ind + (width * len(funcs) / 2),
           ["%d: k=%d" % (ids[i], len(clust))
            for i, clust in enumerate(clusters)],
           fontsize=8)
    legend([b[0] for b in bars], labels, loc='upper left',
           prop=dict(size=6))

    savefig(filename, dpi=200)
def plot(cluster, funcs, labels, filename="fingerprints.pdf",
         plot_title='Scores by Cluster', cmap='Paired',
         filter=lambda a: True, plot_pop=False, run_dir="../run/"):
    """Save a grouped bar chart of mean per-cluster scores with error bars.

    Only clusters with more than 700 members are charted. For every function
    in ``funcs`` one bar is drawn per remaining cluster: its height is the
    mean of ``func(agent)`` over the members accepted by ``filter`` and its
    error bar is the standard error (std / sqrt(n)) of that same sample.

    Args:
        cluster: Cluster file handed to ``rc.load`` and ``rc.load_clusters``.
        funcs: Sequence of callables, each mapping an agent to a number.
        labels: Legend labels, one per entry of ``funcs``.
        filename: Path the figure is saved to.
        plot_title: Title drawn above the chart.
        cmap: Accepted for interface compatibility; bar colours are always
            taken from ``cm.Paired``.
        filter: Predicate selecting the agents that contribute to a score.
        plot_pop: Accepted for interface compatibility; unused.
        run_dir: Accepted for interface compatibility; unused.

    Raises:
        AssertionError: If ``labels`` and ``funcs`` differ in length.
        ValueError: If no cluster has more than 700 members (the ``zip``
            unpacking below then has nothing to unpack).
    """
    assert len(labels) == len(funcs), "must have a label for every func"

    # load in cluster data
    # NOTE(review): ``ac`` is never read in this function; the call is kept
    # in case rc.load is relied on for validation - confirm and drop.
    ac = rc.load(cluster)
    clusters = rc.load_clusters(cluster)

    # compress clusters
    # TODO: replace with call to agent.cluster.compress(700)
    clusters = [[i, clust] for i, clust in enumerate(clusters)
                if len(clust) > 700]
    ids, clusters = zip(*clusters)
    num_clusters = len(clusters)

    # build up cluster data: score each accepted agent exactly once per
    # function and derive both the mean and the standard error from that
    # single sample.
    data = []
    err = []
    for func in funcs:
        means = []
        errors = []
        for clust in clusters:
            scores = [func(a) for a in clust if filter(a)]
            means.append(np.average(scores))
            errors.append(np.std(scores) / np.sqrt(len(scores)))
        data.append(means)
        err.append(errors)

    # plot actual bars for each function; one extra slot of width is left
    # empty so neighbouring cluster groups do not touch.
    ind = np.arange(num_clusters)
    width = 1.0 / (len(funcs) + 1)
    bars = []
    for offset, datum in enumerate(data):
        b = bar(ind + (offset * width), datum, width,
                color=cm.Paired(float(offset) / len(funcs)),
                yerr=err[offset], capsize=1.5)
        bars.append(b)

    # generate final plot
    title(plot_title, weight='black')
    ylabel('Normalized Value', weight='bold')
    ylim(ymin=0, ymax=1.0)
    xlabel('Cluster', weight='bold')
    # tick labels are the original (pre-compression) cluster indices
    xticks(ind + (width * len(funcs) / 2), ["%d" % cid for cid in ids])
    legend([b[0] for b in bars], labels, loc='upper left')

    savefig(filename, dpi=200)
def plot(cluster, filename="plot.png", func=lambda a: a.id, plot_title='',
         cmap='Paired', filter=lambda a: True):
    """Save a scatter plot of ``func(agent)`` against lifetime midpoint.

    The population list is sorted in place so that agents from the largest
    clusters come first. Agents accepted by ``filter`` are then drawn at
    x = (birth + death) / 2, y = func(agent), coloured by cluster index.

    Args:
        cluster: Cluster file passed to ``rc.load`` / ``rc.load_clusters``.
        filename: Path the figure is saved to.
        func: Callable giving the y value for an agent.
        plot_title: Title drawn above the plot.
        cmap: Colormap name handed to ``scatter``.
        filter: Predicate selecting which agents are drawn.
    """
    ac = rc.load(cluster)
    clusters = rc.load_clusters(cluster)
    p = get_population()

    def _negated_cluster_size(agent):
        # negative size => ascending sort puts the biggest clusters first
        return -len(clusters[ac[agent.id]])

    p.sort(key=_negated_cluster_size)

    # Three separate passes, each re-applying the filter, in x / y / colour
    # order.
    xs = []
    for agent in p:
        if filter(agent):
            xs.append((agent.birth + agent.death) / 2)

    ys = []
    for agent in p:
        if filter(agent):
            ys.append(func(agent))

    cs = []
    for agent in p:
        if filter(agent):
            cs.append(ac[agent.id])

    scatter(xs, ys, cmap=cmap, c=cs, s=10, alpha=0.6, edgecolors='none')
    title(plot_title)
    xlim(0, 30000)
    ylim(ymin=0)

    savefig(filename, dpi=200)
def plot(cluster_file, run_dir='../run/', anim_path=None, frame_path=None,
         min_size=None):
    """Render one frame every third time step and encode them into a movie.

    Frames are produced by ``plot_step`` into ``frame_path`` and then
    combined by the external ``mencoder`` program into
    ``<anim_path>/output.avi``.

    Args:
        cluster_file: Cluster file passed to ``rc.load_clusters``.
        run_dir: Accepted for interface compatibility; unused.
        anim_path: Output directory for the movie; defaults to ``'anim'``.
        frame_path: Directory for the PNG frames; defaults to
            ``<anim_path>/frames``.
        min_size: When not None, clusters are first passed through
            ``compress_clusters(clusters, min_size)``.
    """
    # configure default animation and frame path
    if anim_path is None:
        anim_path = 'anim'
    if frame_path is None:
        frame_path = os.path.join(anim_path, 'frames')

    # ensure paths exist; makedirs (unlike mkdir) also creates any missing
    # parent directories of a caller-supplied nested path.
    for path in (anim_path, frame_path):
        if not os.path.isdir(path):
            os.makedirs(path)

    # load clusters
    clusters = rc.load_clusters(cluster_file, sort='random')

    # compress, if a threshold is set
    if min_size is not None:
        clusters = compress_clusters(clusters, min_size)

    # calculate number of clusters
    n_clusters = len(clusters)

    agent_cluster = rc.agent_cluster(clusters)

    # establish cluster object as a shared memory object.
    #manager = Manager()
    #cluster = manager.dict(agent_cluster)

    start = 1
    stop = 300

    # grouping to keep memory footprint reasonably small.
    # TODO: Fix this using iterators and multiprocessing.Pool(). Running into
    # major issues with serialization of Agent class that is preventing
    # plot_step from being picklable.
    for begin in xrange(start, stop, 1500):
        end = min(begin + 1499, stop)
        p = get_population_during_time(begin, end)

        for t in xrange(begin, end, 3):
            plot_step(t, p, agent_cluster, n_clusters, frame_path,
                      cmap='Paired')

    # Pass the arguments as a list rather than splitting a formatted string,
    # so directories containing whitespace stay a single argument.
    subprocess.call([
        "mencoder",
        "mf://%s/*.png" % frame_path,
        "-o", "%s/output.avi" % anim_path,
        "-mf", "type=png:w=600:h=800:fps=30",
        "-ovc", "x264",
        "-x264encopts", "qp=20",
    ])
def plot(cluster, filename="plot.png", func=lambda a: a.id, plot_title='',
         cmap='Paired', filter=lambda a: True):
    """Save a scatter plot of ``func(agent)`` against lifetime midpoint.

    The population list is sorted in place so that agents from the largest
    clusters come first. Agents accepted by ``filter`` are then drawn at
    x = (birth + death) / 2, y = func(agent), coloured by cluster index.

    Args:
        cluster: Cluster file passed to ``rc.load`` / ``rc.load_clusters``.
        filename: Path the figure is saved to.
        func: Callable giving the y value for an agent.
        plot_title: Title drawn above the plot.
        cmap: Colormap name handed to ``scatter``.
        filter: Predicate selecting which agents are drawn.
    """
    ac = rc.load(cluster)
    clusters = rc.load_clusters(cluster)
    p = get_population()

    def _negated_cluster_size(agent):
        # negative size => ascending sort puts the biggest clusters first
        return -len(clusters[ac[agent.id]])

    p.sort(key=_negated_cluster_size)

    # Three separate passes, each re-applying the filter, in x / y / colour
    # order.
    xs = []
    for agent in p:
        if filter(agent):
            xs.append((agent.birth + agent.death) / 2)

    ys = []
    for agent in p:
        if filter(agent):
            ys.append(func(agent))

    cs = []
    for agent in p:
        if filter(agent):
            cs.append(ac[agent.id])

    scatter(xs, ys, cmap=cmap, c=cs, s=10, alpha=0.6, edgecolors='none')
    title(plot_title)
    xlim(0, 30000)
    ylim(ymin=0)

    savefig(filename, dpi=200)
def plot(cluster_file, filename="plot.png", func=lambda a: a.id,
         plot_title='', cmap='Paired', filter=lambda a: True,
         draw_legend=False, radius='2.25', sym=None, run_dir='../run/'):
    """Create a line plot showing population per cluster over time.

    For every cluster the number of living members is counted for each of
    30000 time steps (an agent counts from ``birth`` up to, but excluding,
    ``death``) and drawn as one line. The figure is saved to ``filename``.

    Args:
        cluster_file: Cluster file passed to ``rc.load_clusters``.
        filename: Path the figure is saved to.
        func, plot_title, cmap, filter: Accepted for interface
            compatibility; unused.
        draw_legend: When true, add a figure legend listing every cluster.
        radius: Value shown as epsilon in the plot title.
        sym: Optional tag drawn in the top-left corner as ``(sym)``.
        run_dir: Forwarded to ``get_population``.
    """
    # retrieve cluster data from cluster file
    clusters = rc.load_clusters(cluster_file)

    # grab cluster population
    # NOTE(review): ``p`` is not read below; the call is kept in case
    # Agent() depends on state it sets up - confirm and drop.
    p = get_population(run_dir=run_dir)

    n_steps = 30000

    lines = []
    for cluster, agents in enumerate(clusters):
        pop_by_time = [0] * n_steps

        for agent in agents:
            a = Agent(agent)
            for i in range(a.birth, a.death):
                pop_by_time[i] += 1

        # Label with the cluster index (not the leftover time-step variable
        # of the counting loop above).
        lines.append(
            pylab.plot(range(n_steps), pop_by_time,
                       label=("%d: k=%d" % (cluster, len(agents))),
                       color=pylab.cm.Paired(float(cluster) / len(clusters))))

    if draw_legend:
        # one legend column per started group of 35 clusters
        pylab.figlegend(
            lines,
            ["%d: k=%d" % (idx, len(members))
             for idx, members in enumerate(clusters)],
            'center right',
            ncol=((len(clusters) // 35) + 1),
            prop=dict(size=6))

    heading = r"Cluster Population ($\epsilon$ = %s, %d clusters)" % (
        radius, len(clusters))
    pylab.title(heading, weight='black')
    pylab.xlabel("Time", weight='bold')
    pylab.ylabel("Population Size", weight='bold')
    if sym is not None:
        pylab.figtext(0, .954, '(%s)' % sym, size=6, weight='black')

    pylab.savefig(filename, dpi=300)
def plot(cluster, filename="plot-filter.eps", funcs=(lambda a: a.id,),
         plot_title='Scores by Cluster', cmap='Paired',
         filter=lambda a: True, labels=('id',), plot_pop=False):
    """Save a grouped bar chart of mean per-cluster scores.

    Only clusters with more than 700 members are charted. For every function
    in ``funcs`` one bar is drawn per remaining cluster, its height being the
    mean of ``func(agent)`` over the members accepted by ``filter``. With
    ``plot_pop`` an additional bar series shows each cluster's size divided
    by 25346.0.

    Args:
        cluster: Cluster file passed to ``rc.load`` / ``rc.load_clusters``.
        filename: Path the figure is saved to.
        funcs: Sequence of callables, each mapping an agent to a number.
        plot_title: Title drawn above the chart.
        cmap: Accepted for interface compatibility; bar colours are always
            taken from ``cm.Paired``.
        filter: Predicate selecting the agents that contribute to a score.
        labels: Legend labels, one per entry of ``funcs``. The sequence is
            copied, never modified.
        plot_pop: When true, append the relative-population bar series.

    Raises:
        AssertionError: If ``labels`` and ``funcs`` differ in length.
        ValueError: If no cluster has more than 700 members (the ``zip``
            unpacking below then has nothing to unpack).
    """
    assert len(labels) == len(funcs), "must have a label for every func"

    # NOTE(review): ``ac`` is never read in this function; the call is kept
    # in case rc.load is relied on for validation - confirm and drop.
    ac = rc.load(cluster)
    clusters = rc.load_clusters(cluster)
    clusters = [[i, clust] for i, clust in enumerate(clusters)
                if len(clust) > 700]
    ids, clusters = zip(*clusters)
    num_clusters = len(clusters)

    ind = np.arange(num_clusters)
    # one slot per bar series plus one empty slot as a gap between groups
    n_series = len(funcs) + 1 if plot_pop else len(funcs)
    width = 1.0 / (n_series + 1)

    data = [[np.average([func(a) for a in clust if filter(a)])
             for clust in clusters]
            for func in funcs]

    # Work on a private list so a tuple default works and the caller's
    # sequence is left untouched.
    labels = list(labels)
    if plot_pop:
        data.append([len(clust) / 25346.0 for clust in clusters])
        labels.append('population')

    bars = []
    for offset, datum in enumerate(data):
        b = bar(ind + (offset * width), datum, width,
                color=cm.Paired(float(offset) / len(funcs)))
        bars.append(b)

    ylabel('normalized value')
    title(plot_title)
    ylim(ymin=0)
    # tick labels show the original cluster index and the cluster size
    xticks(ind + (width * len(funcs) / 2),
           ["%d: k=%d" % (ids[i], len(clust))
            for i, clust in enumerate(clusters)],
           fontsize=8)
    legend([b[0] for b in bars], labels, loc='upper left',
           prop=dict(size=6))

    savefig(filename, dpi=200)
def plot(cluster, filename="plot.png", func=lambda a: a.id, plot_title='',
         cmap='Paired', filter=lambda a: True, draw_legend=False,
         radius='2.25', sym=None):
    """Plot each cluster's population over time and print a timeline table.

    For every cluster the number of living members is counted for each of
    30000 time steps (an agent counts from ``birth`` up to, but excluding,
    ``death``). One line per cluster is drawn and the figure is saved; then
    one row per cluster is printed with: cluster index, member count, first
    occupied step, step of peak population, last occupied step + 1, and the
    peak population.

    Args:
        cluster: Cluster file passed to ``rc.load`` / ``rc.load_clusters``.
        filename: Path the figure is saved to.
        func, plot_title, cmap, filter: Accepted for interface
            compatibility; unused.
        draw_legend: When true, add a figure legend listing every cluster.
        radius: Value shown as epsilon in the plot title.
        sym: Optional tag drawn in the top-left corner as ``(sym)``.
    """
    # NOTE(review): neither ``ac`` nor ``p`` is read below; both calls are
    # kept in case Agent() depends on state they set up - confirm and drop.
    ac = rc.load(cluster)
    clusters = rc.load_clusters(cluster)
    p = get_population()

    n_steps = 30000

    cluster_pops = []
    # One record per cluster:
    #   [max population, step of max, first occupied step, last occupied step]
    # A first occupied step of -1 means "nothing seen yet".
    cluster_pop_max = []
    for agents in clusters:
        counts = [0] * n_steps
        stats = [0, 0, -1, 0]
        for agent in agents:
            a = Agent(agent)
            for i in range(a.birth, a.death):
                counts[i] += 1
                # Track the earliest step over all members, not merely the
                # first one visited, so the result is independent of the
                # order in which agents are listed.
                if stats[2] == -1 or i < stats[2]:
                    stats[2] = i
                if i > stats[3]:
                    stats[3] = i
                if counts[i] > stats[0]:
                    stats[0] = counts[i]
                    stats[1] = i
        cluster_pops.append(counts)
        cluster_pop_max.append(stats)

    lines = []
    for i, counts in enumerate(cluster_pops):
        lines.append(
            pylab.plot(range(n_steps), counts,
                       label=("%d: k=%d" % (i, len(clusters[i]))),
                       color=pylab.cm.Paired(float(i) / len(clusters))))

    if draw_legend:
        # one legend column per started group of 35 clusters
        pylab.figlegend(
            lines,
            ["%d: k=%d" % (i, len(clust))
             for i, clust in enumerate(clusters)],
            'center right',
            ncol=((len(clusters) // 35) + 1),
            prop=dict(size=6))

    heading = r"Cluster Population ($\epsilon$ = %s, %d clusters)" % (
        radius, len(clusters))
    pylab.title(heading, weight='black')
    pylab.xlabel("Time", weight='bold')
    pylab.ylabel("Population Size", weight='bold')
    if sym is not None:
        pylab.figtext(0, .954, '(%s)' % sym, size=6, weight='black')

    pylab.savefig(filename, dpi=300)

    print('cluster, totalPop, start, peak, stop, maxPop')
    for clust, agents in enumerate(clusters):
        max_pop, peak, first, last = cluster_pop_max[clust]
        print("%s %s %s %s %s %s" % (
            clust, len(agents), first, peak, last + 1, max_pop))
# Script fragment: regroup clusters so that every cluster of 700 members or
# fewer is merged into a single catch-all cluster, then report how many
# distinct cluster values the resulting mapping holds.
#
# NOTE(review): this fragment starts mid-script; the os.mkdir call below
# presumably belongs to a directory-existence check that is not visible
# here - confirm its indentation against the full file.
os.mkdir(frame_path)
# Inert string literal holding an older, disabled loading path.
''' print "unpickling clustsers ..." c = Cluster.load(sys.argv[-1]) print "sorting clustsers ..." c.sort() cluster = c.agents del c print cluster exit() '''
# The cluster file is named by the last command-line argument.
import readcluster
clusters = readcluster.load_clusters(sys.argv[-1])
# Clusters with more than 700 members are kept as they are ...
new_clust = [clust for clust in clusters if len(clust) > 700]
# ... while the members of all remaining clusters are pooled together and
# appended as one final cluster.
small_clust = []
for clust in clusters:
    if len(clust) <= 700:
        small_clust.extend(clust)
new_clust.append(small_clust)
# Build the mapping from the regrouped clusters and count its distinct values.
cluster = readcluster.agent_cluster(new_clust)
n_clusters = len(set(cluster.values()))
print n_clusters
cluster_pops[clust][i] += 1 # set cluster start if cluster_pop_max[clust][2] == -1: cluster_pop_max[clust][2] = i # set cluster stop if i > cluster_pop_max[clust][3]: cluster_pop_max[clust][3] = i # set max Population if cluster_pops[clust][i] > cluster_pop_max[clust][0]: cluster_pop_max[clust][0] = cluster_pops[clust][i] cluster_pop_max[clust][1] = i print 'cluster, totalPop, start, peak, stop, maxPop' for clust, agents in enumerate(clusters): print clust, len(agents), cluster_pop_max[clust][2], cluster_pop_max[ clust][1], cluster_pop_max[clust][3] + 1, cluster_pop_max[clust][0] if __name__ == '__main__': import sys # get clusters cluster_file = sys.argv[-1] ac = rc.load(cluster_file) clusters = rc.load_clusters(cluster_file) print_timeline(clusters)
# Script fragment: regroup clusters so that every cluster of 700 members or
# fewer is merged into a single catch-all cluster, then report how many
# distinct cluster values the resulting mapping holds.
#
# NOTE(review): this fragment starts mid-script; the os.mkdir call below
# presumably belongs to a directory-existence check that is not visible
# here - confirm its indentation against the full file.
os.mkdir(frame_path)
# Inert string literal holding an older, disabled loading path.
''' print "unpickling clustsers ..." c = Cluster.load(sys.argv[-1]) print "sorting clustsers ..." c.sort() cluster = c.agents del c print cluster exit() '''
# The cluster file is named by the last command-line argument.
import readcluster
clusters = readcluster.load_clusters(sys.argv[-1])
# Clusters with more than 700 members are kept as they are ...
new_clust = [clust for clust in clusters if len(clust) > 700]
# ... while the members of all remaining clusters are pooled together and
# appended as one final cluster.
small_clust = []
for clust in clusters:
    if len(clust) <= 700:
        small_clust.extend(clust)
new_clust.append(small_clust)
# Build the mapping from the regrouped clusters and count its distinct values.
cluster = readcluster.agent_cluster(new_clust)
n_clusters = len(set(cluster.values()))
print n_clusters
# Progress message before whatever processing follows this fragment.
print "processing..."
a = Agent(agent) for i in range(a.birth, a.death): cluster_pops[clust][i] += 1 # set cluster start if cluster_pop_max[clust][2] == -1: cluster_pop_max[clust][2] = i # set cluster stop if i > cluster_pop_max[clust][3]: cluster_pop_max[clust][3] = i # set max Population if cluster_pops[clust][i] > cluster_pop_max[clust][0]: cluster_pop_max[clust][0] = cluster_pops[clust][i] cluster_pop_max[clust][1] = i print 'cluster, totalPop, start, peak, stop, maxPop' for clust,agents in enumerate(clusters): print clust, len(agents), cluster_pop_max[clust][2], cluster_pop_max[clust][1], cluster_pop_max[clust][3]+1, cluster_pop_max[clust][0] if __name__ == '__main__': import sys # get clusters cluster_file = sys.argv[-1] ac = rc.load(cluster_file) clusters = rc.load_clusters(cluster_file) print_timeline(clusters)