Code Example #1
File: utils.py Project: halfdanrump/MarketSimulation
def load_d10d11():
	import IO
	from pandas import concat
	fit10, par10, gen10, ids10 = IO.load_pickled_generation_dataframe('d10')
	fit11, par11, gen11, ids11 = IO.load_pickled_generation_dataframe('d11')
	par10['sc_nAgents'] = 150
	par11['ssmm_nAgents'] = 52
	par = concat([par10, par11])
	fit = concat([fit10, fit11])
	return fit, par
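A minimal usage sketch for the loader above, assuming the pickled 'd10' and 'd11' generation data are where IO.load_pickled_generation_dataframe expects them:

from utils import load_d10d11

fit, par = load_d10d11()
# par now holds both datasets; the parameter held fixed in each run
# (sc_nAgents=150 for d10, ssmm_nAgents=52 for d11) has been filled in
print par.shape, fit.shape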
Code Example #2
def issue_101_plot_pars_vs_fitness(dataset, overshoot_threshold, preloaded_data = None):
	from plotting import get_pretty_xy_plot, make_pretty_scatter_plot
	from numpy import where
	import sys


	def get_plots_to_make(fitness_types):
		plots_to_make = list()
		for fitness_type in fitness_types:
			for stat in stats:
				plots_to_make.append((fitness_type, stat))
		return plots_to_make


	def mkplot(all_data, groupby, plots_to_make):
		g = all_data.groupby(groupby)
		#x = g.groups.keys()
		s = all_data.sort(groupby)
		sorted_x, index_order = zip(*sorted(zip(g.groups.keys(), range(len(g.groups.keys())))))
		for attr, stat in plots_to_make:
			print groupby, attr, stat
			y = getattr(g[attr],stat)()
			filename = '%s%s__vs__%s(%s)'%(folder, groupby, attr, stat)
			ax, fig = get_pretty_xy_plot(sorted_x, y, groupby, '%s (%s)'%(attr, stat), filename, g[attr].std()/2, save_figure = False)
			filename = '%s%s__vs__%s(%s)_scatter'%(folder, groupby, attr, stat)
			make_pretty_scatter_plot(s[groupby], s[attr], groupby, '%s (%s)'%(attr, stat), filename, ax=ax, fig=fig)
	
	def run_analysis(groups, data, plots_to_make):
		for groupby in groups:
			mkplot(data, groupby, plots_to_make)

	
	folder = make_issue_specific_figure_folder('101_pars_vs_fits', dataset)
	stats = ['mean']

	if dataset == 'd10d11':
		f, p = utils.load_d10d11()
	else:
		f, p, g, i = IO.load_pickled_generation_dataframe(dataset_name=dataset)
		if dataset == 'd10':
			p['sc_nAgents'] = 150
		elif dataset == 'd11':
			p['ssmm_nAgents'] = 52

	

	if preloaded_data is None: 
		fit, par, gen, ids = IO.load_pickled_generation_dataframe(dataset)
	else:
		try:
			fit = preloaded_data['fit']
			par = preloaded_data['par']
		except KeyError, e:
			print "Provide dict with keys 'fit' and 'par' containing dataframes for fit and par data"
			print e
			sys.exit(1)
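A hypothetical invocation sketch showing the preloaded_data contract the function expects (a dict holding 'fit' and 'par' DataFrames), with data loaded through the project's own IO module:

import IO

fit, par, gen, ids = IO.load_pickled_generation_dataframe('d10')
issue_101_plot_pars_vs_fitness('d10', overshoot_threshold=10, preloaded_data={'fit': fit, 'par': par})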
Code Example #3
def issue_55_calc_cluster_stats(dataset, n_clusters, gamma, load_from_file = False):
	from data_analysis import reduce_npoints_kmeans, outlier_detection_with_SVM, calculate_stats_for_dataframe, calculate_pca
	from sklearn.cluster import KMeans
	from utils import get_group_vector_for_reduced_dataset, export_stats_dict_as_tex
	from plotting import make_scatter_plot_for_labelled_data
	#from scipy.stats import f_oneway
	#from numpy import where
	#from sklearn.preprocessing import scale

	def reduce_outlier_cluster_stats(data, data_target, data_name, gamma):
		reduced, cluster_assignment_o2r, km_r = reduce_npoints_kmeans(data, dataset, data_name, n_datapoints=1000, load_from_file=load_from_file)	
		inliers_idx_r, outliers_idx_r = outlier_detection_with_SVM(reduced, kernel='rbf', gamma=gamma, outlier_percentage=0.01)
		kmeans = KMeans(n_clusters = n_clusters)
		kmeans.fit(reduced.iloc[inliers_idx_r, :])
		indexes_i, labels_i =  get_group_vector_for_reduced_dataset(inliers_idx_r, cluster_assignment_o2r, cluster_assignment_r2g = kmeans.labels_)
		print DataFrame(kmeans.cluster_centers_, columns=data.columns)

		all_data = concat([par_data, fit_data], axis=1)
		stats = calculate_stats_for_dataframe(all_data.iloc[indexes_i,:], labels_i)
		export_stats_dict_as_tex(dataset, stats, data_name)
		#groups = map(lambda x: scale(data_target.iloc[indexes_i[where(labels_i==x)]]), range(n_clusters))
		#fval, pval = f_oneway(*groups)
		#print "P-vals for %s clusters: %s"%(data_name, pval)
		transformed_data, pca, components  = calculate_pca(data.iloc[indexes_i,:], n_components=3, normalize = True)	
		filename = figure_save_path + dataset + 'issue55_1_clusters_in_PCA_%s_space.png'%(data_name)
		colormap = brewer2mpl.get_map('Set2', 'Qualitative', n_clusters, reverse=True)
		print "Making scatter plot of K-means clusters of %s data for dataset %s"%(data_name, dataset)
		make_scatter_plot_for_labelled_data(data_frame=transformed_data, x_name='d1', y_name='d2', labels=labels_i, filename=filename, colormap = colormap, legend=True)
		#fitness_groups = map(lambda x: data.iloc[indexes_i[where(labels_i==x)]], range(n_clusters))
		
	fit_data, par_data, gen, ids = IO.load_pickled_generation_dataframe(dataset_name=dataset)
	reduce_outlier_cluster_stats(par_data, fit_data, 'parameter', gamma=gamma[0])
	reduce_outlier_cluster_stats(fit_data, par_data, 'fitness', gamma=gamma[1])
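Note that gamma must supply two values, one per reduce_outlier_cluster_stats call (parameters first, fitnesses second). A hypothetical call sketch:

issue_55_calc_cluster_stats('d10', n_clusters=4, gamma=(0.1, 0.1), load_from_file=True)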
Code Example #4
def issue_26_plot_pca_and_cluster(dataset, n_clusters):
	"""
	PCA and Kmeans for dataset 1
	"""
	from data_analysis import calculate_pca
	from sklearn.cluster import KMeans
	from plotting import make_color_grouped_scatter_plot, make_scatter_plot_for_labelled_data
	
	def do_for_dataset(data, data_name):
		transformed_data, pca, components  = calculate_pca(data, n_components=3, normalize = True)
		colormap = brewer2mpl.get_map('RdBu', 'diverging', 4, reverse=True)
		filename = figure_save_path + dataset + '_issue_26_1_%s_PCA_3components.png'%data_name
		print "Making scatter plot of PCA decompositions of %s data for dataset %s"%(data_name, dataset)
		make_color_grouped_scatter_plot(data_frame=transformed_data, x_name='d1', y_name='d2', color_by='d3', filename=filename, colormap=colormap)
		
		kmeans = KMeans(n_clusters = n_clusters)
		kmeans.fit(transformed_data.values)
		colormap = brewer2mpl.get_map('Set2', 'Qualitative', n_clusters, reverse=True)
		filename = figure_save_path + dataset + '_issue_26_2_%s_clusters_in_PCA_space.png'%data_name
		print "Making scatter plot of K-means clusters of %s data for dataset %s"%(data_name, dataset)
		make_scatter_plot_for_labelled_data(data_frame=transformed_data, x_name='d1', y_name='d2', labels=kmeans.labels_, filename=filename, colormap = colormap, legend=True)
	
	fit_data, par_data, gen, ids = IO.load_pickled_generation_dataframe(dataset_name=dataset)
	do_for_dataset(fit_data, 'fitness')
	do_for_dataset(par_data, 'parameter')
Code Example #5
def test(dataset, overshoot_threshold):
	from numpy import where, zeros
	from sklearn.neighbors.kde import KernelDensity  # unused below; newer scikit-learn exposes this as sklearn.neighbors.KernelDensity
	folder = make_issue_specific_figure_folder('108 cluster after removing outliers', dataset)
	fit, par, gen, ids = IO.load_pickled_generation_dataframe(dataset)
	o = where(fit.overshoot > overshoot_threshold)[0]
	#not_o = where(fit.overshoot <= overshoot_threshold)[0]
	par = par.drop(o)
	fit = fit.drop(o)
	g1 = par.groupby('ssmm_nAgents').groups.keys()
	g2 = par.groupby('ssmm_latency_mu').groups.keys()
	#stdev_mean = zeros((len(g1), len(g2)))
	data = DataFrame(columns=['ssmm_nAgents', 'ssmm_latency_mu', 'stdev_mean'])
	for a, ssmm_nAgents in enumerate(g1):
		print ssmm_nAgents
		for l, ssmm_latency_mu in enumerate(g2):
			row = dict()
			try:
				row['stdev_mean'] = fit[(par['ssmm_latency_mu'] == ssmm_latency_mu) & (par['ssmm_nAgents'] == ssmm_nAgents)]['stdev'].mean()
				row['ssmm_nAgents'] = ssmm_nAgents
				row['ssmm_latency_mu'] = ssmm_latency_mu
				#print row
				data = data.append(row, ignore_index = True)
			except TypeError:
				print "ARGHS"

	X, Y = np.meshgrid(g1, g2)  # g1 and g2 are already lists of group keys
	xy = np.vstack([Y.ravel(), X.ravel()]).T
	return data
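The nested loop above builds a long-format table of mean stdev per (ssmm_nAgents, ssmm_latency_mu) cell; the same aggregation can be sketched with a pandas pivot table, assuming the same outlier-filtered fit/par frames:

from pandas import concat

combined = concat([par[['ssmm_nAgents', 'ssmm_latency_mu']], fit[['stdev']]], axis=1)
grid = combined.pivot_table(values='stdev', index='ssmm_nAgents', columns='ssmm_latency_mu', aggfunc='mean')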
Code Example #6
def issue_29_reduce_and_affinity(dataset, affinity_damping, load_clusters_from_file = False):
	from data_analysis import reduce_npoints_kmeans, calculate_pca
	from sklearn.cluster import AffinityPropagation
	from sklearn.preprocessing import scale
	from plotting import make_color_grouped_scatter_plot
	from plotting import make_scatter_plot_for_labelled_data

	"""
	Use KMeans on fitness data to reduce number of datapoints and then use affinity propagation
	"""
	def do_issue(data, data_name):
		reduced_points, labels, km = reduce_npoints_kmeans(dataframe = data, dataset_name = dataset, data_name=data_name, n_datapoints = 1000, load_from_file = False)
		transformed_data, pca, components = calculate_pca(reduced_points, n_components=3)
		colormap = brewer2mpl.get_map('RdBu', 'diverging', 4, reverse=True)
		filename = figure_save_path + dataset + '_issue_29_1_%s_reduced_number_of_points.png'%data_name
		print "Making scatter plot of %s data for dataset %s, where the number of points have been reduced by K-Means clustering"%(data_name, dataset)
		make_color_grouped_scatter_plot(data_frame=transformed_data, x_name='d1', y_name='d2', color_by='d3', filename=filename, colormap=colormap)

		ap = AffinityPropagation(damping=affinity_damping)
		ap.fit(reduced_points)
		print "Making scatter plot of Affinity Propagation clusters of %s data for dataset %s"%(data_name, dataset)
		filename = figure_save_path + dataset + '_issue_29_2_%s_affinity.png'%data_name
		make_scatter_plot_for_labelled_data(data_frame=transformed_data, x_name='d1', y_name='d2', labels=ap.labels_, filename=filename, colormap = colormap, legend=True)	
	
		

	fit_data, par_data, gen, ids = IO.load_pickled_generation_dataframe(dataset_name=dataset)
	do_issue(fit_data, 'fitness')
	do_issue(par_data, 'parameter')
Code Example #7
def issue_21_basic_scatter_plots(dataset):
	"""
	Makes scatter plots of fitness
	"""
	from plotting import make_color_grouped_scatter_plot
	folder = make_issue_specific_figure_folder('21_scatter_plots', dataset)
	fit_data, par_data, gen, ids = IO.load_pickled_generation_dataframe(dataset_name=dataset)
	
	#colormap = brewer2mpl.get_map('YlOrRd', 'Sequential', 9, reverse=True)
	colormap = brewer2mpl.get_map('Spectral', 'Diverging', 9, reverse=True)
	print "Making scatter plots of fitness data for dataset %s"%dataset
	filename = folder + 'a.png'
	make_color_grouped_scatter_plot(data_frame=fit_data, x_name='overshoot', y_name='time_to_reach_new_fundamental', color_by='stdev', filename=filename, colormap = colormap, y_function='log')

	filename = folder + 'b.png'
	make_color_grouped_scatter_plot(data_frame=fit_data, x_name='overshoot', y_name='stdev', color_by='time_to_reach_new_fundamental', filename=filename, colormap = colormap)

	filename = folder + 'c.png'
	make_color_grouped_scatter_plot(data_frame=fit_data, x_name='time_to_reach_new_fundamental', y_name='round_stable', color_by='stdev', filename=filename, colormap = colormap)
	
	filename = folder + 'd.png'
	make_color_grouped_scatter_plot(data_frame=fit_data, x_name='stdev', y_name='round_stable', color_by='time_to_reach_new_fundamental', filename=filename, colormap = colormap, x_function='log', y_function='log')

	filename = folder + 'e.png'
	make_color_grouped_scatter_plot(data_frame=fit_data, x_name='stdev', y_name='time_to_reach_new_fundamental', color_by='round_stable', filename=filename, colormap = colormap, x_function='log', y_function='log')
	
	filename = folder + 'f.png'
	make_color_grouped_scatter_plot(data_frame=fit_data, x_name='time_to_reach_new_fundamental', y_name='stdev', color_by='round_stable', filename=filename, colormap = colormap)

	filename = folder + 'g.png'
	make_color_grouped_scatter_plot(data_frame=fit_data, x_name='time_to_reach_new_fundamental', y_name='stdev', color_by='round_stable', filename=filename, colormap = colormap, x_function='log', y_function='log', color_function='log')
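The seven calls above differ only in their axis and scale choices; a compact equivalent sketch that drives make_color_grouped_scatter_plot from a list of specs (folder, fit_data and colormap as defined above):

plot_specs = [
	('a.png', dict(x_name='overshoot', y_name='time_to_reach_new_fundamental', color_by='stdev', y_function='log')),
	('b.png', dict(x_name='overshoot', y_name='stdev', color_by='time_to_reach_new_fundamental')),
	# ... remaining specs follow the same pattern
]
for fname, kwargs in plot_specs:
	make_color_grouped_scatter_plot(data_frame=fit_data, filename=folder + fname, colormap=colormap, **kwargs)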
Code Example #8
def issue_36_kernelPCA(dataset, load_from_file):
	import IO
	from data_analysis import reduce_npoints_kmeans
	fit_data, par_data, gen, ids = IO.load_pickled_generation_dataframe(dataset_name=dataset)


	reduced_par, labels_all_datapoints, km = reduce_npoints_kmeans(par_data, dataset, n_datapoints=1000, load_from_file=load_from_file)	
	return reduced_par, labels_all_datapoints, km
Code Example #9
def latency_vs_fitness_with_lines_for_agent_ratio(dataset):
	from plotting import multiline_xy_plot
	from utils import make_issue_specific_figure_folder
	from utils import format_as_latex_parameter as fl
	def get_ssmmlat_mask(l, u): 
		return (p.ssmm_latency_mu > l) & (p.ssmm_latency_mu < u)

	def get_sclat_mask(l, u): 
		return (p.sc_latency_mu > l) & (p.sc_latency_mu < u)

	def zip_to_tuples(r): return zip(r[:-1], r[1::])


	def calc_and_plot(ratio_direction):	
		for fitness in f.columns:
			ssmm_ys = list()
			sc_ys = list()
			legend_labels = list()
			for ratio_lower, ratio_upper in zip_to_tuples(ratio_range):
				ratio_mask = (ratio_lower < p.ratio) & (p.ratio < ratio_upper)
				ssmm_lat_range = concat(map(lambda l: f[get_ssmmlat_mask(l,l+20) & ratio_mask].mean(), ssmmlatencyrange), axis=1).transpose()
				ssmm_ys.append(ssmm_lat_range[fitness])
				sc_lat_range = concat(map(lambda l: f[get_sclat_mask(l,l+20) & ratio_mask].mean(), sclatencyrange), axis=1).transpose()
				sc_ys.append(sc_lat_range[fitness])
				legend_labels.append(r'$\displaystyle %s < %s < %s$'%(round(ratio_lower,1), fl(ratio_direction, mathmode = False), round(ratio_upper,1)))
			filename = '%s_%s_%s_mmlatency.png'%(folder, ratio_direction, fitness)
			
			multiline_xy_plot(ssmm_lat_range.index, ssmm_ys, xlabel = 'ssmm_latency_mu', ylabel = fitness, legend_labels = legend_labels, filename = filename)
			filename = '%s_%s_%s_sclatency.png'%(folder, ratio_direction, fitness)
			
			multiline_xy_plot(sc_lat_range.index, sc_ys, xlabel = 'sc_latency_mu', ylabel = fitness, legend_labels = legend_labels, filename = filename)


	ssmmlatencyrange = range(100)
	sclatencyrange = range(100)
	
	folder = make_issue_specific_figure_folder('latency_vs_fitness_with_lines_for_agent_ratio', dataset)
	
	if dataset == 'd10d11':
		f, p = utils.load_d10d11()
	else:
		f,p,g, i=IO.load_pickled_generation_dataframe(dataset_name=dataset)
		if dataset == 'd10':
			p['sc_nAgents'] = 150
		elif dataset == 'd11':
			p['ssmm_nAgents'] = 52
	
	#nssmm_mask = p.ssmm_nAgents > 50
	#f = f[nssmm_mask]
	#p = p[nssmm_mask]

	p['ratio'] = p['sc_nAgents'].astype(float) / p['ssmm_nAgents']
	ratio_range = np.linspace(0,3,6)
	calc_and_plot('ratioagent')

	p['ratio'] = p['ssmm_nAgents'].astype(float) / p['sc_nAgents']
	ratio_range = [0,0.01, 0.2,0.35,0.6,1]
	calc_and_plot('ratioagentinv')
Code Example #10
File: gp.py Project: halfdanrump/MarketSimulation
def myown():
    from sklearn.cross_validation import train_test_split
    
    import IO
    fit, par, gen, ids = IO.load_pickled_generation_dataframe('d10')
    all_data = concat([fit, par], axis=1)
    train, test = map(lambda x: DataFrame(x, columns=all_data.columns), train_test_split(all_data, test_size = 0.95))
    x, y = get_xy(train, features=['time_to_reach_new_fundamental'], target='ssmm_nAgents')
    return x,y
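sklearn.cross_validation was removed in scikit-learn 0.20; under current versions the same split is imported from sklearn.model_selection. A sketch of the equivalent lines, everything else unchanged:

from sklearn.model_selection import train_test_split
from pandas import DataFrame

train_arr, test_arr = train_test_split(all_data, test_size=0.95)
train, test = [DataFrame(a, columns=all_data.columns) for a in (train_arr, test_arr)]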
Code Example #11
def issue_118_fitness_corelation_matrix(dataset):
	import IO
	from plotting import plot_correlation_matrix
	from utils import format_as_latex_parameter as fl
	folder = make_issue_specific_figure_folder('fitness_correlation', dataset)
	f,p,g, i=IO.load_pickled_generation_dataframe(dataset)
	mask = f.overshoot < 5
	c = np.corrcoef(f[mask].transpose())
	labels = map(fl, f.columns)
	plot_correlation_matrix(folder + 'correlation_matrix.png', c, labels)
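np.corrcoef on the transposed frame correlates the fitness columns; pandas gives the same Pearson matrix directly (identical when there are no missing values), as a one-line sketch:

c = f[mask].corr().values  # equivalent to np.corrcoef(f[mask].transpose())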
Code Example #12
def issue_88_affinity_after_norm_and_outlier(dataset, load_from_file):
	from sklearn.preprocessing import scale
	from sklearn.cluster import AffinityPropagation
	from data_analysis import reduce_npoints_kmeans, outlier_detection_with_SVM
	fit_data, par_data, gen, ids = IO.load_pickled_generation_dataframe(dataset_name=dataset)
	reduced_fitness, labels, km = reduce_npoints_kmeans(dataframe = par_data, dataset_name = dataset, data_name='parameter', n_datapoints = 1000, load_from_file = load_from_file)
	inliers_idx_r, outliers_idx_r = outlier_detection_with_SVM(reduced_fitness, kernel='rbf', gamma=0.1, outlier_percentage=0.01)
	return inliers_idx_r, outliers_idx_r
	# unreachable after the return above; the AffinityPropagation step was left unfinished
	#ap = AffinityPropagation(damping=0.97)
	#ap.t
Code Example #13
def issue_83_example_table():
	
	from thesis_plots import table_save_path
	fit, par, gen, ids = IO.load_pickled_generation_dataframe('d3')
	tex_partable = utils.dataframe2latex(par.iloc[range(10),:], 'table:example_dataset_parameters', 'An example data matrix containing the parameters of ten individuals who lived sometime during the execution of the genetic algorithm. In this case, each individual contained parameters for the number of HFT agents, as well as the latency and thinking time parameters. Hence, the data matrix has a column for each.')
	with open('%sexample_dataset_parameters.tex'%table_save_path, 'w') as f:
			f.write(tex_partable)
	tex_fittable = utils.dataframe2latex(fit.iloc[range(10),:], 'table:example_dataset_fitnesses', 'This table contains the fitness values for each individual in table \\ref{table:example_dataset_parameters}. Note that, in order to increase the reliability of the fitness measure of an individual, the recorded fitness values are the average of the fitnesses obtained by evaluating each individual ten times.')		
	with open('%sexample_dataset_fitnesses.tex'%table_save_path, 'w') as f:
			f.write(tex_fittable)
Code Example #14
def issue_130_overvaluation_scatter():
	from plotting import make_scatter_plot_for_labelled_data
	folder = make_issue_specific_figure_folder('issue_130_overvaluation_scatter', 'd10')
	colormap = brewer2mpl.get_map('RdBu', 'Diverging', 11, reverse=True)
	ind = IO.load_pickled_generation_dataframe('d10', True)
	mask = ind.ssmm_latency_mu > 0
	#plot(ind.ssmm_nAgents[mask], ind.ssmm_latency_mu[mask], 'r.'); 
	xlab = r'$N_m$'
	ylab = r'$\lambda_{m,\mu}$'
	l = np.repeat(10, len(ind[mask]))
	filename = folder + 'scatter.png'
	make_scatter_plot_for_labelled_data(ind[mask], 'ssmm_nAgents', 'ssmm_latency_mu', l, filename, colormap, point_size = 20)
Code Example #15
def faster_mm_many_chartists():
	from plotting import multiline_xy_plot
	folder = make_issue_specific_figure_folder('par_tendencies', 'all')
	
	def mkplot(filename, line_parameter, intervals_for_lines, range_parameter, fitness_type, legend_caption, xlabel, ylabel):
		ylines = list()
		labels = list()
		x = sorted(set(p[range_parameter]))  # sort the keys so the x-axis matches the mapped means

		for lb, ub in zip(intervals_for_lines[:-1], intervals_for_lines[1::]):
			# keep lb <= line_parameter < ub, matching the legend label below
			line = map(lambda l: f[(p[range_parameter] == l) & (lb <= p[line_parameter]) & (p[line_parameter] < ub)][fitness_type].mean(), x)
			ylines.append(line)
			labels.append('%s < %s < %s'%(lb, legend_caption, ub))
		line = map(lambda l: f[(p[range_parameter] == l) & (intervals_for_lines[-1] <= p[line_parameter])][fitness_type].mean(), x)
		ylines.append(line)
		labels.append('%s < %s'%(intervals_for_lines[-1], legend_caption))
		print xlabel
		print ylabel
		print labels
		multiline_xy_plot(x, ylines, ylabel = ylabel, xlabel=xlabel, filename = filename, y_errorbars = None, save_figure = True, legend_labels = labels)


	f,p,g,i = IO.load_pickled_generation_dataframe('d11')
	filename = folder + 'd11_overshoot_mm_latency.png'
	mkplot(filename = filename, line_parameter='sc_nAgents', intervals_for_lines = [0, 50, 100], range_parameter='ssmm_latency_mu', fitness_type='overshoot', legend_caption = '# chartists', xlabel = 'Average market maker latency', ylabel = 'Average model overshoot')
	
	filename = folder + 'd11_overshoot_chartist_latency.png'
	mkplot(filename = filename, line_parameter='sc_nAgents', intervals_for_lines = [0, 50, 100], range_parameter='sc_latency_mu', fitness_type='overshoot', legend_caption = '# chartists', xlabel = 'Average chartist latency', ylabel = 'Average model overshoot')

	f,p,g,i = IO.load_pickled_generation_dataframe('d10')
	filename = folder + 'd10_overshoot_mm_latency.png'
	mkplot(filename = filename, line_parameter='ssmm_nAgents', intervals_for_lines = [0, 50, 100], range_parameter='ssmm_latency_mu', fitness_type='overshoot', legend_caption = '# market makers', xlabel = 'Average market maker latency', ylabel = 'Average model overshoot')
	
	filename = folder + 'd10_overshoot_chartist_latency.png'
	mkplot(filename = filename, line_parameter='ssmm_nAgents', intervals_for_lines = [0, 50, 100], range_parameter='sc_latency_mu', fitness_type='overshoot', legend_caption = '# market makers', xlabel = 'Average chartist latency', ylabel = 'Average model overshoot')

	filename = folder + 'd10_overshoot_ssmm_nAgents.png'
	mkplot(filename = filename, line_parameter='ssmm_latency_mu', intervals_for_lines = [0, 20, 40, 60], range_parameter='ssmm_nAgents', fitness_type='overshoot', legend_caption = 'ssmm latency', xlabel = 'Average # market makers', ylabel = 'Average model overshoot')
Code Example #16
def issue_41(n_clusters, dataset):
	"""
	Calculate clusters for K-means and calculate fitness stats for each cluster
	"""
	fit_data, par_data, gen, ids= IO.load_pickled_generation_dataframe(dataset)
	par_trans, pca, components = calculate_pca(par_data, n_components=4)

	kmeans = KMeans(n_clusters=n_clusters, n_jobs=-1, verbose=0)
	kmeans.fit(par_trans)
	fit_data['label'] = kmeans.labels_
	group = fit_data.groupby('label')
	print "Count with KMmeans\n", group.count()
	print "mean with KMmeans\n", group.mean()
	print "std with KMmeans\n", group.std()
	print "median with KMmeans\n", group.median()
Code Example #17
def issue_115_agent_ratio(ratio_threshold):
	from numpy import where
	def run_issue(name, all_fit, all_par):
		all_par['chartist_per_market_maker'] = all_par.sc_nAgents.astype(float) / all_par.ssmm_nAgents
		par_to_plot = DataFrame(all_par['chartist_per_market_maker'])

		over_threshold_idx, = where(par_to_plot['chartist_per_market_maker'] > ratio_threshold)
		print 'Dropping over ratio threshold rows: %s (%s rows)'%(over_threshold_idx, len(over_threshold_idx))
		par_to_plot = par_to_plot.drop(over_threshold_idx)
		all_fit = all_fit.drop(over_threshold_idx)
		issue_101_plot_pars_vs_fitness(name, overshoot_threshold = 10, preloaded_data = {'fit':all_fit, 'par':par_to_plot})
	
	fit10, par10, gen10, ids10 = IO.load_pickled_generation_dataframe('d10')
	fit11, par11, gen11, ids11 = IO.load_pickled_generation_dataframe('d11')
	
	par10['sc_nAgents'] = 150
	par11['ssmm_nAgents'] = 52

	run_issue('d10', fit10, par10)
	run_issue('d11', fit11, par11)

	all_par = concat([par10, par11])
	all_fit = concat([fit10, fit11])
	run_issue('d10d11',all_fit, all_par)
Code Example #18
def average_par_vs_fitnesses(dataset):
	from plotting import multiline_xy_plot
	folder = make_issue_specific_figure_folder('average_par_vs_fitness_lineplot', dataset)

	"""
	TRIED AND FAILED TO MAKE THE WHOLE THING WITHOUT A LOOP USING MAPS INSTEAD
	def mkplot(fitness):
		fitness = 'overshoot'
		mask = f < f.quantile(0.9)
		masked_f = f[mask]
		hists = map(lambda x: np.histogram(x, bins = 50), map(lambda x: list(masked_f[x]), masked_f.columns))
		bins = list(zip(*bins)[1])
		bins = DataFrame(np.transpose(bins), columns = masked_f.columns)
		ws = bins.iloc[1,:] - bins.iloc[0,:]

		counts, bins = np.histogram(f[fitness][mask], bins=50)
		return mask, bins
		ws = bins[1] - bins[0]
		means = concat(map(lambda x: p[(f[fitness][mask] > x - ws) & (f[fitness][mask] < x + ws)].mean(), bins), axis=1).transpose()
		means_as_list = map(lambda x: list(means[x]), means.columns)
		xlabel = fl(fitness)
		ylabel = ''
		legend_labels = map(fl, means.columns)
		filename = folder + '%s.png'%fitness
		multiline_xy_plot(bins, means_as_list, xlabel, ylabel, legend_labels, filename)
	"""
	
	def mkplot(fitness):
		mask = f[fitness] < f[fitness].quantile(0.95)
		mask &= f.overshoot < 5
		masked_fit = f[mask][fitness]
		masked_par = p[mask]
		counts, bins = np.histogram(masked_fit, bins = 50)
		ws = (bins[1] - bins[0])
		means = concat(map(lambda bin: masked_par[(masked_fit > bin - ws) & (masked_fit < bin + ws)].mean(), bins), axis=1).transpose()
		means_as_list = map(lambda x: list(means[x]), means.columns)
		xlabel = fl(fitness)
		ylabel = ''
		legend_labels = map(fl, means.columns)
		filename = folder + '%s.png'%fitness
		multiline_xy_plot(bins, means_as_list, xlabel, ylabel, legend_labels, filename)

	f,p,g, i=IO.load_pickled_generation_dataframe(dataset)
	for fitness in f.columns: mkplot(fitness)
Code Example #19
def issue_65_run_sim_for_clusters(dataset, n_clusters, load_from_file = False):
	from settings import get_fixed_parameters
	from fitness import evaluate_simulation_results
	import settings
	import os
	settings.PLOT_SAVE_PROB = 1
	
	fit, par, gen, ids = IO.load_pickled_generation_dataframe(dataset)
	# assumes a variant of issue_55_calc_cluster_stats that returns (stats, pvals, kmeans);
	# the version shown in Code Example #3 takes an extra gamma argument and returns nothing
	stats, pvals, kmeans = issue_55_calc_cluster_stats(dataset, n_clusters, load_from_file)
	graph_folder = '/Users/halfdan/Dropbox/Waseda/Research/MarketSimulation/Thesis/data_for_figures/issue_65/'
	
	for c, cluster in enumerate(kmeans.cluster_centers_):
		parameters = get_fixed_parameters()
		
		parameters.update(dict(zip(par.columns, map(int, cluster))))
		print parameters
		#plot_name = '%scluster%s'%(graph_folder, c)
		folder = '%scluster_%s/'%(graph_folder,c)
		if not os.path.exists(folder): os.makedirs(folder)
		evaluate_simulation_results(folder, 0, parameters, range(4), autorun=True)
Code Example #20
def d9_diagional_points(n_centers = 100):
	from plotting import get_pretty_xy_plot
	def getidx(center, max_dist_to_diagonal = 400, averaging_window_size = 5000): 
		cond1 = (np.abs(fit.time_to_reach_new_fundamental - fit.round_stable) < max_dist_to_diagonal)
		cond2 = (fit.time_to_reach_new_fundamental > center - averaging_window_size)
		cond3 = (fit.time_to_reach_new_fundamental < center + averaging_window_size)
		return cond1 & cond2 & cond3

	folder = make_issue_specific_figure_folder('diagional_points', 'd9')
	fit, par, gen, ids = IO.load_pickled_generation_dataframe('d9')
	centers_to_calculate = np.linspace(10000, 90000, n_centers)
	list_of_dataframes = map(lambda i: par[getidx(i, 1000, 5000)], centers_to_calculate)
	mean_frame = concat(map(lambda x: getattr(x, 'mean')(), list_of_dataframes), axis=1).transpose()
	std_frame = concat(map(lambda x: getattr(x, 'std')(), list_of_dataframes), axis=1).transpose()
	for parameter in mean_frame.columns:
		filename = folder + parameter + '.png'
		#print mean_frame[parameter]
		y1 = mean_frame[parameter] - std_frame[parameter]
		y2 = mean_frame[parameter] + std_frame[parameter]
		ax, fig = get_pretty_xy_plot(x=centers_to_calculate, y=mean_frame[parameter], xlabel='Time to reach new fundamental', ylabel=parameter, filename = filename, save_figure = False)
		ax.fill_between(centers_to_calculate, y2.values, y1.values, color = 'gray', alpha = 0.5)
		fig.savefig(filename)
	return centers_to_calculate, mean_frame, std_frame
Code Example #21
def issue_43_outlier_detection(dataset, n_clusters, gamma, load_from_file = False):
	from plotting import make_color_grouped_scatter_plot, make_scatter_plot_for_labelled_data
	from data_analysis import reduce_npoints_kmeans, outlier_detection_with_SVM, calculate_pca
	from sklearn.cluster import KMeans	
	fit_data, par_data, gen, ids = IO.load_pickled_generation_dataframe(dataset_name=dataset)


	reduced_par, labels_all_datapoints, km = reduce_npoints_kmeans(par_data, dataset, 'parameters', n_datapoints=1000, load_from_file=load_from_file)
	
	inliers_idx, outliers_idx = outlier_detection_with_SVM(reduced_par, kernel='rbf', gamma=gamma, outlier_percentage=0.01)
	transformed_data, pca, components = calculate_pca(par_data.iloc[inliers_idx,:], n_components = 3, whiten=False, normalize=True)
	
	kmeans = KMeans(n_clusters = n_clusters)
	kmeans.fit(transformed_data.values)
	
	filename = figure_save_path + dataset + '_issue_43_parameters_PCA_after_outlier_detection.png'
	colormap = brewer2mpl.get_map('RdBu', 'diverging', 4, reverse=True)
	make_color_grouped_scatter_plot(transformed_data, x_name='d1', y_name='d2', color_by='d3', filename=filename, colormap=colormap)

	print "Making scatter plot of Affinity Propagation clusters of fitness data for dataset %s"%dataset
	filename = figure_save_path + dataset + '_issue_43_parameters_kmeans_after_outlier_and_PCA.png'
	colormap = brewer2mpl.get_map('Set2', 'Qualitative', n_clusters, reverse=True)

	make_scatter_plot_for_labelled_data(data_frame=transformed_data, x_name='d1', y_name='d2', labels=kmeans.labels_, filename=filename, colormap = colormap, legend=True)
Code Example #22
def issue_103_manually_removing_large_fitness_points(dataset, overshoot_threshold):
	from plotting import make_color_grouped_scatter_plot
	from numpy import where
	folder = make_issue_specific_figure_folder('103_scatter_manual_outlier', dataset)
	fit, par, gen, ids = IO.load_pickled_generation_dataframe(dataset)
	#fit['overshoot'] -= 2
	o = where(fit.overshoot > overshoot_threshold)[0]
	not_o = where(fit.overshoot <= overshoot_threshold)[0]
	
	#colormap = brewer2mpl.get_map('RdBu', 'diverging', 4, reverse=True)
	colormap = brewer2mpl.get_map('Spectral', 'Diverging', 9, reverse=True)

	filename = folder + 'a.png'
	make_color_grouped_scatter_plot(fit.iloc[not_o], 'stdev', 'time_to_reach_new_fundamental', 'round_stable', filename, colormap)
	
	filename = folder + 'b.png'
	make_color_grouped_scatter_plot(fit.iloc[not_o], 'stdev', 'time_to_reach_new_fundamental', 'round_stable', filename, colormap, x_function='log', y_function='log', color_function='log')
	
	filename = folder + 'c.png'
	make_color_grouped_scatter_plot(data_frame=fit.iloc[not_o], x_name='time_to_reach_new_fundamental', y_name='round_stable', color_by='stdev', filename=filename, colormap = colormap)

	filename = folder + 'd.png'
	make_color_grouped_scatter_plot(data_frame=fit.iloc[not_o], x_name='stdev', y_name='round_stable', color_by='time_to_reach_new_fundamental', filename=filename, colormap = colormap, x_function='log', y_function='log')

	filename = folder + 'e.png'
	make_color_grouped_scatter_plot(data_frame=fit.iloc[not_o], x_name='stdev', y_name='overshoot', color_by='time_to_reach_new_fundamental', filename=filename, colormap = colormap)

	filename = folder + 'h.png'
	make_color_grouped_scatter_plot(fit.iloc[not_o], 'stdev', 'time_to_reach_new_fundamental', 'round_stable', filename, colormap, x_function='log')

	filename = folder + 'i.png'
	make_color_grouped_scatter_plot(fit.iloc[not_o], 'stdev', 'time_to_reach_new_fundamental', 'round_stable', filename, colormap)
	
	filename = folder + 'l.png'
	make_color_grouped_scatter_plot(data_frame=fit.iloc[not_o], x_name='stdev', y_name='round_stable', color_by='overshoot', filename=filename, colormap = colormap, x_function='log', y_function='log')

	filename = folder + 'k.png'
	make_color_grouped_scatter_plot(fit.iloc[not_o], 'stdev', 'time_to_reach_new_fundamental', 'overshoot', filename, colormap, x_function='log')

	filename = folder + 'g.png'
	make_color_grouped_scatter_plot(fit.iloc[not_o], 'overshoot', 'time_to_reach_new_fundamental', 'round_stable', filename, colormap)
	
	filename = folder + 'j.png'
	ax, fig = make_color_grouped_scatter_plot(data_frame=fit.iloc[not_o], x_name='time_to_reach_new_fundamental', y_name='round_stable', color_by='overshoot', filename=filename, colormap = colormap)
	ax.plot(range(0,10**5), range(0,10**5), linestyle='dashed', color='black', alpha = 0.5)
	ax.text(40000, 80000, "A", fontsize = 18, alpha = 0.6)
	ax.text(80000, 40000, "B", fontsize = 18, alpha = 0.6)
	fig.savefig(filename)

	#Plot with A B regions
	filename = folder + 'f.png'
	ax, fig = make_color_grouped_scatter_plot(data_frame=fit.iloc[not_o], x_name='time_to_reach_new_fundamental', y_name='round_stable', color_by='stdev', filename=filename, colormap = colormap, color_function='log')
	ax.plot(range(0,10**5), range(0,10**5), linestyle='dashed', color='black', alpha = 0.5)
	ax.text(40000, 80000, "A", fontsize = 18, alpha = 0.6)
	ax.text(80000, 40000, "B", fontsize = 18, alpha = 0.6)
	fig.savefig(filename)
	
	stats = concat([par.iloc[not_o,:].mean(), par.iloc[o,:].mean(), par.iloc[not_o,:].std(), par.iloc[o,:].std()], axis=1)
	lt = '$\overshoot > %s$'%overshoot_threshold
	st = '$\overshoot < %s$'%overshoot_threshold
	stats.columns = ['%s (mean)'%st, '%s (mean)'%lt, '%s (std)'%st, '%s (std)'%lt]
	
	tex_index = utils.get_latex_par_names_from_list(stats.index.tolist())
	stats.index = tex_index
	print utils.prettify_table(stats.to_latex(float_format=lambda x: str(round(x,1))), 'LABEL', 'CAPTION')
	return stats
Code Example #23
def collect_filter_individuals_and_replot(dataset, action, n_graphs_to_copy = 10, masks = None):
	import os, shutil
	from IO import figure_save_path
	from plotting import make_pretty_tradeprice_plot
	
	fit, par, gen, ids = IO.load_pickled_generation_dataframe(dataset)
	if not masks: 
		masks,filters = apply_filters(fit)
	# Give proper column name
	ids.columns = ['id', 'gen']
	raw_data_path = '/Users/halfdan/raw_data/d11/graphs/'
	graph_save_dir = figure_save_path + 'filter_graphs/'

	def replot(mask, subfolder_name, f = 0):
		try:
			in_mask = ids[mask]
			has_tuple = in_mask['id'].map(lambda x: isinstance(x, tuple))
			in_mask = in_mask[has_tuple]
		
			indexes = range(len(in_mask))
			np.random.shuffle(indexes)
			
			if n_graphs_to_copy == 'all':
				n_copy = len(indexes) - 1
			else:
				n_copy = n_graphs_to_copy
			names = map(lambda x: x[0], in_mask.iloc[indexes[0:n_copy]]['id'].values)
			#print 'names'
			#print names
			paths = map(lambda x: '%s%s'%(raw_data_path, x), names)
			graph_paths = map(lambda x: '%s.png'%x, paths)
			data_paths = map(lambda x: '%s.npz'%x, paths)
			#all_data = map(lambda x: np.load(x).items()[0][1].item(), data_paths)
			directory = '%s%s%s/'%(graph_save_dir, subfolder_name, f)
			parameters = DataFrame(columns = par.columns)
			if not os.path.exists(directory): os.makedirs(directory)
			for path, data_path, name in zip(paths, data_paths, names):
				data = np.load(data_path).items()[0][1].item()
				rounds = data['rounds']
				prices = data['tradePrice']
				filename = directory + name + '.png'
				parameters = parameters.append(data['parameters'], ignore_index=True)
				print 'Replotting market with pars: %s'%data['parameters']
				make_pretty_tradeprice_plot(rounds, prices, filename)
			return parameters

			# unreachable after the return above; kept from an earlier version that copied the raw graph files
			for path, graph, name in zip(paths, graph_paths, names):
				print 'copy %s to %s'%(graph, directory + name + '.png')
				shutil.copyfile(graph, directory + name + '.png')
		except IndexError:
			pass

	graph_cond = ids.id != ()
	if action == 'filter':
		for f, m in enumerate(masks):	
			filter_mask = m & graph_cond
			replot(filter_mask, action, f)
			graph_save_dir += 'filter/'
	elif action == 'large_overshoot':
		m = fit.overshoot > 25
		m = m & graph_cond
		graph_save_dir += 'large_overshoot/'
		return replot(m, action)
	elif action == 'no_response':
		m = fit.overshoot == 10**6
		m = m & graph_cond
		graph_save_dir += 'large_overshoot/'
		return replot(m, action)
	elif action == 'slow_simulations':
		mask = fit.time_to_reach_new_fundamental > 50000
		graph_save_dir += 'slow_simulations/'
		replot(mask, action)
	else:
		print 'Doing nothing'
Code Example #24
def print_quantile_tables(dataset):
	f, p, g, i = IO.load_pickled_generation_dataframe(dataset)
	uppermask = p.ssmm_latency_mu > p.ssmm_latency_mu.quantile(0.9)
	lowermask = p.ssmm_latency_mu < p.ssmm_latency_mu.quantile(0.1)
	print concat([f[lowermask].mean(), f[uppermask].mean()], axis=1).to_latex(float_format = lambda x: str(round(x,1)))
Code Example #25
File: gp.py Project: halfdanrump/MarketSimulation
def run():
    #----------------------------------------------------------------------
    #  First the noiseless case
    X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T

    # Observations
    y = f(X).ravel()

    # Mesh the input space for evaluations of the real function, the prediction and
    # its MSE
    x = np.atleast_2d(np.linspace(0, 10, 1000)).T

    # Instantiate a Gaussian Process model
    gp = GaussianProcess(corr='cubic', theta0=1e-2, thetaL=1e-4, thetaU=1e-1,
                         random_start=100)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)

    # Make the prediction on the meshed x-axis (ask for MSE as well)
    y_pred, MSE = gp.predict(x, eval_MSE=True)
    sigma = np.sqrt(MSE)

    # Plot the function, the prediction and the 95% confidence interval based on
    # the MSE
    fig = pl.figure()
    pl.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$')
    pl.plot(X, y, 'r.', markersize=10, label=u'Observations')
    pl.plot(x, y_pred, 'b-', label=u'Prediction')
    pl.fill(np.concatenate([x, x[::-1]]),
            np.concatenate([y_pred - 1.9600 * sigma,
                           (y_pred + 1.9600 * sigma)[::-1]]),
            alpha=.5, fc='b', ec='None', label='95% confidence interval')
    pl.xlabel('$x$')
    pl.ylabel('$f(x)$')
    pl.ylim(-10, 20)
    pl.legend(loc='upper left')

    #----------------------------------------------------------------------
    # now the noisy case
    import IO
    fit,par,gen,ids = IO.load_pickled_generation_dataframe('d10')

    #X = np.linspace(0.1, 9.9, 20)
    #X = np.atleast_2d(X).T
    #print X.shape
    X = fit['time_to_reach_new_fundamental'].iloc[range(100)].reshape((100,1))
    y = par['ssmm_nAgents'].iloc[range(100)].tolist()
    print X
    print y
    # Observations and noise
    #y = f(X).ravel()

    dy = 0.5 + 1.0 * np.random.random(len(y))  # error-bar magnitudes; y is a plain list here, so len() rather than .shape
    #noise = np.random.normal(0, dy)
    #y += noise

    # Mesh the input space for evaluations of the real function, the prediction and
    # its MSE
    x = np.atleast_2d(np.linspace(0, 10, 1000)).T

    # Instantiate a Gaussian Process model
    gp = GaussianProcess(corr='squared_exponential', theta0=1e-1,
                         thetaL=1e-3, thetaU=1,
                         random_start=100)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)

    # Make the prediction on the meshed x-axis (ask for MSE as well)
    y_pred, MSE = gp.predict(x, eval_MSE=True)
    sigma = np.sqrt(MSE)

    # Plot the function, the prediction and the 95% confidence interval based on
    # the MSE
    fig = pl.figure()
    pl.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$')
    pl.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label=u'Observations')
    pl.plot(x, y_pred, 'b-', label=u'Prediction')
    pl.fill(np.concatenate([x, x[::-1]]),
            np.concatenate([y_pred - 1.9600 * sigma,
                           (y_pred + 1.9600 * sigma)[::-1]]),
            alpha=.5, fc='b', ec='None', label='95% confidence interval')
    pl.xlabel('$x$')
    pl.ylabel('$f(x)$')
    pl.ylim(-10, 20)
    pl.legend(loc='upper left')

    pl.show()
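The GaussianProcess class used above (corr=, theta0=, eval_MSE=) is the pre-0.18 scikit-learn API and was removed in 0.20. A minimal modern sketch of the noiseless fit with GaussianProcessRegressor; the RBF kernel here is an assumption, not a translation of the corr/theta settings:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
y = (X * np.sin(X)).ravel()  # the same toy target f(x) = x*sin(x)
gpr = GaussianProcessRegressor(kernel=RBF(length_scale=1.0), n_restarts_optimizer=10)
gpr.fit(X, y)
x = np.atleast_2d(np.linspace(0, 10, 1000)).T
y_pred, sigma = gpr.predict(x, return_std=True)  # sigma plays the role of sqrt(MSE)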
Code Example #26
def issue_108(dataset, n_clusters, overshoot_threshold, load_pickled_labels = False, labels_to_include = []):
	from numpy import where, repeat, log, random
	from sklearn.cluster import KMeans
	from plotting import make_scatter_plot_for_labelled_data
	from data_analysis import calculate_stats_for_dataframe
	from sklearn.mixture import GMM
	from sklearn.decomposition import PCA
	from sklearn.preprocessing import scale
	import cPickle
	if labels_to_include: labels_to_plot = labels_to_include
	else: labels_to_plot = range(n_clusters)

	plots_to_make = [
					{'x_name':'stdev', 'x_function':'log', 'y_name':'round_stable', 'y_function':'log'},
					{'x_name':'time_to_reach_new_fundamental', 'y_name':'round_stable'},
					{'x_name':'stdev', 'x_function':'log', 'y_name':'time_to_reach_new_fundamental'}
					]

	folder = make_issue_specific_figure_folder('108 cluster after removing outliers', dataset)
	fit, par, gen, ids = IO.load_pickled_generation_dataframe(dataset)
	o = where(fit.overshoot > overshoot_threshold)[0]
	not_o = where(fit.overshoot <= overshoot_threshold)[0]
	data_to_plot = fit.iloc[not_o]
	pca = PCA(n_components = 3)
	par_transformed = pca.fit_transform(scale(par.iloc[not_o].astype(float)))
	par_transformed += random.random(par_transformed.shape)*0.2
	par_inliers_PCA = DataFrame(par_transformed, columns = ['PC1', 'PC2', 'PC3'])
	
	"""
	print 'Component 0:'
	print map(lambda c, n: '%s=%.3g'%(n, c), pca.components_[0], utils.get_latex_par_names_from_list(par.columns))
	print 'Component 1:'
	print map(lambda c, n: '%s=%.3g'%(n, c), pca.components_[1], utils.get_latex_par_names_from_list(par.columns))
	print 'Component 1:'
	print map(lambda c, n: '%s=%.3g'%(n, c), pca.components_[2], utils.get_latex_par_names_from_list(par.columns))
	"""
	

	def print_pca_components(pca, name):
		from plotting import plot_pca_components
		components = DataFrame(pca.components_, columns=utils.get_latex_par_names_from_list(par.columns))
		filename = folder + 'clustering_%s_%s'%(dataset, name)
		plot_pca_components(filename=filename, components=components[::-1])
		components['$\\gamma$'] = pca.explained_variance_ratio_
		
		filename = folder + 'clustering_%s_%s.tex'%(dataset, name)
		tex = utils.prettify_table(components.to_latex(float_format=lambda x: str(round(x,2))), label='table:clustering_%s_%s'%(dataset, name), caption='XXX')
		with open(filename, 'w') as f:
				f.write(tex)
	print_pca_components(pca, 'allclusters')

	def make_tables(clustering_method, name, cluster_labels):
		
		def make_table(stat):
			fit_inlier_stats = calculate_stats_for_dataframe(fit.iloc[not_o,:], cluster_labels)
			fit_outlier_stats = calculate_stats_for_dataframe(fit.iloc[o,:], repeat(0, len(o)))
			fit_mean_table = concat([fit_inlier_stats[stat], fit_outlier_stats[stat]], axis=1)
			fit_mean_table = fit_mean_table.drop('Count', axis=0)
			fit_mean_table.index = utils.get_latex_par_names_from_list(fit_mean_table.index)
			fit_mean_table = fit_mean_table.transpose()
			
			par_inlier_stats = calculate_stats_for_dataframe(par.iloc[not_o,:], cluster_labels)
			par_outlier_stats = calculate_stats_for_dataframe(par.iloc[o,:], repeat(0, len(o)))
			par_mean_table = concat([par_inlier_stats[stat], par_outlier_stats[stat]], axis=1)
			par_mean_table.index = utils.get_latex_par_names_from_list(par_mean_table.index)
			par_mean_table = par_mean_table.transpose()
			
			full_table = concat([fit_mean_table, par_mean_table], axis=1)
			print full_table.columns
			full_table = full_table.sort('\\overshoot')
			tex = full_table.to_latex(float_format=lambda x: str(round(x,1)))
			
			tex = utils.prettify_table(full_table.to_latex(float_format=lambda x: str(round(x,1))), 'table:fit_gmm_'+name, 'gmm_'+name)
			filename = folder + '%s_%s_%s_%s.tex'%(n_clusters,clustering_method, name, stat)
			with open(filename, 'w') as f:
				f.write(tex)
		
		make_table('Mean')
		make_table('Std')
	#def make_pca_plots():


	def make_plots(clustering_method, data_name, labels):
		colormap = brewer2mpl.get_map('Paired', 'Qualitative', n_clusters, reverse=True)
		if not labels_to_include:
			for i, plotargs in enumerate(plots_to_make):
				filename = folder + '%s_%s_%s_fit_%s.png'%(n_clusters, clustering_method, data_name, i)
				make_scatter_plot_for_labelled_data(data_to_plot, labels=labels, filename=filename, colormap=colormap, legend = True, **plotargs)
			filename = folder + '%s_%s_%s_par.png'%(n_clusters, clustering_method, data_name)
			make_scatter_plot_for_labelled_data(par_inliers_PCA, x_name='PC1', y_name='PC2', labels=labels, filename=filename, colormap=colormap, legend = True)
			filename = folder + '%s_%s_%s_par_omit.png'%(n_clusters, clustering_method, data_name)
			make_scatter_plot_for_labelled_data(par_inliers_PCA, x_name='PC1', y_name='PC2', labels=labels, filename=filename, colormap=colormap, legend = True, omit_largest=n_clusters-4)
		else:
			ltp = '_'.join(map(str, labels_to_plot))
			filename = folder + '%s_%s_%s_par_%s_pca_1v2.png'%(n_clusters, clustering_method, data_name, ltp)
			data = par_inliers_PCA[['PC1', 'PC2']]
			make_scatter_plot_for_labelled_data(data, x_name='PC1', y_name='PC2', labels=labels, filename=filename, colormap=colormap, legend = True, labels_to_plot = labels_to_plot)
			filename = folder + '%s_%s_%s_par_%s_pca_1v3.png'%(n_clusters, clustering_method, data_name, ltp)
			data = par_inliers_PCA[['PC1', 'PC3']]
			make_scatter_plot_for_labelled_data(data, x_name='PC1', y_name='PC3', labels=labels, filename=filename, colormap=colormap, legend = True, labels_to_plot = labels_to_plot)
			filename = folder + '%s_%s_%s_par_%s_pca_2v3.png'%(n_clusters, clustering_method, data_name, ltp)
			data = par_inliers_PCA[['PC2', 'PC3']]
			make_scatter_plot_for_labelled_data(data, x_name='PC2', y_name='PC3', labels=labels, filename=filename, colormap=colormap, legend = True, labels_to_plot = labels_to_plot)
			

			selective_par = par.iloc[not_o][get_indluded_labels_mask(labels, *labels_to_plot)]
			selective_labels = labels[get_indluded_labels_mask(labels, *labels_to_include)]
			pca_selective = PCA(3)
			selective_par = DataFrame(pca_selective.fit_transform(selective_par), columns = ['PC1', 'PC2', 'PC3'])
			
			print_pca_components(pca_selective, ltp)

			filename = folder + '%s_%s_%s_par_%s_pca_1v2_selective.png'%(n_clusters, clustering_method, data_name, ltp)
			data = selective_par[['PC1', 'PC2']]
			make_scatter_plot_for_labelled_data(data, x_name='PC1', y_name='PC2', labels=selective_labels, filename=filename, colormap=colormap, legend = True, labels_to_plot = labels_to_plot)
			filename = folder + '%s_%s_%s_par_%s_pca_1v3_selective.png'%(n_clusters, clustering_method, data_name, ltp)
			data = selective_par[['PC1', 'PC3']]
			make_scatter_plot_for_labelled_data(data, x_name='PC1', y_name='PC3', labels=selective_labels, filename=filename, colormap=colormap, legend = True, labels_to_plot = labels_to_plot)
			filename = folder + '%s_%s_%s_par_%s_pca_2v3_selective.png'%(n_clusters, clustering_method, data_name, ltp)
			data = selective_par[['PC2', 'PC3']]
			make_scatter_plot_for_labelled_data(data, x_name='PC2', y_name='PC3', labels=selective_labels, filename=filename, colormap=colormap, legend = True, labels_to_plot = labels_to_plot)
			#par_data_for_plotting = par.iloc[not_o].copy() + random.random(par.iloc[not_o].shape) * 0.2
			#filename = folder + '%s_%s_%s_par_latencies.png'%(n_clusters, clustering_method, data_name)
			#columns = ['ssmm_latency_mu', 'sc_latency_mu']
			#make_scatter_plot_for_labelled_data(par_data_for_plotting[columns], 'ssmm_latency_mu', 'sc_latency_mu', labels, filename, colormap, legend = True, labels_to_plot = labels_to_plot)
		

	def get_indluded_labels_mask(labels, *labels_to_include):
		from numpy import repeat
		mask = repeat(False, labels.shape)
		for l in labels_to_include:
			mask = mask | (labels == l)
			print mask
		
		return mask

	def cluster_and_label(name, data_to_cluster):
		
		data_to_cluster = scale(data_to_cluster)
		"""
		km_labels_store_file = folder + '%s_%s_%s_classifier.pickle'%(n_clusters, 'km', name)
		if load_pickled_labels:
			with open(km_labels_store_file, 'rb') as fid:
				km_labels = cPickle.load(fid)
		else:
			km = KMeans(n_clusters=n_clusters)
			km.fit(data_to_cluster)
			km_labels = km.predict(data_to_cluster)
			with open(km_labels_store_file, 'wb') as fid:
				cPickle.dump(km_labels, fid)
		make_plots('km', name, km_labels)
		make_tables('km', name, km_labels)
		"""

		gmm_labels_store_file = folder + '%s_%s_%s_classifier.pickle'%(n_clusters, 'gmm', name)
		if load_pickled_labels:
			with open(gmm_labels_store_file, 'rb') as fid:
				gmm_labels = cPickle.load(fid)
		else:
			gmm = GMM(n_components = n_clusters, covariance_type = 'full')
			gmm.fit(data_to_cluster)
			gmm_labels = gmm.predict(data_to_cluster)
			with open(gmm_labels_store_file, 'wb') as fid:
				cPickle.dump(gmm_labels, fid)
		#make_plots('gmm', name, gmm_labels)
		make_tables('gmm', name, gmm_labels)

		
		

		
		

	#data_to_cluster = concat([log(fit['stdev']), log(fit['round_stable']), fit['time_to_reach_new_fundamental']], axis=1).iloc[not_o,:]
	#cluster_and_label('logs_logr_t', data_to_cluster)
	"""
	data_to_cluster = concat([log(fit['stdev']), log(fit['round_stable'])], axis=1).iloc[not_o,:]
	cluster_and_label('logs_logr', data_to_cluster, 'stdev', 'round_stable', 'log', 'log')

	data_to_cluster = concat([fit['round_stable'], log(fit['stdev'])], axis=1).iloc[not_o,:]
	cluster_and_label('r_logs', data_to_cluster, 'time_to_reach_new_fundamental', 'round_stable')

	data_to_cluster = concat([fit['round_stable'], fit['time_to_reach_new_fundamental'], log(fit['stdev'])], axis=1).iloc[not_o,:]
	cluster_and_label('t_r_logs', data_to_cluster, 'time_to_reach_new_fundamental', 'round_stable')

	data_to_cluster = concat([fit['round_stable'], fit['time_to_reach_new_fundamental']], axis=1).iloc[not_o,:]
	cluster_and_label('t_r', data_to_cluster, 'time_to_reach_new_fundamental', 'round_stable')
	"""

	data_to_cluster = fit.iloc[not_o,:]
	cluster_and_label('all', data_to_cluster)
	
	data_to_cluster = concat([log(fit['stdev']), log(fit['round_stable']), fit['time_to_reach_new_fundamental']], axis=1).iloc[not_o,:]
	cluster_and_label('logs_logr_t', data_to_cluster)
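sklearn.mixture.GMM was superseded by GaussianMixture in scikit-learn 0.18 and removed in 0.20; the fitting step inside cluster_and_label under the newer API, as a sketch over the same scaled data_to_cluster:

from sklearn.mixture import GaussianMixture

gmm = GaussianMixture(n_components=n_clusters, covariance_type='full')
gmm.fit(data_to_cluster)
gmm_labels = gmm.predict(data_to_cluster)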
Code Example #27
def faster_mm_makes_worse_markets(dataset):
	from plotting import multiline_xy_plot
	from utils import make_issue_specific_figure_folder
	def get_mmlat_mask(l, u): 
		return (p.ssmm_latency_mu > l) & (p.ssmm_latency_mu < u)

	def get_ssmmlatencyrange_mean(agent_mask, ssmmlatencyrange = range(1,100), nsc_lower = 0):
		return concat(map(lambda l: f[get_mmlat_mask(l,l+20) & agent_mask].mean(), ssmmlatencyrange), axis=1).transpose()

	def get_sclat_mask(l, u): 
		return (p.sc_latency_mu > l) & (p.sc_latency_mu < u)

	def get_sclatencyrange_mean(agent_mask, sclatencyrange = range(1,100), nsc_lower = 0):
		return concat(map(lambda l: f[get_sclat_mask(l,l+20) & agent_mask].mean(), sclatencyrange), axis=1).transpose()

	def get_nchartist_mask(lower, upper):
		return (p.sc_nAgents >= lower) & (p.sc_nAgents < upper)

	def get_nmm_mask(lower, upper):
		return (p.ssmm_nAgents >= lower) & (p.ssmm_nAgents < upper)
	
	def zip_to_tuples(r): return zip(r[:-1], r[1::])

	ssmmlatencyrange = range(80)
	sclatencyrange = range(100)

	

	if dataset == 'd10d11':
		f, p = utils.load_d10d11()
	else:
		f,p,g, i=IO.load_pickled_generation_dataframe(dataset_name=dataset)

	folder = make_issue_specific_figure_folder('faster_mm_makes_worse_markets', dataset)
	try:
		for fitness in f.columns:
			filename = folder + fitness + '_SC_mmlatency.png'
			xlabel = 'Market maker latency'
			ylabel = fitness
			legend_labels = list()
			ys = list()
			for nsc_lower, nsc_upper in zip_to_tuples(np.linspace(0,500,6)):
				nchartist_mask = get_nchartist_mask(nsc_lower, nsc_upper)	
				means = get_ssmmlatencyrange_mean(nchartist_mask, ssmmlatencyrange, nsc_lower = nsc_lower)
				ys.append(means[fitness])
				legend_labels.append('%s <= # SC < %s'%(nsc_lower, nsc_upper))
			multiline_xy_plot(means.index, ys, xlabel, ylabel, legend_labels, filename, y_errorbars=None, save_figure = True)

			filename = folder + fitness + '_SC_sclatency.png'
			xlabel = 'Chartist latency'
			legend_labels = list()
			ys = list()
			for nsc_lower, nsc_upper in zip_to_tuples(np.linspace(0,500,6)):
				nchartist_mask = get_nchartist_mask(nsc_lower, nsc_upper)	
				means = get_sclatencyrange_mean(nchartist_mask, sclatencyrange, nsc_lower = nsc_lower)
				ys.append(means[fitness])
				legend_labels.append('%s <= # SC < %s'%(nsc_lower, nsc_upper))
			multiline_xy_plot(means.index, ys, xlabel, ylabel, legend_labels, filename, y_errorbars=None, save_figure = True)
	except AttributeError:
		pass

	try:	
		for fitness in f.columns:
			filename = folder + fitness + '_MM_mmlatency.png'
			xlabel = 'Market maker latency'
			ylabel = fitness
			legend_labels = list()
			ys = list()
			for nmm_lower, nmm_upper in zip_to_tuples(range(0,150,25)):
				n_mm_mask = get_nmm_mask(nmm_lower, nmm_upper)	
				means = get_ssmmlatencyrange_mean(n_mm_mask, ssmmlatencyrange)
				ys.append(means[fitness])
				legend_labels.append('%s <= # MM < %s'%(nmm_lower, nmm_upper))
			multiline_xy_plot(means.index, ys, xlabel, ylabel, legend_labels, filename, y_errorbars=None, save_figure = True)

			filename = folder + fitness + '_MM_sclatency.png'
			xlabel = 'Chartist latency'
			ylabel = fitness
			legend_labels = list()
			ys = list()
			for nmm_lower, nmm_upper in zip_to_tuples(range(0,150,25)):
				n_mm_mask = get_nmm_mask(nmm_lower, nmm_upper)	
				means = get_sclatencyrange_mean(n_mm_mask, sclatencyrange)
				ys.append(means[fitness])
				legend_labels.append('%s <= # MM < %s'%(nmm_lower, nmm_upper))
			multiline_xy_plot(means.index, ys, xlabel, ylabel, legend_labels, filename, y_errorbars=None, save_figure = True)		
	except AttributeError:
		pass
Code Example #28
def issue_82_parameter_evolution(dataset, vline_x = []):
	def get_stats(name, stats):
		return [getattr(group[name], s)() for s in stats]

	def d3():
		#make_pretty_generation_plot(folder + 'd3_latpars_s.png', generations, [group['ssmm_latency_s'].mean(), group['sc_latency_s'].mean()], 'Average latency std', ['Market makers', 'Chartists'])
		make_pretty_generation_plot(folder + 'nAgents.png', generations, [group['ssmm_nAgents'].mean(), group['sc_nAgents'].mean()], 'Average number of agents', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_nAgents'].std(), group['sc_nAgents'].std()])
		make_pretty_generation_plot(folder + 'thinkpars_s.png', generations, [group['ssmm_think_s'].mean(), group['sc_think_s'].mean()], 'Average if the thinking time standard deviation', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_think_s'].std(), group['sc_think_s'].std()])
		make_pretty_generation_plot(folder + 'thinkpars_mu.png', generations, [group['ssmm_think_mu'].mean(), group['sc_think_mu'].mean()], 'Average of the thinking time distribution mean', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_think_mu'].std(), group['sc_think_mu'].std()])
		make_pretty_generation_plot(folder + 'latpars_mu.png', generations, [group['ssmm_latency_mu'].mean(), group['sc_latency_mu'].mean()], 'Average of the latency distribution mean', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_latency_mu'].std(), group['sc_latency_mu'].std()])
		make_pretty_generation_plot(folder + 'latpars_s.png', generations, [group['ssmm_latency_s'].mean(), group['sc_latency_s'].mean()], 'Average of the latency distribution standard deviation', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_latency_s'].std(), group['sc_latency_s'].std()])
		make_pretty_generation_plot(folder + 'scwaittime_mu.png', generations, [group['sc_waitTimeBetweenTrading_mu'].mean()], 'Average of the chartist waiting time distribution mean', ['Chartists'], y_errorbar=[group['sc_waitTimeBetweenTrading_mu'].std()])
		make_pretty_generation_plot(folder + 'sctimehorizon_mu.png', generations, [group['sc_timehorizon_mu'].mean()], 'Average of the chartist time horizon distribution mean', ['Chartists'], y_errorbar=[group['sc_timehorizon_mu'].std()])
	
	def d9():
		make_pretty_generation_plot(folder + 'latpars_s.png', generations, [group['ssmm_latency_s'].mean(), group['sc_latency_s'].mean()], 'Average latency std', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_latency_s'].std(), group['sc_latency_s'].std()])
		make_pretty_generation_plot(folder + 'latpars_mu.png', generations, [group['ssmm_latency_mu'].mean(), group['sc_latency_mu'].mean()], 'Average latency mean', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_latency_mu'].std(), group['sc_latency_mu'].std()])
		make_pretty_generation_plot(folder + 'thinkpars_s.png', generations, [group['ssmm_think_s'].mean(), group['sc_think_s'].mean()], 'Average think time std', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_think_s'].std(), group['sc_think_s'].std()])
		make_pretty_generation_plot(folder + 'thinkpars_mu.png', generations, [group['ssmm_think_mu'].mean(), group['sc_think_mu'].mean()], 'Average think time mean', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_think_mu'].std(), group['sc_think_mu'].std()])
		make_pretty_generation_plot(folder + 'scwaittime_mu.png', generations, [group['sc_waitTimeBetweenTrading_mu'].mean()], 'Average of the chartist waiting time distribution mean', ['Chartists'], y_errorbar=[group['sc_waitTimeBetweenTrading_mu'].std()])
		make_pretty_generation_plot(folder + 'sctimehorizon_mu.png', generations, [group['sc_timehorizon_mu'].mean()], 'Average of the chartist time horizon distribution mean', ['Chartists'], y_errorbar=[group['sc_timehorizon_mu'].std()])
	
	def d10():
		make_pretty_generation_plot(folder + 'latpars_s.png', generations, [group['ssmm_latency_s'].mean(), group['sc_latency_s'].mean()], 'Average latency std', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_latency_s'].std(), group['sc_latency_s'].std()], vline_x = vline_x)
		
		fig, ax, filename = make_pretty_generation_plot(folder + 'latpars_mu.png', generations, [group['ssmm_latency_mu'].mean(), group['sc_latency_mu'].mean()], 'Average latency mean', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_latency_mu'].std(), group['sc_latency_mu'].std()], vline_x = vline_x)
		ax.fill_between(x=[30, 50], y1=ax.get_ylim()[0], y2=ax.get_ylim()[1], color='red', alpha=0.1)
		ax.fill_between(x=[0, 17], y1=ax.get_ylim()[0], y2=ax.get_ylim()[1], color='blue', alpha=0.1)
		fig.savefig(filename)
		
		fig, ax, filename = make_pretty_generation_plot(folder + 'nAgents.png', generations, [group['ssmm_nAgents'].mean()], 'Average number of agents', ['Market makers'], y_errorbar=[group['ssmm_nAgents'].std()], vline_x = vline_x)
		ax.fill_between(x=[30, 50], y1=ax.get_ylim()[0], y2=ax.get_ylim()[1], color='red', alpha=0.1)
		ax.fill_between(x=[0, 17], y1=ax.get_ylim()[0], y2=ax.get_ylim()[1], color='blue', alpha=0.1)
		fig.savefig(filename)

		fig, ax, filename = make_pretty_generation_plot(folder + 'time_to_reach_new_fundamental.png', generations, get_stats('time_to_reach_new_fundamental', stats), 'Time to reach fundamental after shock', stats, vline_x = vline_x)
		ax.fill_between(x=[0, 17], y1=ax.get_ylim()[0], y2=ax.get_ylim()[1], color='blue', alpha=0.1)
		fig.savefig(filename)
		
		fig, ax, filename = make_pretty_generation_plot(folder + 'stdev.png', generations, get_stats('stdev', stats), 'Standard deviation of trade prices entering stability margin', stats, y_logscale=True, vline_x=vline_x)
		ax.fill_between(x=[30, 50], y1=ax.get_ylim()[0], y2=ax.get_ylim()[1], color='red', alpha=0.1)
		fig.savefig(filename)

		fig, ax, filename = make_pretty_generation_plot(folder + 'round_stable.png', generations, get_stats('round_stable', stats), 'Round stable', stats, y_logscale=True, vline_x=vline_x)
		ax.fill_between(x=[30, 50], y1=ax.get_ylim()[0], y2=ax.get_ylim()[1], color='red', alpha=0.1)
		fig.savefig(filename)
		
		fig, ax, filename = make_pretty_generation_plot(folder + 'overshoot.png', generations, get_stats('overshoot', stats), 'Overshoot', stats, vline_x=vline_x)
		ax.fill_between(x=[30, 50], y1=ax.get_ylim()[0], y2=ax.get_ylim()[1], color='red', alpha=0.1)
		ax.fill_between(x=[0, 17], y1=ax.get_ylim()[0], y2=ax.get_ylim()[1], color='blue', alpha=0.1)
		fig.savefig(filename)

	def d11():
		make_pretty_generation_plot(folder + 'latpars_s.png', generations, [group['ssmm_latency_s'].mean(), group['sc_latency_s'].mean()], 'Average latency std', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_latency_s'].std(), group['sc_latency_s'].std()])
		make_pretty_generation_plot(folder + 'latpars_mu.png', generations, [group['ssmm_latency_mu'].mean(), group['sc_latency_mu'].mean()], 'Average latency mean', ['Market makers', 'Chartists'], y_errorbar=[group['ssmm_latency_mu'].std(), group['sc_latency_mu'].std()])
		make_pretty_generation_plot(folder + 'nAgents.png', generations, [group['sc_nAgents'].mean()], 'Average number of agents', ['Chartists'], y_errorbar=[group['sc_nAgents'].std()])



	from plotting import make_pretty_generation_plot
	folder = make_issue_specific_figure_folder('82_generation_plots', dataset)
	fit,par,gen,ids = IO.load_pickled_generation_dataframe(dataset)
	all_data = concat([fit,par, DataFrame(gen)], axis=1)
	generations = list(set(all_data['gen']))
	group = all_data.groupby('gen')
	stats = ['min', 'mean', 'median']
	
	make_pretty_generation_plot(folder + 'time_to_reach_new_fundamental.png', generations, get_stats('time_to_reach_new_fundamental', stats), 'Time to reach fundamental after shock', stats, vline_x = vline_x)
	make_pretty_generation_plot(folder + 'stdev.png', generations, get_stats('stdev', stats), 'Standard deviation of trade prices entering stability margin', stats, y_logscale=True, vline_x=vline_x)
	make_pretty_generation_plot(folder + 'round_stable.png', generations, get_stats('round_stable', stats), 'Round stable', stats, y_logscale=True, vline_x=vline_x)
	make_pretty_generation_plot(folder + 'overshoot.png', generations, get_stats('overshoot', stats), 'Overshoot', stats, vline_x=vline_x)
	eval(dataset)()
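The closing eval(dataset)() call dispatches to the per-dataset plotting function above, but raises NameError for any other string; a safer dict-based sketch of the same dispatch, sitting in the same scope:

dispatch = {'d3': d3, 'd9': d9, 'd10': d10, 'd11': d11}
dispatch[dataset]()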