def dmaps_demo():
    """Demonstrate the DMAPS algorithm on a swissroll dataset.

    Uses a predefined epsilon value. The routine:

    1. generates the dataset with ``gen_swissroll`` and displays it, colored
       by the norm of the first two coordinates,
    2. shows the epsilon-investigation plot from ``dmaps.epsilon_plot``,
    3. computes a ``k = 4`` embedding with ``dmaps.embed_data`` and reports
       how long that call took,
    4. writes ``./eigvects.csv``, ``./eigvals.csv`` and ``./data.csv``,
    5. plots every pair (i, j) with ``1 <= i < j < k`` of eigenvalue-scaled
       eigenvectors against each other.
    """
    data = gen_swissroll()
    epsilon = 2  # previously tried: np.sqrt(5.0)
    print('Swissroll generated with %s points' % data.shape[0])
    print('Displaying dataset')
    plot_dmaps.plot_xyz(data[:, 0], data[:, 1], data[:, 2],
                        color=np.linalg.norm(data[:, :2], axis=1), s=80)

    # investigate proper epsilon
    print('Investigating effect of epsilon on embedding')
    dmaps.epsilon_plot(data)

    k = 4
    print('Computing embedding')
    # time.clock was removed in Python 3.8; prefer perf_counter when it exists
    # and fall back to time.clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    # Time only the embedding itself so the figure reported below does not
    # include the CSV writes.
    start = timer()
    eigvals, eigvects = dmaps.embed_data(data, k, epsilon=epsilon)
    elapsed = timer() - start

    np.savetxt('./eigvects.csv', eigvects, delimiter=',')
    np.savetxt('./eigvals.csv', eigvals, delimiter=',')
    np.savetxt('./data.csv', data, delimiter=',')
    print('Lanczos solver took %ss to find top %s eigenvectors' % (elapsed, k))

    print('Displaying dmaps embeddings')
    # index 0 is skipped; every plot is colored by the eigenvector at index 2
    for i in range(1, k):
        for j in range(i + 1, k):
            xlabel = r'$\Phi_{' + str(i + 1) + '}$'
            ylabel = r'$\Phi_{' + str(j + 1) + '}$'
            plot_dmaps.plot_xy(
                eigvals[i] * eigvects[:, i],
                eigvals[j] * eigvects[:, j],
                xlabel=xlabel,
                ylabel=ylabel,
                title='Embedding dataset with ' + xlabel + ' and ' + ylabel,
                color=eigvects[:, 2],
                s=50,
                scatter=True,
                hide_ticks=True,
                cmap='jet')
def dmaps_contour():
    """Embed the transformed contour dataset and plot it colored by eigenvector.

    Reads ``./data/output/contour_KVSt_to_dmaps_transformed.csv``, computes a
    five-dimensional embedding with ``dmaps.embed_data`` (epsilon = 1e-1) and
    then, for each eigenvector index 1..4, shows a 3d scatter of the first
    three data columns colored by that eigenvector.
    """
    data = np.genfromtxt(
        './data/output/contour_KVSt_to_dmaps_transformed.csv',
        skip_header=0,
        delimiter=',')

    # NOTE: earlier experiments (subsampling to 5000 points, adding gaussian
    # noise of level 0.01 to the third column, an epsilon plot over
    # np.logspace(-4, 2, 10), and 2d eigenvector-vs-eigenvector plots for
    # eps=5e-2 and eps=1e-1) were disabled in this routine.
    ndims = 5
    eigvals, eigvects = dmaps.embed_data(data, ndims, epsilon=1e-1)

    # index 0 is skipped; one figure per remaining eigenvector
    for i in range(1, ndims):
        plot_dmaps.plot_xyz(data[:, 0], data[:, 1], data[:, 2],
                            color=eigvects[:, i],
                            xlabel=r'$\hat{St}$',
                            ylabel=r'$\hat{V}$',
                            zlabel=r'$\hat{K}$')
def _build_arg_parser():
    """Return the command-line parser used by ``main``."""
    parser = argparse.ArgumentParser()
    parser.add_argument('input_files', nargs='+',
                        help="files from which data will be read")
    # Every option is a boolean switch that enables one plotting routine.
    switches = [
        ('--dmap-embeddings',
         "plot 2d DMAP embeddings from eigenvector inputs"),
        ('--kernel-sums',
         "plots kernel sums vs. epsilon in kernel for determination of "
         "epsilon in DMAP"),
        ('--of-coloring',
         "plots the k1, k2 plane colored by obj. fn. value"),
        ('--kplane-coloring',
         "plots k1, k2 plane colored by successive DMAP eigenvectors"),
        ('--param-surface',
         "plots parameters from 'sloppy_params.csv' in three-dimensional "
         "space"),
        ('--kvs-of-contours',
         "plots contours of the objective function, where the input data is "
         "a (npts, 4) array whose rows are (K, V, St, of_eval)"),
        # the next two list three columns per row, hence (npts, 3)
        ('--ek-of-contours',
         "plots contours of the objective function, where the input data is "
         "a (npts, 3) array whose rows are (epsilon, kappa, of_eval)"),
        ('--kv-of-contours',
         "plots contours of the objective function, where the input data is "
         "a (npts, 3) array whose rows are (K, V, of_eval)"),
        ('--of-contour',
         "plots contour of the objective function, where the input data is "
         "a (npts, 2) array whose rows are (param1, param2)"),
    ]
    for flag, help_text in switches:
        parser.add_argument(flag, action='store_true', default=False,
                            help=help_text)
    return parser


def _load_datasets(filenames, data_types):
    """Group the data of every ``.csv`` file in *filenames* by file header.

    Returns a dict: dataset key -> {'params': header params, data type: raw
    data}. The keys are tuples built from the header and are not meant to be
    used for lookup; iterate over ``.values()`` instead.
    """
    datasets = {}
    for filename in filenames:
        # only import csv files
        if not filename.endswith('.csv'):
            continue
        data, params = uf.get_data(filename, header_rows=1)
        # Sort the header items so that two files with the same header always
        # map to the same key, independent of dict iteration order.
        dataset_key = tuple(sorted(params.items()))
        dataset = datasets.setdefault(dataset_key, {'params': params})
        # file the data under every data type named in the filename
        for data_type in data_types:
            if data_type in filename:
                dataset[data_type] = data
    return datasets


def _plot_dataset(args, dataset):
    """Run every plotting routine enabled in *args* on a single *dataset*."""
    # plots the k1, k2 plane colored by obj. fn. value
    if args.of_coloring:
        plot_dmaps.plot_xy(dataset['sloppy_params'][:, 0],
                           dataset['sloppy_params'][:, 1],
                           color=dataset['sloppy_params'][:, 2],
                           xlabel=r"$k_1$", ylabel="$k_2$", scatter=True)

    # plots k1, k2 plane colored by successive DMAP eigenvectors
    if args.kplane_coloring:
        # eigvects are used untransposed here (columns are eigenvectors)
        for i in range(1, dataset['eigvects'].shape[1]):
            plot_dmaps.plot_xy(dataset['sloppy_params'][:, 0],
                               dataset['sloppy_params'][:, 1],
                               color=-dataset['eigvects'][:, i],
                               xlabel=r"$k_1$", ylabel="$k_2$", scatter=True)

    # plots kernel sums vs. epsilon in kernel for determination of epsilon
    if args.kernel_sums:
        plot_dmaps.plot_xy(dataset['epsilons'], dataset['kernel_sums'],
                           xlabel=r"$\epsilon$", ylabel=r"$\sum W_{ij}$",
                           xscale='log', yscale='log')

    if args.dmap_embeddings:
        # NOTE(review): eigvects are transposed here but not in the k-plane
        # coloring branch above -- confirm which orientation
        # plot_dmaps.plot_embeddings expects.
        if 'sloppy_params' in dataset:
            # color by the second sloppy_params column when it is available
            plot_dmaps.plot_embeddings(dataset['eigvects'].T,
                                       dataset['eigvals'],
                                       color=dataset['sloppy_params'][:, 1],
                                       colorbar=True, k=6)
        else:
            plot_dmaps.plot_embeddings(dataset['eigvects'].T,
                                       dataset['eigvals'], plot_3d=False)

    if args.param_surface:
        # assume only three parameters have been used for investigation
        plot_dmaps.plot_xyz(dataset['sloppy_params'][:, 0],
                            dataset['sloppy_params'][:, 1],
                            dataset['sloppy_params'][:, 2],
                            xlabel=r'$K_M$', ylabel=r'$V_M$',
                            zlabel=r'$\epsilon$',
                            color=dataset['sloppy_params'][:, 2],
                            labelsize=24)

    if args.kvs_of_contours:
        print('loaded data, plotting')
        plot_of_k_v_st_contours(dataset['contour'])
    if args.ek_of_contours:
        plot_of_contours(dataset['contour'], r'$\epsilon$', r'$\kappa$')
    if args.kv_of_contours:
        plot_of_contours(dataset['contour'], r'$K$', r'$V$')
    if args.of_contour:
        plot_contour(dataset['contour'])


def main():
    """Parse the command line, load the given csv files and plot them.

    Positional arguments are input files; files that do not end in ``.csv``
    are ignored. Files are grouped into datasets by their header, and within
    a dataset the data is stored under each name from ``data_types`` that
    occurs in the filename. Each enabled command-line switch then runs its
    plotting routine once per dataset. A routine raises ``KeyError`` if the
    dataset lacks the data type it needs.
    """
    args = _build_arg_parser().parse_args()
    data_types = [
        'eigvals', 'eigvects', 'sloppy_params', 'epsilons', 'kernel_sums',
        'contour', 'guesses'
    ]
    datasets = _load_datasets(args.input_files, data_types)
    # run desired routines over each dataset
    for dataset in datasets.values():
        _plot_dataset(args, dataset)
def dmaps_param_set_grad_kernel():
    """DMAP a collection of sloppy parameter combinations with a gradient kernel.

    Uses a kernel which accounts for objective function value and should,
    ideally, uncover the important parameter(s) in the model.

    Reads ``./data/params-ofevals.csv``, keeps the rows whose first column
    is below ``of_tol``, subsamples them with a constant stride, embeds
    columns 1 onward with ``dmaps.embed_data_customkernel`` and shows two 3d
    scatter plots of columns 1-3 colored by the eigenvectors at index 1 and 2.
    """
    # set up base model
    A0 = 1.0  # initial concentration of A
    k1_true = 1.0
    kinv_true = 1000.0
    k2_true = 1000.0
    # effective rate constant that governs exponential growth rate
    decay_rate = k1_true * k2_true / (kinv_true + k2_true)
    # start at t0 = 0, end at tf*decay_rate = 4
    ntimes = 20  # arbitrary
    times = np.linspace(0, 4 / decay_rate, ntimes)
    model = Rawlings_Model(times, A0, k1_true, kinv_true, k2_true,
                           using_sympy=True)

    # import existing data
    data = np.genfromtxt('./data/params-ofevals.csv', delimiter=',')
    of_tol = 0.4  # from plotting with scratch.py
    somedata = data[data[:, 0] < of_tol]

    # keep roughly npts points due to computational considerations; the
    # stride is floored and clamped to 1 so that fewer than npts rows no
    # longer produce a zero (or non-integer) slice step
    npts = 6000
    slice_size = max(1, somedata.shape[0] // npts)
    somedata = somedata[::slice_size]

    # NOTE: a noisy log10 copy of the parameters and a list of kernels over
    # np.logspace(-3, 2, 5) used to be built here, but they only fed a
    # disabled plot_dmaps.kernel_plot call, so they are no longer computed.

    # use un-logged data if using gradient of ob. fn. in kernel
    # DMAP with o.f. kernel, appears the epsilon = 20 is appropriate
    epsilon = 20.0
    kernel = DMAPS_Gradient_Kernel(epsilon, model.sympy_lsq_of_gradient)
    k = 15
    eigvals, eigvects = dmaps.embed_data_customkernel(somedata[:, 1:], k,
                                                      kernel)
    for eigvect_index in (1, 2):
        plot_dmaps.plot_xyz(somedata[:, 1], somedata[:, 2], somedata[:, 3],
                            color=eigvects[:, eigvect_index])
def dmaps_param_set():
    """Performs DMAP of log(parameter) set that fall within some ob. fn. tolerance.

    Loads the dataset from ``./temp.pkl``, takes log10 of columns 1 onward
    and embeds them with ``dmaps.embed_data``. The eigenvalues/eigenvectors
    are cached as CSV files under ``./data``; when both cache files for the
    current tolerance and ``k`` exist they are loaded instead of recomputed.
    Finally shows two 3d scatter plots of the log parameters colored by the
    eigenvectors at index 1 and 2.
    """
    # NOTE(security): np.load on a pickled array unpickles the file; only
    # use this with a temp.pkl you produced yourself.
    # presumably temp.pkl was created by the previously commented-out steps:
    # filter ./data/params-ofevals.csv with column 0 < of_max, column 1 < 10,
    # columns 2 and 3 > 100, keep ~5000 rows, then data.dump -- TODO confirm
    data = np.load('./temp.pkl')
    of_max = 1e-3  # only used to name the cache files below
    print('have %s pts in dataset' % data.shape[0])
    log_params_data = np.log10(data[:, 1:])

    # dmap the log data
    epsilon = 0.3  # from epsilon_plot
    k = 12  # number of dimensions for embedding

    filename_id = 'tol-' + str(of_max) + '-k-' + str(k)
    eigvects_path = './data/dmaps-eigvects--' + filename_id + '.csv'
    eigvals_path = './data/dmaps-eigvals--' + filename_id + '.csv'

    # Reuse a saved embedding only when BOTH files are present; a loose
    # substring match on the directory listing could hit an unrelated or
    # partial file and then fail while loading.
    if os.path.isfile(eigvects_path) and os.path.isfile(eigvals_path):
        print('plotting from previous points')
        eigvects = np.genfromtxt(eigvects_path, delimiter=',')
        eigvals = np.genfromtxt(eigvals_path, delimiter=',')
    else:
        print('computing new embedding')
        eigvals, eigvects = dmaps.embed_data(log_params_data, k,
                                             epsilon=epsilon)
        np.savetxt(eigvects_path, eigvects, delimiter=',')
        np.savetxt(eigvals_path, eigvals, delimiter=',')

    # the leading newlines in the labels are kept from the original calls
    for eigvect_index in (1, 2):
        plot_dmaps.plot_xyz(log_params_data[:, 0],
                            log_params_data[:, 1],
                            log_params_data[:, 2],
                            color=eigvects[:, eigvect_index],
                            xlabel='\n\n' + r'$\log(k_1)$',
                            ylabel='\n\n' + r'$\log(k_{-1})$',
                            zlabel='\n\n' + r'$\log(k_2)$')
# ax.xaxis.set_scale('log') # ax.yaxis.set_scale('log') # ax.zaxis.set_scale('log') # dmaps stuff import plot_dmaps eigvects = np.genfromtxt('./data/dmaps-eigvects--tol-' + str(of_tol) + '-k-' + str(k) + '.csv', delimiter=',') eigvals = np.genfromtxt('./data/dmaps-eigvals--tol-' + str(of_tol) + '-k-' + str(k) + '.csv', delimiter=',') plt.scatter(eigvects[:, 1], np.ones(eigvects.shape[0]), c=np.log10(keff), lw=0) plt.show() plot_dmaps.plot_xyz(log_params_data[:, 0], log_params_data[:, 1], log_params_data[:, 2], color=eigvects[:, 1], xlabel='log(k1)', ylabel='log(kinv)', zlabel='log(k2)') plot_dmaps.plot_xyz(log_params_data[:, 0], log_params_data[:, 1], log_params_data[:, 2], color=eigvects[:, 2], xlabel='log(k1)', ylabel='log(kinv)', zlabel='log(k2)')