def dmaps_annulus():
    """Embed 2D points with a gradient-informed Lafon DMAP kernel.

    The kernel is built from the gradient of f = x^2 + y^2, with the aim of
    obtaining eigenvectors that are constant on level sets of f.  The input
    points are displayed first; afterwards the same points are displayed once
    per eigenvector (indices 1 through 7), colored by that eigenvector.

    NOTE: the annulus sampling is currently commented out, so the points that
    are actually embedded are drawn uniformly from [0.1, 1.0) x [0.1, 1.0).
    """
    # generate dataset
    npts = 3000
    # annulus sampling (disabled):
    # rs = np.random.uniform(low=0.5, high=1.5, size=npts)
    # thetas = np.random.uniform(high=2*np.pi, size=npts)
    # data = np.array((rs*np.cos(thetas), rs*np.sin(thetas))).T
    data = np.random.uniform(low=0.1, size=(npts, 2))

    # gradient of f = x^2 + y^2 evaluated at a point x
    grad = lambda x: 2*x

    # visualize param set
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(data[:, 0], data[:, 1])
    # pyplot.show() displays all open figures; it does not accept a figure
    # argument (a positional argument is interpreted as `block`)
    plt.show()

    # perform dmap
    epsilon = 1e-5
    kernel = dmaps_kernels.gradient_kernel(epsilon, grad)
    k = 12
    eigvals, eigvects = dmaps.embed_data_customkernel(data, k, kernel)

    # plot output, color param plane by output eigenvectors
    for i in range(1, 8):
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(data[:, 0], data[:, 1], c=eigvects[:, i])
        plt.show()
def gradient_dmaps():
    """Probe how gradient information in the DMAPS kernel affects the embedding.

    Builds an objective function from a reference trajectory, traces one of its
    contours in (alpha, beta) with the PSA solver, saves a scatter plot of that
    contour, then embeds the contour using the objective function's
    gradient-aware kernel and saves a scatter plot for every pair of
    eigenvalue-scaled embedding coordinates (index 0 is skipped).
    """
    # rate constants from which the "true" alpha and beta are derived
    k1, kinv, k2 = 0.1, 0.1, 10000.0
    shared_denominator = kinv*kinv + k2
    alpha_true = np.array((k1*k1/shared_denominator,))
    beta_true = np.array((k2/shared_denominator,))

    # contour value for which data will be generated, and the psa stepsize
    contour = 1e-1
    ds = 1e-3

    sample_times = np.linspace(1, 5, 10)
    reference_traj = get_sloppy_traj(beta_true, alpha_true, sample_times)
    of = ab_fn(reference_traj, sample_times, contour, ds)

    # set up psa solver; beta is nudged off its true value to ensure a
    # nonsingular jacobian in the psa routine
    solver = PSA.PSA(of.f, of.Df)
    ab_contour = solver.find_branch(alpha_true, 1.001*beta_true, ds, nsteps=1000)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(ab_contour[:, 0], ab_contour[:, 1])
    plt.savefig('./figs/embeddings/dmaps_data.png')

    k = 6
    eigvals, dmaps_embedding = dmaps.embed_data_customkernel(
        ab_contour, k, of.gradient_dmaps_kernel)

    # every unordered pair (first, second) of nontrivial coordinates
    index_pairs = [(first, second)
                   for first in range(1, k)
                   for second in range(first + 1, k)]
    for first, second in index_pairs:
        xs = eigvals[first]*dmaps_embedding[:, first]
        ys = eigvals[second]*dmaps_embedding[:, second]
        # reuse the single axes, clearing the previous scatter each time
        ax.cla()
        ax.scatter(xs, ys)
        plt.savefig('./figs/embeddings/dmaps' + str(first) + str(second) + '.png')
def zxy_kernel():
    """Examine kernel eigenvectors for z(x, y) given on a uniform rectangle.

    Points (x, y) lie on a 50 x 50 grid over [-1, 1] x [0, 1] and z = x^3.
    Each (point, z) pair is passed to the DMAPS embedding with a `Data_Kernel`.
    The resulting eigenvalues/eigenvectors are not returned or saved; the
    function only prints a completion message.
    """
    sqrt_npts = 50
    xgrid, ygrid = np.meshgrid(np.linspace(-1, 1, sqrt_npts),
                               np.linspace(0, 1, sqrt_npts))
    xydata = np.array((xgrid.flatten(), ygrid.flatten())).T

    # z = x^3
    zdata = np.power(xydata[:, 0], 3)

    # materialize the pairs so the dataset is a real sequence (zip is a
    # one-shot iterator on Python 3)
    fulldata = list(zip(xydata, zdata))

    k = 20
    epsilon = 5e-2
    lam = 1
    kernel = Data_Kernel(epsilon, lam)
    eigvals, eigvects = dmaps.embed_data_customkernel(fulldata, k, kernel,
                                                      symmetric=True)
    print('dun')
def dmaps_param_set_grad_kernel():
    """DMAP a collection of sloppy parameter combinations using a kernel which
    accounts for objective function value and should, ideally, uncover the
    important parameter(s) in the model.

    Reads './data/params-ofevals.csv' (column 0 is treated as the objective
    function value, the remaining columns as parameters), keeps rows whose
    objective function value is below a tolerance, subsamples them, embeds the
    parameter columns with a `DMAPS_Gradient_Kernel`, and plots the parameters
    colored by embedding eigenvectors 1 and 2.
    """
    # set up base model
    A0 = 1.0  # initial concentration of A
    k1_true = 1.0
    kinv_true = 1000.0
    k2_true = 1000.0
    # effective rate constant that governs exponential growth rate
    decay_rate = k1_true * k2_true / (kinv_true + k2_true)
    # start at t0 = 0, end at tf*decay_rate = 4
    ntimes = 20  # arbitrary
    times = np.linspace(0, 4 / decay_rate, ntimes)
    model = Rawlings_Model(times, A0, k1_true, kinv_true, k2_true,
                           using_sympy=True)

    # import existing data
    data = np.genfromtxt('./data/params-ofevals.csv', delimiter=',')
    of_tol = 0.4  # from plotting with scratch.py
    somedata = data[data[:, 0] < of_tol]

    # only keep approx. npts points due to computational considerations.
    # Floor division keeps the step an integer, and the lower bound of 1
    # avoids a zero slice step when fewer than npts rows pass the filter.
    npts = 6000
    slice_size = max(1, somedata.shape[0] // npts)
    somedata = somedata[::slice_size]
    # keff = somedata[:,1]*somedata[:,3]/(somedata[:,2] + somedata[:,3])

    # NOTE(review): log_params_data is not used by anything below; it is kept
    # so the random draws made by this function are unchanged.
    log_params_data = np.log10(somedata[:, 1:])
    # add some noise
    noise_level = 0.02
    log_params_data = log_params_data + noise_level * np.random.normal(
        size=log_params_data.shape)
    # log_params_data = np.log10(data[:,1:])

    # evaluate various epsilons for DMAP kernel
    # NOTE(review): `kernels` is only consumed by the commented-out
    # kernel_plot call below.
    neps = 5  # number of epsilons to evaluate
    epsilons = np.logspace(-3, 2, neps)
    kernels = [
        DMAPS_Gradient_Kernel(epsilon, model.sympy_lsq_of_gradient)
        for epsilon in epsilons
    ]
    # # investigate proper choice of epsilon
    # plot_dmaps.kernel_plot(kernels, epsilons, somedata[:,1:])  # use un-logged data if using gradient of ob. fn. in kernel

    # DMAP with o.f. kernel, appears the epsilon = 20 is appropriate
    epsilon = 20.0
    kernel = DMAPS_Gradient_Kernel(epsilon, model.sympy_lsq_of_gradient)
    k = 15
    eigvals, eigvects = dmaps.embed_data_customkernel(somedata[:, 1:], k,
                                                      kernel)
    plot_dmaps.plot_xyz(somedata[:, 1], somedata[:, 2], somedata[:, 3],
                        color=eigvects[:, 1])
    plot_dmaps.plot_xyz(somedata[:, 1], somedata[:, 2], somedata[:, 3],
                        color=eigvects[:, 2])
def dmaps_param_set_data_kernel():
    """DMAP a collection of sloppy parameter combinations using a kernel which
    accounts for the model predictions at each parameter combination which
    will, ideally, uncover the important parameter(s) in the model.

    Reads './data/params-ofevals.csv', keeps and subsamples the rows with a
    small objective function value, drops rows whose effective rate constant
    is >= 1, generates a model timecourse for each remaining parameter set,
    embeds the (parameters, timecourse) pairs with a `DMAPS_Data_Kernel`, and
    writes eigenvalues, eigenvectors and parameters to CSV files under
    './data/'.
    """
    # set up base model
    A0 = 1.0  # initial concentration of A
    k1_true = 1.0
    kinv_true = 1000.0
    k2_true = 1000.0
    # effective rate constant that governs exponential growth rate
    decay_rate = k1_true * k2_true / (kinv_true + k2_true)
    # start at t0 = 0, end at tf*decay_rate = 4
    ntimes = 20  # arbitrary
    times = np.linspace(0, 4 / decay_rate, ntimes)
    model = Rawlings_Model(times, A0, k1_true, kinv_true, k2_true,
                           using_sympy=False)

    # import existing data
    data = np.genfromtxt('./data/params-ofevals.csv', delimiter=',')
    of_tol = 0.4  # from plotting with scratch.py
    somedata = data[data[:, 0] < of_tol]

    # only keep approx. npts points due to computational considerations.
    # Floor division keeps the step an integer, and the lower bound of 1
    # avoids a zero slice step when fewer than npts rows pass the filter.
    npts = 4000
    slice_size = max(1, somedata.shape[0] // npts)
    # throw out o.f. evals in first column
    somedata = somedata[::slice_size, 1:]

    # keep only parameter sets whose effective rate constant is below one
    keff = somedata[:, 0] * somedata[:, 2] / (somedata[:, 1] + somedata[:, 2])
    somedata = somedata[keff < 1]
    npts = somedata.shape[0]
    print('sending a cherry-picked sample of %d to dmaps' % npts)

    # find model predictions from parameter set
    trajectories = np.empty((npts, ntimes))
    for i, param_set in enumerate(somedata):
        trajectories[i] = model.gen_timecourse(*param_set)

    # combine into one datastructure; each entry contains
    # [(k1, kinv, k2), (model prediction at k1, kinv, k2)].
    # Materialized as a list because zip is a one-shot iterator on Python 3.
    full_data = list(zip(somedata, trajectories))
    print('generated full dataset, proceeding to dmaps')

    # neps = 5 # number of epsilons to evaluate
    # epsilons = np.logspace(-3, 2, neps)
    # kernels = [DMAPS_Data_Kernel(epsilon) for epsilon in epsilons]
    # # investigate proper choice of epsilon
    # dmaps.kernel_plot(kernels, epsilons, full_data)  # use un-logged data, as kernel explicitly takes log of parameters

    # perform dmaps, try epsilon=1e-1
    k = 20
    epsilon = 1e-1
    kernel = DMAPS_Data_Kernel(epsilon)
    eigvals, eigvects = dmaps.embed_data_customkernel(full_data, k, kernel)

    # only the real parts of the eigen-decomposition are saved
    np.savetxt('./data/data-dmaps-eigvals.csv', eigvals.real, delimiter=',')
    np.savetxt('./data/data-dmaps-eigvects.csv', eigvects.real, delimiter=',')
    np.savetxt('./data/data-dmaps-params.csv', somedata, delimiter=',')
    print('saved dmaps output as ./data/data-dmaps...')
def dmaps_two_important_one_sloppy():
    """Generate parameter combinations in which there are two important
    (alpha, lambda) and one sloppy (epsilon) parameter(s) and use DMAPS with a
    kernel that accounts for both parameter-space distance and distances in
    model output, with the aim to uncover the alpha and lambda parameters.

    Behavior depends on whether the saved parameter file exists:

    * If it exists, the saved parameters and trajectories are loaded, filtered
      by objective function value, embedded with a `DMAPS_Data_Kernel`, the
      eigenvalues/eigenvectors are dumped under './data/', and one 3D scatter
      plot per eigenvector is saved under './figs/'.
    * Otherwise the dataset is generated (work is split over MPI ranks via
      `uf.parallelize_iterable`) and rank 0 saves the gathered results.  The
      function must then be run again to perform the embedding.
    """
    # the same two files are checked for, loaded from and written to
    params_file = './data/a-lam-eps-of-params-new.pkl'
    trajs_file = './data/a-lam-eps-trajs-new.pkl'

    if os.path.isfile(params_file):
        # already have data saved, load and trim data to approx 5000 pts for
        # DMAPS.
        # NOTE(review): these files are pickles written by ndarray.dump below;
        # recent NumPy releases refuse to unpickle in np.load unless
        # allow_pickle=True is passed -- confirm against the NumPy version in
        # use, and only load files from a trusted source.
        params = np.load(params_file)
        trajs = np.load(trajs_file)

        # keep only samples whose objective function value (column 3) is small
        tol = 2e-2
        keep = params[:, 3] < tol
        trajs = trajs[keep]
        params = params[keep]
        params = params[:, :3]
        print('Have %d pts in dataset' % params.shape[0])

        # materialized as a list because zip is a one-shot iterator on
        # Python 3
        data = list(zip(params, trajs))

        # epsilons = np.logspace(-3, 1, 5)
        # kernels = [DMAPS_Data_Kernel(epsilon) for epsilon in epsilons]
        # dmaps.kernel_plot(kernels, epsilons, data)
        epsilon = 1e-2  # from epsilon plot
        kernel = DMAPS_Data_Kernel(epsilon)
        k = 30
        eigvals, eigvects = dmaps.embed_data_customkernel(data, k, kernel,
                                                          symmetric=True)
        eigvals.dump('./data/dmaps-data-kernel-eigvals.pkl')
        eigvects.dump('./data/dmaps-data-kernel-eigvects.pkl')

        # color log-parameter space by each eigenvector after the first
        for i in range(1, k):
            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            ax.scatter(np.log10(params[:, 0]), np.log10(params[:, 1]),
                       np.log10(params[:, 2]), c=eigvects[:, i])
            plt.savefig('./figs/data-space-dmaps' + str(i) + '.png')
            # plt.show()
            # release the figure so they do not accumulate across iterations
            plt.close(fig)
    else:
        # need to generate dataset
        # CREATE DATASET (no dataset exists):
        # init MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        nprocs = comm.Get_size()

        # set up base system
        # specify ode parameters
        (a_true, b_true, lam_true, eps_true) = (0.1, 1.0, 0.1, 0.001)
        params = np.array((a_true, b_true, lam_true, eps_true))
        # create system with given params
        z_system = ZM.Z_Model(params)

        # set up integration times
        t0 = 3*eps_true  # 0
        tfinal = 1/lam_true
        ntimes = 50
        times = np.linspace(t0, tfinal, ntimes)

        # get true trajectory based on true initial conditions
        x0_true = np.array((1, a_true))
        x_true_traj = z_system.get_trajectory(x0_true, times)

        # set up sampling grid and storage space for obj. fn. evals
        # lam_max = 1.2
        nsamples = 100
        ntotal = nsamples*nsamples*nsamples
        lam_samples = np.linspace(0.9*lam_true, 1.1*lam_true, nsamples)
        a_samples = np.linspace(0.9*a_true, 1.1*a_true, nsamples)
        epsmin = 1e-6
        epsmax = 1e-1
        eps_samples = np.logspace(np.log10(epsmin), np.log10(epsmax),
                                  nsamples)

        # add noise to each individual parameter combination to create nice
        # dataset; columns are (a, b, lam, eps)
        params_noise = np.empty((ntotal, 4))
        # same noise scale for both lam and a
        params_noise[:, 0] = 0.01*np.random.normal(loc=0, size=ntotal)
        params_noise[:, 1] = 0  # no noise in b
        params_noise[:, 2] = 0.01*np.random.normal(loc=0, size=ntotal)
        # noise for eps must vary with scale (multiplied by eps when applied)
        params_noise[:, 3] = 0.1*np.random.normal(loc=0, size=ntotal)
        # eps_samples = np.logspace(-6, np.log10(epsmax), nsamples)

        # space for (a, lam, eps, obj. fn. eval) and for the trajectories
        params = np.empty((ntotal, 4))
        trajs = np.empty((ntotal, ntimes, 2))

        # count is the number of successfully integrated samples so far
        count = 0
        for lam in uf.parallelize_iterable(lam_samples, rank, nprocs):
            for eps in eps_samples:
                for a in a_samples:
                    new_params = (np.array((a, b_true, lam, eps))
                                  + params_noise[count]*np.array((1, 1, 1, eps)))
                    z_system.change_parameters(new_params)
                    try:
                        x_sample_traj = z_system.get_trajectory(x0_true, times)
                    except CustomErrors.IntegrationError:
                        # skip parameter sets for which integration fails
                        continue
                    # a, lam, eps, objective function value
                    params[count] = (new_params[0], new_params[2],
                                     new_params[3],
                                     get_of(x_sample_traj, x_true_traj))
                    trajs[count] = x_sample_traj
                    count = count + 1

        # discard the unused tail of the preallocated storage and collect
        # every rank's results on rank 0
        params = params[:count]
        all_params = comm.gather(params, root=0)
        trajs = trajs[:count]
        all_trajs = comm.gather(trajs, root=0)

        # compare by value: identity comparison of ints is not reliable
        if rank == 0:
            all_params = np.concatenate(all_params)
            all_params.dump(params_file)
            all_trajs = np.concatenate(all_trajs)
            all_trajs.dump(trajs_file)
            print('******************************\nData saved in ./data/a-lam-eps-...\n******************************')