コード例 #1
0
ファイル: test_dmaps.py プロジェクト: glinka/dmaps
def dmaps_demo():
    """Demonstrates the DMAPS algorithm on a swissroll dataset using a predefined epsilon value"""

    data = gen_swissroll()
    epsilon = 2 #np.sqrt(5.0)
    print 'Swissroll generated with', data.shape[0], 'points'
    print 'Displaying dataset'
    plot_dmaps.plot_xyz(data[:,0], data[:,1], data[:,2], color=np.linalg.norm(data[:,:2], axis=1), s=80)
    # investigate proper epsilon
    print 'Investigating effect of epsilon on embedding'
    dmaps.epsilon_plot(data)
    start = time.clock()
    k = 4
    print 'Computing embedding'
    eigvals, eigvects = dmaps.embed_data(data, k, epsilon=epsilon)

    np.savetxt('./eigvects.csv', eigvects, delimiter=',')
    np.savetxt('./eigvals.csv', eigvals, delimiter=',')
    np.savetxt('./data.csv', data, delimiter=',')

    print 'Lanczos solver took', str(time.clock() - start) + 's', 'to find top', k, 'eigenvectors'
    print 'Displaying dmaps embeddings'
    for i in range(1, k):
        for j in range(i+1, k):
            xlabel = r'$\Phi_{' + str(i+1) + '}$'
            ylabel = r'$\Phi_{' + str(j+1) + '}$'
            plot_dmaps.plot_xy(eigvals[i]*eigvects[:,i], eigvals[j]*eigvects[:,j], xlabel=xlabel, ylabel=ylabel, title='Embedding dataset with ' + xlabel + ' and ' + ylabel, color=eigvects[:,2], s=50, scatter=True, hide_ticks=True, cmap='jet')
コード例 #2
0
ファイル: main.py プロジェクト: glinka/sloppy_models
def tempfig3():
    """Embed a 1-D point set assembled from four shifted/reflected copies of a curve.

    Two 400-point sets (``xdata`` and ``ydata``) are built from the same
    100-point curve and shown together on a scatter plot. ``xdata`` is then
    embedded with ``dmaps.embed_data`` (``k=5``), the eigenvectors and
    eigenvalues are dumped under ``./data/`` and the eigenvector at column 1
    is plotted against ``xdata`` next to a scaled cosine for comparison.
    """
    n = 100
    y = np.linspace(0, 1, n)
    beta = 1
    # alternative curves tried previously: y, 2*np.arcsin(y)/np.pi, and the
    # expression below divided by 2 instead of 8
    ratio = (beta + 1) / beta
    data = (ratio - np.sqrt(np.power(ratio, 2) - 4 * y / beta)) / 8

    # four consecutive segments of n points each
    xdata = np.concatenate((data, 0.5 - data, 0.5 + data, 1 - data))
    reversed_data = data[::-1]
    ydata = np.concatenate((data,
                            0.25 + reversed_data,
                            0.5 + data,
                            0.75 + reversed_data))

    # show both point sets on a common horizontal line
    heights = np.ones(4 * n)
    plt.scatter(ydata, heights)
    plt.scatter(xdata, heights, c='r')
    plt.show()

    # kernel scale: twice the largest gap between neighbouring curve values
    eps = 2 * np.max(np.diff(data))
    eigvals, eigvects = dmaps.embed_data(xdata, k=5, epsilon=eps)
    eigvects.dump('./data/tempeigvects.pkl')
    eigvals.dump('./data/tempeigvals.pkl')

    # computed eigenvector vs. a cosine scaled to match its first entry
    second_eigvect = eigvects[:, 1]
    plt.scatter(xdata, second_eigvect)
    plt.scatter(xdata, eigvects[0, 1] * np.cos(np.pi * xdata), c='r')
    plt.show()
コード例 #3
0
ファイル: main.py プロジェクト: glinka/sloppy_models
def dmaps_test():
    """Tests DMAP results at different values of epsilon and t"""
    # pts = np.random.uniform(size=(npts,2))
    npts = 20
    x = np.linspace(0,1,npts)
    y = np.linspace(0,1,npts)
    xpts, ypts = np.meshgrid(x,y)
    # xpts = np.flatten(xpts)
    # ypts = np.flatten(ypts)
    pts = np.array((xpts.flatten(),ypts.flatten())).T
    plt.scatter(pts[:,0], pts[:,1])
    plt.show()
    k = 6
    neps = 20
    epsilons = np.linspace(0.01, 0.1, neps)
    lam2s = np.empty(neps)
    for i, eps in enumerate(epsilons):
        print eps
        eigvals, eigvects = dmaps.embed_data(pts, k, epsilon=eps)
        lam2s[i] = eigvals[1]
    lam2s.dump('./lambdas.pkl')
    epsilons.dump('./eps.pkl')    
    # print 'log(lambdas)', np.log(lam2s)
    # print '1/epsilons', 1/epsilons
    # print 'product', np.log(lam2s)*epsilons
    plt.scatter(np.log(1/epsilons), np.log(lam2s), s=50)
    # fig = plt.figure()
    # ax = fig.add_subplot(111)
    # ax.scatter(np.log(lam2s), 1/epsilons, s=50)
    # ax.set_xlim((np.min(np.log(lam2s)), np.max(np.log(lam2s))))
    plt.show()
コード例 #4
0
ファイル: main.py プロジェクト: glinka/sloppy_models
def dmaps_sloppy_params():
    """Performs normal DMAP on log-param data to find a parameterization of the sloppy surface. future work will add data dmaps to find important directions too"""
    data = np.load('./data/input/param-sample-K-V-sig-eps-kap.pkl')
    max_ofval = 10e-3
    # take log and remove of val
    logdata = np.log10(data[data[:,5] < max_ofval][:,:5])
    print 'have', logdata.shape[0], 'points'

    k = 12
    eps = 0.3 # max range should be np.log10(3/2) - np.log10(1/2) \approx 0.5
    eigvals, eigvects = dmaps.embed_data(logdata, k, eps)
コード例 #5
0
def main(of_filename, params_filename):
    """Computes the DMAPS embedding of a collection of parameter sets that adaquately fit the Brynildsen lab's biological model

    Args:
        of_filename (string): location of csv file containing objective function evaluations of the paramater sets, relative to dirrectory from which this file is run
        params_filename (string): location of csv file containing the paramater sets to be embedded, relative to dirrectory from which this file is run
    """
    # get data
    of_data = uf.get_data(of_filename)
    params_data = uf.get_data(params_filename)
    # dimension of parameter sets
    p = params_data.shape[1]
    # number of data points
    n = params_data.shape[0]
    # scale each variable to have mean one
    params_data = n * params_data / np.sum(params_data, 0)
    print np.sum(np.sum(params_data, 0) / n - np.ones(p))
    # # investigate proper epsilon value
    # dmaps.epsilon_plot(np.logspace(-2, 2, 50), params_data, fraction_kept=0.05)
    # embed data
    # # perform DMAPS over range of possible epsilons as determined from the plot above
    # nepsilons = 6
    # epsilons = np.logspace(4,6,nepsilons)
    nepsilons = 1
    epsilons = [3.0]
    ndims = 20
    # eigvals = np.empty((nepsilons, ndims))
    # eigvects = np.empty((nepsilons, n, ndims))

    # fig = plt.figure()
    # ax = fig.add_subplot(111)

    for eps in enumerate(epsilons):
        eigvals, eigvects = dmaps.embed_data(params_data,
                                             k=ndims,
                                             epsilon=eps[1])
    #     # eigvals[eps[0]], eigvects[eps[0]] = dmaps.embed_data(params_data, k=ndims, epsilon=eps[1])

    #     ax.plot(range(1,ndims+1), eigvals, label=r'$\epsilon=' + str(eps[1]) + '$')
    # ax.set_xlabel('index')
    # ax.set_ylabel('eigenvalue')
    # ax.set_yscale('log')
    # ax.set_xlim((1,ndims))
    # ax.legend()
    # plt.show()

    # # save eigvals for analysis
    np.savetxt('./data/output/dmaps/euclid/py_eigvals.csv',
               eigvals,
               delimiter=',')
コード例 #6
0
def dmaps_two_important_one_sloppy_only_data():
    """Generate parameter combinations in which there are two important (alpha, lambda) and one sloppy (epsilon) parameter(s) and use DMAPS to uncover them, but uses as data only the trajectories and not any information about the objective function"""
    params = np.load('./data/a-lam-eps-of-params-new.pkl')
    trajs = np.load('./data/a-lam-eps-trajs-new.pkl')
    tol = 1.5e-4 # 2e-2 for old data
    trajs = trajs[params[:,3] < tol]
    params = params[params[:,3] < tol]
    params = params[:,:3]
    print 'Have', params.shape[0], 'pts in dataset'
    # epsilons = np.logspace(-3, 1, 5)
    # dmaps.epsilon_plot(epsilons, trajs)
    epsilon = 1e-2 # from epsilon plot
    k = 80
    eigvals, eigvects = dmaps.embed_data(trajs, k, epsilon=epsilon)
    eigvals.dump('./data/dmaps-data-kernel-eigvals.pkl')
    eigvects.dump('./data/dmaps-data-kernel-eigvects.pkl')
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.hold(False)
    ax.scatter(np.log10(params[:,0]), np.log10(params[:,1]), np.log10(params[:,2]), c=eigvects[:,i])
    plt.show()
コード例 #7
0
ファイル: main.py プロジェクト: glinka/sloppy_models
def dmaps_contour():
    """Embed the transformed contour data and color the 3-d point cloud by each eigenvector.

    The data are read from a csv file under ``./data/output``, embedded
    with ``dmaps.embed_data`` (``ndims`` terms, ``epsilon=1e-1``) and plotted
    once per eigenvector index ``1..ndims-1``, using the first three data
    columns as coordinates.
    """
    data = np.genfromtxt('./data/output/contour_KVSt_to_dmaps_transformed.csv', skip_header=0, delimiter=',')
    # options explored previously: subsampling to 5000 points, adding normal
    # noise of level 0.01 to column 2, and
    # plot_dmaps.epsilon_plot(np.logspace(-4, 2, 10), data)
    ndims = 5
    eigvals, eigvects = dmaps.embed_data(data, ndims, epsilon=1e-1)

    # earlier plots (eps=5e-2 and eps=1e-1) compared eigvects[:,1] with
    # eigvects[:,2], and eigvects[:,2] with eigvects[:,13], colored by
    # data[:,1]/data[:,2] or data[:,0]
    axis_labels = {'xlabel': r'$\hat{St}$',
                   'ylabel': r'$\hat{V}$',
                   'zlabel': r'$\hat{K}$'}
    col0, col1, col2 = data[:,0], data[:,1], data[:,2]
    for eig_index in range(1, ndims):
        plot_dmaps.plot_xyz(col0, col1, col2, color=eigvects[:,eig_index], **axis_labels)
コード例 #8
0
ファイル: main.py プロジェクト: glinka/sloppy_models
def dmaps_param_set():
    """Performs DMAP of log(parameter) set that fall within some ob. fn. tolerance"""
    # import data and save only those parameter combinations such that error(k1, kinv, k2) < tol

    data = np.load('./temp.pkl')
    # data = np.genfromtxt('./data/params-ofevals.csv', delimiter=',')

    of_max = 1e-3
    k1_max = 10
    kinv_min = 100
    k2_min = 100
    # data = data[data[:,0] < of_max]
    # data = data[data[:,1] < k1_max]
    # data = data[data[:,2] > kinv_min]
    # data = data[data[:,3] > k2_min]
    # slice = 5000
    # data = data[::data.shape[0]/slice]
    # data.dump('./temp.pkl')

    # of_max = 0.002 # from plotting with scratch.py
    # data = data[data[:,0] < of_max]
    # slice_size = 4 # used to further trim data
    # data = data[::slice_size]
    print 'have', data.shape[0], 'pts in dataset'
    keff = data[:, 1] * data[:, 3] / (data[:, 2] + data[:, 3])
    log_params_data = np.log10(data[:, 1:])
    # # investigate which epsilon to choose
    # neps = 10 # number of epsilons to investigate
    # epsilons = np.logspace(-3,2, neps)
    # plot_dmaps.epsilon_plot(epsilons, log_params_data)
    # dmap the log data
    epsilon = 0.3  # from epsilon_plot
    k = 12  # number of dimensions for embedding

    # search through files in ./data to see if the embedding has already been computed
    filename_id = 'tol-' + str(of_max) + '-k-' + str(k)
    found_previous_embeddings = False

    eigvals, eigvects = None, None

    for filename in os.listdir('./data'):
        if filename_id in filename:
            # found previously saved data, import and do not recompute
            eigvects = np.genfromtxt('./data/dmaps-eigvects--tol-' +
                                     str(of_max) + '-k-' + str(k) + '.csv',
                                     delimiter=',')
            eigvals = np.genfromtxt('./data/dmaps-eigvals--tol-' +
                                    str(of_max) + '-k-' + str(k) + '.csv',
                                    delimiter=',')
            found_previous_embeddings = True
            break

    if found_previous_embeddings is False:
        print 'plotting from previous points'
        eigvals, eigvects = dmaps.embed_data(log_params_data,
                                             k,
                                             epsilon=epsilon)
        np.savetxt('./data/dmaps-eigvects--tol-' + str(of_max) + '-k-' +
                   str(k) + '.csv',
                   eigvects,
                   delimiter=',')
        np.savetxt('./data/dmaps-eigvals--tol-' + str(of_max) + '-k-' +
                   str(k) + '.csv',
                   eigvals,
                   delimiter=',')

    plot_dmaps.plot_xyz(log_params_data[:, 0],
                        log_params_data[:, 1],
                        log_params_data[:, 2],
                        color=eigvects[:, 1],
                        xlabel='\n\n' + r'$\log(k_1)$',
                        ylabel='\n\n' + r'$\log(k_{-1})$',
                        zlabel='\n\n' + r'$\log(k_2)$')
    plot_dmaps.plot_xyz(log_params_data[:, 0],
                        log_params_data[:, 1],
                        log_params_data[:, 2],
                        color=eigvects[:, 2],
                        xlabel='\n\n' + r'$\log(k_1)$',
                        ylabel='\n\n' + r'$\log(k_{-1})$',
                        zlabel='\n\n' + r'$\log(k_2)$')
コード例 #9
0
def run_dmaps():
    # k1_true = 0.1
    # kinv_true = 0.1
    # k2_true = 1000.0

    # k1_true = 10.0
    # kinv_true = 10.0
    # k2_true = 100.0

    k1_true = 1.0
    kinv_true = 1.0
    k2_true = 100.0

    beta_true = k2_true/(kinv_true*kinv_true + k2_true)
    alpha_true = k1_true*k1_true/(kinv_true*kinv_true + k2_true)
    # define objective function using sympy
    # create sympy matrix from numpy array
    times = np.linspace(1, 5, 10)
    ntimes = times.shape[0]
    data = sympy.Matrix(get_sloppy_traj(beta_true, alpha_true, times))
    x,y,z = sympy.symbols('x,y,z', real=True)
    # convert times array to sympy type, then take element-wise exponential
    times = sympy.Matrix(times).transpose()
    k1,k2,kinv = sympy.symbols('k1,k2,kinv')
    ks = [k1,k2,kinv]
    # redefine beta to include (0,40), jk
    beta = k2/(kinv*kinv + k2)
    alpha = k1*k1/(kinv*kinv + k2)
    exp_alpha = -alpha*times
    exp_alpha = exp_alpha.applyfunc(sympy.exp)
    ys = sympy.zeros(3, ntimes)
    ys[0,:] = exp_alpha
    ys[1,:] = beta*exp_alpha
    ys[2,:] = sympy.ones(1, ntimes) - exp_alpha
    # make the sympy obj. fn., essentially the squared frobenius norm of the matrix created by stacking vectors
    # at different sampling times together
    f = sum((ys - data).applyfunc(lambda x: x*x))
    sloppy_of = of.ObjectiveFunction(f, ks)
    # print of.gradient([k1_true, k2_true, kinv_true]), of.hessian([k1_true, k2_true, kinv_true])
    # ought to be a smarter way by converting from sympy to numpy instead of redefining
    # data = np.array(data)
    # times = np.array(times)

    # contour = 6e-2 w/ ks = [10,10,100]
    contour = 1e-3
    beta_true = np.array((beta_true,))
    alpha_true = np.array((alpha_true,))
    times = np.linspace(1, 5, 10)
    data = get_sloppy_traj(beta_true, alpha_true, times)
    ds = 5e-4
    altof = ab_fn(data, times, contour, ds)
    # set up psa solver
    psa_solver = PSA.PSA(altof.f, altof.Df)
    # perturb beta to ensure nonsingular jacobian in psa routine
    beta_perturbed = 1.001*beta_true
    ab_contour = psa_solver.find_branch(alpha_true, beta_perturbed, ds, nsteps=400)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(ab_contour[:,1], ab_contour[:,0])
    ax.set_xlabel(r'$\alpha$')
    ax.set_ylabel(r'$\beta$')
    plt.show(fig)

    npts = ab_contour.shape[0]
    alphas = ab_contour[:,1]
    betas = ab_contour[:,0]
    nks = 10
    fig3d = plt.figure()
    ax3d = fig3d.add_subplot(111, projection='3d')
    npts_toplot = 100
    spacing = npts/npts_toplot
    # for each sampled alpha/beta pair, of which there will be 'npts_toplot + 1', we find 'nks' values of k1, k2, kinv
    dmaps_data = np.empty(((npts_toplot+1)*nks, 3))
    for i in range(npts):
        if i % spacing == 0:
            k1s = np.abs(np.logspace(0.1, 100, nks) + np.random.normal(size=nks))
            k2s = betas[i]*k1s*k1s/(alphas[i])
            kinvs = np.sqrt((1 - betas[i])/(alphas[i]*betas[i]))*k1s
            dmaps_data[nks*i/spacing:nks*(i/spacing+1),:] = np.array((k1s, k2s, kinvs)).T
            # try log to scale
            # dmaps_data[nks*i/spacing:nks*(i/spacing+1),:] = np.log(np.array((k1s, k2s, kinvs)).T)
            ax3d.scatter(k1s, k2s, kinvs, c='b')
    # # see what data set looks like
    # ax3d.set_xscale('log')
    # ax3d.set_yscale('log')
    # ax3d.set_zscale('log')
    ax3d.set_xlim(left=0)
    ax3d.set_ylim(bottom=0)
    ax3d.set_zlim(bottom=0)
    ax3d.set_xlabel(r'$k_1$')
    ax3d.set_ylabel(r'$k_2$')
    ax3d.set_zlabel(r'$k_{-1}$')
    # ax3d.set_xlabel(r'$log(k_1)$')
    # ax3d.set_ylabel(r'$log(k_2)$')
    # ax3d.set_zlabel(r'$log(k_{-1})$')
    plt.show(fig3d)
    # plt.savefig('./figs/ks_3d.png')
    # do dmaps
    k = 6
    # add tiny amount to ensure all-positive eigenvalues
    H = sloppy_of.hessian([k1_true, k2_true, kinv_true]) + 1e-16*np.identity(3)
    H_inv = np.linalg.inv(H)
    print 'eigvals of hessian at min:', np.linalg.eigvals(H)
    # use metric defined by sqrt(xAx) where A is pos. def.
    metric = lambda x,y: np.sqrt(np.dot(x-y, np.dot(H, x-y)))
    eigvals, eigvects = dmaps.embed_data(dmaps_data, k, metric=metric)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i in range(1, k):
        for j in range(i+1, k):
            ax.cla()
            ax.scatter(eigvects[:,i], eigvects[:,j], c=dmaps_data[:,2])
            # plt.show()
            plt.savefig('./figs/embeddings/dmaps/qssa_' + str(i) + '_' + str(j) + '.png')
コード例 #10
0
ファイル: main.py プロジェクト: glinka/sloppy_models
def disconnected_parallel_lines():
    """Data dmaps on disconnected parallel lines"""
    # make data on line
    a = 1
    npts = 200
    xdata = np.linspace(0, 2, npts)
    ydata = a * xdata
    # shift second half down and to the right
    npts_half = npts / 2
    dx = 0.5 * (1 + a * a)  # x1*(1+a*a)
    # dx = 0.0
    ydata[npts_half:] = ydata[npts_half:] - a
    xdata[npts_half:] = xdata[npts_half:] + dx - 1
    data = np.array((xdata, ydata)).T

    # # angle between line and x-axis
    # theta = np.arccos(1./np.sqrt(1 + a*a))
    # start of 'overlap' should be at x = 1/2, set dx accordingly

    print np.min(np.linalg.norm(data[:90] - data[101], axis=1))
    print 'distance between segments:', a * dx / np.sqrt(1 + a * a)

    # # plot dataset
    # fig = plt.figure()
    # ax = fig.add_subplot(111)
    # ax.scatter(data[:,0], data[:,1])
    # plt.show()

    # loop over range of epsilons
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.grid(True)
    # eps = 0.2
    k = 150
    epss = [0.1, 0.2, 0.5, 0.6, 0.8, 1, 2, 3, 5]

    colornorm = colors.Normalize(
        vmin=np.log10(np.min(epss)), vmax=np.log10(np.max(epss))
    )  # colors.Normalize(vmin=np.min(embeddings), vmax=np.max(embeddings))
    colormap = cm.ScalarMappable(norm=colornorm, cmap='viridis')

    # cs = ['r', 'b', 'g', 'c', 'y']
    eigvects_index = 1
    for i, eps in enumerate(epss):
        eigvals, eigvects = dmaps.embed_data(data, k, epsilon=eps)
        print eigvals[:4]
        # ax.scatter(data[:,0], np.sign(eigvects[0,eigvects_index])*eigvects[:,eigvects_index], c=cs[i%5])
        eps_str = '%1.1f' % eps
        ax.scatter(eigvals[1] * np.sign(eigvects[0, 1]) * eigvects[:, 1],
                   eigvals[2] * np.sign(eigvects[0, 2]) * eigvects[:, 2],
                   eigvals[3] * np.sign(eigvects[0, 3]) * eigvects[:, 3],
                   c=colormap.to_rgba(np.log10(eps)),
                   label=r'$\epsilon=$' + eps_str)
        # ax.scatter(eigvals[1]*np.sign(eigvects[0,1])*eigvects[:,1], eigvals[2]*np.sign(eigvects[0,2])*eigvects[:,2], c=colormap.to_rgba(np.log10(eps)))
    ax.set_xlabel(r'$\Phi_1$')
    ax.set_ylabel(r'$\Phi_2$')

    ax.set_zlabel(r'$\Phi_3$')
    ax.set_zticks([])

    ax.set_xticks([])
    ax.set_yticks([])

    ax.legend(fontsize=48, framealpha=0.8)

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # do some heatmaps
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    # fig = plt.figure(figsize=(36, 20))
    # ax = fig.add_subplot(111)
    # eps = 0.5
    # k = 150
    # eigvals, eigvects = dmaps.embed_data(data, k, epsilon=eps)
    # embeddings = eigvects*eigvals
    # for i in range(npts):
    #     progress_bar(i, npts-1)
    #     ax.scatter(data[:,0], data[:,1], c=np.linalg.norm(embeddings - embeddings[i], axis=1), s=1000)

    #     ax.set_xlim((0,2))
    #     ax.set_ylim((0,1))
    #     ax.set_xlabel(r'$x$')
    #     ax.set_ylabel(r'$\gamma(x)$')

    #     plt.savefig('./figs/temp/pt' + str(i) + '.png')
    # print 'dun'

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # stop doin some heatmaps
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    plt.show()
コード例 #11
0
def dmaps_transformed_params():
    """Perform DMAP on nonlinear, swirled transformation of parameters lambda/epsilon (important/sloppy)"""
    
    if os.path.isfile('./data/a-lam-ofevals-2016.csv'): # os.path.isfile('./data/lam_eps_ofevals-2016.csv'):
        # PERFORM THE DMAP (already generated pts):

        print 'Already have sloppy data, transforming and embedding'

        data = np.genfromtxt('./data/a-lam-ofevals-2016.csv', delimiter=',')

        # extract sloppy parameter combinations
        tol = 50 # 0.01
        data = data[data[:,-1] < tol]

        # transform into swirl in c1/c2
        S = 1.0 # for now we require S <= 1 to invert back to lambda/epsilon
        # questionable redefinition of max param values
        lam_max, epsmax = np.max(data[:,:2], axis=0)
        # c1 = lambda l, e: np.sqrt(e/epsmax + l/(S*lam_max))*np.cos(2*np.pi*S*e/epsmax)
        # c2 = lambda l, e: np.sqrt(e/epsmax + l/(S*lam_max))*np.sin(2*np.pi*S*e/epsmax)

        y1 = lambda l, e: l + np.power(np.log10(e)- np.average(np.log10(e)), 2)
        y2 = lambda l, e: np.log10(e) - np.average(np.log10(e))

        a = 1.3
        b = 0.3
        
        # do the actual transformation
        cs1 = np.array(henon(data[:,1], data[:,0], 1, a, b)).T
        cs2 = np.array(henon(data[:,1], data[:,0], 2, a, b)).T
        cs3 = np.array(henon(data[:,1], data[:,0], 3, a, b)).T
        cs4 = np.array(henon(data[:,1], data[:,0], 4, a, b)).T

        # look at dataset and subsequent transformations
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(data[:,1], data[:,0], c=data[:,2], s=3)
        ax.set_xlabel(r'$x_0 (= \lambda)$', fontsize=72)
        ax.set_ylabel(r'$y_0 (= a)$', fontsize=72)
        fig.subplots_adjust(bottom=0.15)


        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(cs1[:,0], cs1[:,1], c=data[:,2], s=3)
        ax.set_xlabel(r'$x_1$', fontsize=72)
        ax.set_ylabel(r'$y_1$', fontsize=72)
        fig.subplots_adjust(bottom=0.15)


        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(cs2[:,0], cs2[:,1], c=data[:,2], s=3)
        ax.set_xlabel(r'$x_2$', fontsize=72)
        ax.set_ylabel(r'$y_2$', fontsize=72)
        fig.subplots_adjust(bottom=0.15)


        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(cs3[:,0], cs3[:,1], c=data[:,2], s=3)
        ax.set_xlabel(r'$x_3$', fontsize=72)
        ax.set_ylabel(r'$y_3$', fontsize=72)
        fig.subplots_adjust(bottom=0.15)


        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(cs4[:,0], cs4[:,1], c=data[:,2], s=3)
        ax.set_xlabel(r'$x_4$', fontsize=72)
        ax.set_ylabel(r'$y_4$', fontsize=72)
        fig.subplots_adjust(bottom=0.15)


        plt.show()

        # neps = 8
        # eps = np.logspace(-3, 3, neps)
        # epsilon_plot(eps, cs)
        eps = 1e-1
        eigvals, eigvects = dmaps.embed_data(cs2, k=12, epsilon=eps)
        plot_dmaps.plot_xy(cs2[:,0], cs2[:,1], color=eigvects[:,1], scatter=True, xlabel=r'$y_1$', ylabel=r'$y_2$')
        # plot_dmaps.plot_embeddings(eigvects, eigvals, k=4)
    else:
        # CREATE DATASET (no dataset exists):
        # init MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        nprocs = comm.Get_size()

        # set up base system
        # specify ode parameters
        (a_true, b_true, lam_true, eps_true) = (1.0, 0.01, 1.0, 0.001) # (0.1, 0.01, 0.1, 0.001)
        params = np.array((a_true, b_true, lam_true, eps_true))
        # create system with given params
        z_system = ZM.Z_Model(params)

        # set up integration times
        t0 = 0
        tfinal = 1.0/lam_true
        dt = eps_true
        times = np.arange(t0, tfinal, dt)
        ntimes = times.shape[0]

        # get true trajectory based on true initial conditions
        x0_true = np.array((1, a_true))
        x_true_traj = z_system.get_trajectory_quadratic(x0_true, times)

        # # set up sampling grid and storage space for obj. fn. evals
        # lam_max = 1.2
        # epsmax = 1e-1
        # nsamples = 500
        # lam_samples = np.linspace(0.9*lam_true, 1.1*lam_true, nsamples)
        # eps_samples = np.linspace(0, epsmax, nsamples)
        # eps_samples = np.logspace(-6, np.log10(epsmax), nsamples)
        # data = np.empty((nsamples*nsamples, 3)) # space for obj. fn. evals
        nsamples = 40000
        data = np.empty((nsamples, 3))
        a_lam_samples = np.random.uniform(size=(nsamples, 2))*np.array((1.5,1.5)) + np.array((0.25, 0.25)) # a \in (7, 9) lamb \in (6, 11)

        count = 0
        for a, lam in uf.parallelize_iterable(a_lam_samples, rank, nprocs):
            z_system.change_parameters(np.array((a, b_true, lam, eps_true)))
            try:
                x_sample_traj = z_system.get_trajectory_quadratic(x0_true, times)
            except CustomErrors.IntegrationError:
                continue
            else:
                data[count] = (a, lam, get_of(x_sample_traj, x_true_traj))
                count = count + 1


        # count = 0
        #     for eps in eps_samples:
        #         z_system.change_parameters(np.array((a_true, b_true, lam, eps)))
        #         try:
        #             x_sample_traj = z_system.get_trajectory(x0_true, times)
        #         except CustomErrors.IntegrationError:
        #             continue
        #         else:
        #             data[count] = (lam, eps, get_of(x_sample_traj, x_true_traj))
        #             count = count + 1

        data = data[:count]
        all_data = comm.gather(data, root=0)

        if rank is 0:
            all_data = np.concatenate(all_data)
            np.savetxt('./data/a-lam-ofevals-2016.csv', all_data, delimiter=',')
            print '******************************\n \