예제 #1
0
파일: test_dmaps.py 프로젝트: glinka/dmaps
def dmaps_demo():
    """Demonstrate the DMAPS algorithm on a swissroll dataset.

    Steps, in order:

    1. generate a swissroll with ``gen_swissroll`` and display it,
    2. show the epsilon-investigation plot (``dmaps.epsilon_plot``),
    3. compute a ``k = 4`` embedding with the predefined ``epsilon``,
    4. write ``./eigvects.csv``, ``./eigvals.csv`` and ``./data.csv``,
    5. plot every pair of eigenvector coordinates with index >= 1.

    The reported solver time covers only the ``dmaps.embed_data`` call and is
    measured with ``timeit.default_timer``.
    """
    # Local import: time.clock() was removed in Python 3.8, default_timer is
    # available on both Python 2 and Python 3.
    from timeit import default_timer

    data = gen_swissroll()
    epsilon = 2  # np.sqrt(5.0)
    # single-argument print(...) emits identical text on Python 2 and 3
    print('Swissroll generated with %s points' % data.shape[0])
    print('Displaying dataset')
    plot_dmaps.plot_xyz(data[:, 0], data[:, 1], data[:, 2],
                        color=np.linalg.norm(data[:, :2], axis=1), s=80)

    # investigate proper epsilon
    print('Investigating effect of epsilon on embedding')
    dmaps.epsilon_plot(data)

    k = 4
    print('Computing embedding')
    # time only the embedding itself, not the CSV export that follows
    start = default_timer()
    eigvals, eigvects = dmaps.embed_data(data, k, epsilon=epsilon)
    elapsed = default_timer() - start

    np.savetxt('./eigvects.csv', eigvects, delimiter=',')
    np.savetxt('./eigvals.csv', eigvals, delimiter=',')
    np.savetxt('./data.csv', data, delimiter=',')

    print('Lanczos solver took %ss to find top %s eigenvectors' % (elapsed, k))
    print('Displaying dmaps embeddings')
    # index 0 is skipped; every remaining pair (i, j) with i < j is plotted
    for i in range(1, k):
        for j in range(i + 1, k):
            xlabel = r'$\Phi_{' + str(i + 1) + '}$'
            ylabel = r'$\Phi_{' + str(j + 1) + '}$'
            # each scatter is colored by eigenvector column 2 regardless of
            # which pair is being shown
            plot_dmaps.plot_xy(eigvals[i] * eigvects[:, i],
                               eigvals[j] * eigvects[:, j],
                               xlabel=xlabel,
                               ylabel=ylabel,
                               title='Embedding dataset with ' + xlabel + ' and ' + ylabel,
                               color=eigvects[:, 2],
                               s=50,
                               scatter=True,
                               hide_ticks=True,
                               cmap='jet')
예제 #2
0
def dmaps_contour():
    """Embed the transformed contour dataset and color it by each eigenvector.

    Reads ``./data/output/contour_KVSt_to_dmaps_transformed.csv``, computes a
    5-dimensional embedding with ``epsilon = 1e-1`` and, for every eigenvector
    with index >= 1, shows a 3d scatter of the first three data columns
    colored by that eigenvector.
    """
    data = np.genfromtxt('./data/output/contour_KVSt_to_dmaps_transformed.csv',
                         skip_header=0,
                         delimiter=',')
    ndims = 5
    # eigenvalues are returned by the solver but are not needed for the plots
    _eigvals, eigvects = dmaps.embed_data(data, ndims, epsilon=1e-1)

    # The three plotted columns are the same for every figure; the names
    # follow the axis labels used below (columns 0, 1, 2 -> St, V, K).
    st, v, k = data[:, 0], data[:, 1], data[:, 2]
    for i in range(1, ndims):
        plot_dmaps.plot_xyz(st, v, k,
                            color=eigvects[:, i],
                            xlabel=r'$\hat{St}$',
                            ylabel=r'$\hat{V}$',
                            zlabel=r'$\hat{K}$')
예제 #3
0
def _build_parser():
    """Create the command-line parser: input files plus one flag per plot."""
    parser = argparse.ArgumentParser()
    parser.add_argument('input_files',
                        nargs='+',
                        help="files from which data will be read")
    # (flag, help) pairs; every flag is a plain boolean switch, off by default
    switches = [
        ('--dmap-embeddings',
         "plot 2d DMAP embeddings from eigenvector inputs"),
        ('--kernel-sums',
         "plots kernel sums vs. epsilon in kernel for determination of epsilon in DMAP"),
        ('--of-coloring',
         "plots the k1, k2 plane colored by obj. fn. value"),
        ('--kplane-coloring',
         "plots k1, k2 plane colored by successive DMAP eigenvectors"),
        ('--param-surface',
         "plots parameters from 'sloppy_params.csv' in three-dimensional space"),
        ('--kvs-of-contours',
         "plots contours of the objective function, where the input data is a (npts, 4) array whose rows are (K, V, St, of_eval)"),
        # the two entries below list three columns per row, hence (npts, 3)
        ('--ek-of-contours',
         "plots contours of the objective function, where the input data is a (npts, 3) array whose rows are (epsilon, kappa, of_eval)"),
        ('--kv-of-contours',
         "plots contours of the objective function, where the input data is a (npts, 3) array whose rows are (K, V, of_eval)"),
        ('--of-contour',
         "plots contour of the objective function, where the input data is a (npts, 2) array whose rows are (param1, param2)"),
    ]
    for flag, help_text in switches:
        parser.add_argument(flag,
                            action='store_true',
                            default=False,
                            help=help_text)
    return parser


def _load_datasets(filenames):
    """Read every ``.csv`` in *filenames* and group the data by file header.

    Returns a dict of dicts: dataset -> data type -> raw data. Each inner
    dict also carries the header under the ``'params'`` key. The outer keys
    are tuples built from the header; they exist only for grouping and the
    result is meant to be iterated over, not indexed.
    """
    data_types = [
        'eigvals', 'eigvects', 'sloppy_params', 'epsilons', 'kernel_sums',
        'contour', 'guesses'
    ]
    datasets = {}
    for filename in filenames:
        # only import csv files
        if not filename.endswith('.csv'):
            continue
        data, params = uf.get_data(filename, header_rows=1)
        # Sort the header keys so that two files with the same header always
        # produce the same grouping key, independent of dict iteration order.
        # NOTE(review): assumes header keys are mutually comparable (e.g. all
        # strings) -- confirm against uf.get_data.
        dataset_key = tuple(
            (key, params[key]) for key in sorted(params.keys()))
        if dataset_key not in datasets:
            # first file with this header: start a new dataset entry
            datasets[dataset_key] = {'params': params}
        # the data type is recognised by its name appearing in the filename
        for data_type in data_types:
            if data_type in filename:
                datasets[dataset_key][data_type] = data
    return datasets


def main():
    """Parse the command line, load the given CSV files and run the plots.

    Non-``.csv`` input files are ignored. Files sharing the same header are
    grouped into one dataset, and each routine selected by a command-line
    flag is run once per dataset. A selected routine whose required data type
    was not loaded raises ``KeyError``.
    """
    args = _build_parser().parse_args()
    datasets = _load_datasets(args.input_files)

    # run desired routines over each dataset
    for dataset in datasets.values():
        # plots the k1, k2 plane colored by obj. fn. value
        if args.of_coloring:
            plot_dmaps.plot_xy(dataset['sloppy_params'][:, 0],
                               dataset['sloppy_params'][:, 1],
                               color=dataset['sloppy_params'][:, 2],
                               xlabel=r"$k_1$",
                               ylabel="$k_2$",
                               scatter=True)
        # plots k1, k2 plane colored by successive DMAP eigenvectors
        if args.kplane_coloring:
            # eigvects are used as column vectors here, without transposing
            for i in range(1, dataset['eigvects'].shape[1]):
                plot_dmaps.plot_xy(dataset['sloppy_params'][:, 0],
                                   dataset['sloppy_params'][:, 1],
                                   color=-dataset['eigvects'][:, i],
                                   xlabel=r"$k_1$",
                                   ylabel="$k_2$",
                                   scatter=True)
        # plots kernel sums vs. epsilon in kernel for determination of epsilon in DMAP
        if args.kernel_sums:
            plot_dmaps.plot_xy(dataset['epsilons'],
                               dataset['kernel_sums'],
                               xlabel=r"$\epsilon$",
                               # raw string: "\s" is not a valid escape
                               ylabel=r"$\sum W_{ij}$",
                               xscale='log',
                               yscale='log')
        if args.dmap_embeddings:
            # NOTE(review): eigvects are transposed here but not in the
            # --kplane-coloring branch above -- confirm which orientation
            # plot_embeddings expects.
            if 'sloppy_params' in dataset:
                # sloppy-param dataset available: color the embedding by its
                # column 1 and show a colorbar
                plot_dmaps.plot_embeddings(
                    dataset['eigvects'].T,
                    dataset['eigvals'],
                    color=dataset['sloppy_params'][:, 1],
                    colorbar=True,
                    k=6)  #plot_3d=True)
            else:
                plot_dmaps.plot_embeddings(dataset['eigvects'].T,
                                           dataset['eigvals'],
                                           plot_3d=False)
        if args.param_surface:
            # assume only three parameters have been used for investigation and plot the values in log-space
            plot_dmaps.plot_xyz(dataset['sloppy_params'][:, 0],
                                dataset['sloppy_params'][:, 1],
                                dataset['sloppy_params'][:, 2],
                                xlabel=r'$K_M$',
                                ylabel=r'$V_M$',
                                zlabel=r'$\epsilon$',
                                color=dataset['sloppy_params'][:, 2],
                                labelsize=24)
        if args.kvs_of_contours:
            print('loaded data, plotting')
            plot_of_k_v_st_contours(dataset['contour'])
        if args.ek_of_contours:
            plot_of_contours(dataset['contour'], r'$\epsilon$', r'$\kappa$')
        if args.kv_of_contours:
            plot_of_contours(dataset['contour'], r'$K$', r'$V$')
        if args.of_contour:
            plot_contour(dataset['contour'])  #, guesses=dataset['guesses'])
예제 #4
0
def dmaps_param_set_grad_kernel():
    """DMAP a collection of sloppy parameter combinations with a gradient kernel.

    The kernel accounts for the objective function value and should, ideally,
    uncover the important parameter(s) in the model.

    Reads ``./data/params-ofevals.csv``, keeps the rows whose column 0 is
    below ``of_tol``, thins them to roughly ``npts`` rows, embeds columns
    1 onward with a ``DMAPS_Gradient_Kernel`` and shows two 3d scatters of
    columns 1-3 colored by eigenvectors 1 and 2.
    """
    # set up base model
    A0 = 1.0  # initial concentration of A
    k1_true = 1.0
    kinv_true = 1000.0
    k2_true = 1000.0
    # effective rate constant that governs exponential growth rate
    decay_rate = k1_true * k2_true / (kinv_true + k2_true)
    # start at t0 = 0, end at tf*decay_rate = 4
    ntimes = 20  # arbitrary
    times = np.linspace(0, 4 / decay_rate, ntimes)
    model = Rawlings_Model(times,
                           A0,
                           k1_true,
                           kinv_true,
                           k2_true,
                           using_sympy=True)

    # import existing data
    data = np.genfromtxt('./data/params-ofevals.csv', delimiter=',')

    of_tol = 0.4  # from plotting with scratch.py
    somedata = data[data[:, 0] < of_tol]
    # only keep about npts points due to computational considerations
    npts = 6000
    # Integer stride of at least 1: with fewer than npts rows the plain
    # quotient would be 0 (a zero slice step raises ValueError), and on
    # Python 3 "/" would yield a float, which is not a valid slice step.
    slice_size = max(1, somedata.shape[0] // npts)
    somedata = somedata[::slice_size]

    # DMAP with o.f. kernel, appears the epsilon = 20 is appropriate
    epsilon = 20.0
    kernel = DMAPS_Gradient_Kernel(epsilon, model.sympy_lsq_of_gradient)
    k = 15
    # the un-logged parameter columns are embedded, since the kernel uses the
    # gradient of the objective function
    _eigvals, eigvects = dmaps.embed_data_customkernel(somedata[:, 1:], k,
                                                       kernel)
    # color the parameter cloud by eigenvector 1, then by eigenvector 2
    for eigvect_index in (1, 2):
        plot_dmaps.plot_xyz(somedata[:, 1],
                            somedata[:, 2],
                            somedata[:, 3],
                            color=eigvects[:, eigvect_index])
예제 #5
0
def dmaps_param_set():
    """Perform a DMAP of the log10 parameter set stored in ``./temp.pkl``.

    Columns 1 onward of the loaded array are log10-transformed and embedded
    in ``k = 12`` dimensions with ``epsilon = 0.3``. The embedding is cached
    as two CSV files under ``./data`` whose names encode ``of_max`` and ``k``.
    It is loaded from there when both files exist, otherwise it is computed
    and saved. Finally the log parameters are shown twice as a 3d scatter,
    colored by eigenvectors 1 and 2.

    No filtering happens here; ``of_max`` only names the cache files.
    NOTE(review): presumably ``./temp.pkl`` already holds only the points
    below that objective-function tolerance -- confirm.
    """
    # NOTE(review): np.load on a pickle file can execute arbitrary code and
    # recent numpy versions require allow_pickle=True for it; only load
    # trusted files.
    data = np.load('./temp.pkl')

    of_max = 1e-3
    print('have %s pts in dataset' % data.shape[0])
    log_params_data = np.log10(data[:, 1:])

    # dmap the log data
    epsilon = 0.3  # from epsilon_plot
    k = 12  # number of dimensions for embedding

    filename_id = 'tol-' + str(of_max) + '-k-' + str(k)
    eigvects_path = './data/dmaps-eigvects--' + filename_id + '.csv'
    eigvals_path = './data/dmaps-eigvals--' + filename_id + '.csv'

    # Reuse a previous embedding only if BOTH files are present; a partial
    # cache falls through to recomputation instead of failing on load.
    if os.path.isfile(eigvects_path) and os.path.isfile(eigvals_path):
        print('plotting from previous points')
        eigvects = np.genfromtxt(eigvects_path, delimiter=',')
        eigvals = np.genfromtxt(eigvals_path, delimiter=',')
    else:
        print('computing new embedding')
        eigvals, eigvects = dmaps.embed_data(log_params_data,
                                             k,
                                             epsilon=epsilon)
        np.savetxt(eigvects_path, eigvects, delimiter=',')
        np.savetxt(eigvals_path, eigvals, delimiter=',')

    # color the log-parameter cloud by eigenvector 1, then by eigenvector 2
    for eigvect_index in (1, 2):
        plot_dmaps.plot_xyz(log_params_data[:, 0],
                            log_params_data[:, 1],
                            log_params_data[:, 2],
                            color=eigvects[:, eigvect_index],
                            xlabel='\n\n' + r'$\log(k_1)$',
                            ylabel='\n\n' + r'$\log(k_{-1})$',
                            zlabel='\n\n' + r'$\log(k_2)$')
예제 #6
0
# ax.xaxis.set_scale('log')
# ax.yaxis.set_scale('log')
# ax.zaxis.set_scale('log')

# dmaps stuff
import plot_dmaps

# Load the saved embedding; the file names encode the tolerance and the
# embedding dimension.
eigvects = np.genfromtxt(
    ''.join(['./data/dmaps-eigvects--tol-', str(of_tol), '-k-', str(k), '.csv']),
    delimiter=',')
eigvals = np.genfromtxt(
    ''.join(['./data/dmaps-eigvals--tol-', str(of_tol), '-k-', str(k), '.csv']),
    delimiter=',')

# Strip plot: eigenvector 1 along x at constant height 1, colored by log10(keff).
plt.scatter(eigvects[:, 1], np.ones(len(eigvects)), c=np.log10(keff), lw=0)
plt.show()

# Log-parameter cloud colored by eigenvector 1 ...
plot_dmaps.plot_xyz(
    log_params_data[:, 0], log_params_data[:, 1], log_params_data[:, 2],
    color=eigvects[:, 1],
    xlabel='log(k1)', ylabel='log(kinv)', zlabel='log(k2)')
# ... and by eigenvector 2.
plot_dmaps.plot_xyz(
    log_params_data[:, 0], log_params_data[:, 1], log_params_data[:, 2],
    color=eigvects[:, 2],
    xlabel='log(k1)', ylabel='log(kinv)', zlabel='log(k2)')