Example #1
0
def my_kernelPCA(kernel):
    """Run kernel PCA on the selected atoms of the module-level trajectory.

    Superposes ``pca_traj`` onto its first frame using ``sele_grp``, flattens
    the selected coordinates to one row per frame, mean-centers each column
    and fits a ``KernelPCA`` (``gamma=10``, inverse transform enabled).
    The projection is handed to ``write_plots``/``write_fig``, the kernel
    eigenvalues are saved to ``<out_dir>/kpca_variance`` and the cosine
    content of the first four components is printed.

    Relies on the module-level names ``pca_traj``, ``sele_grp`` and
    ``out_dir``.

    Args:
        kernel: Kernel identifier forwarded to ``KernelPCA(kernel=...)``.

    Returns:
        None.
    """
    # Superpose each conformation in the trajectory upon the first frame.
    pca_traj.superpose(pca_traj, 0, atom_indices=sele_grp)

    # Keep only the selected atoms, then flatten to (n_frames, n_atoms * 3).
    coords = pca_traj.xyz[:, sele_grp, :]
    flat = coords.reshape(pca_traj.n_frames, len(sele_grp) * 3)
    flat = flat.astype(float)  # avoids a numpy conversion error during scaling
    centered = preprocessing.scale(flat, axis=0, with_std=False)  # center to the mean

    kpca = KernelPCA(kernel=kernel, fit_inverse_transform=True, gamma=10)
    kpca.fit(centered)
    kpca_reduced = kpca.transform(centered)

    # Write plots.
    write_plots('kpca_projection', kpca_reduced, out_dir)
    write_fig('kpca_projection', kpca_reduced, out_dir, 'kPCA Projection')

    # Write variance. Newer scikit-learn releases expose the eigenvalues as
    # ``eigenvalues_``; older ones only provide ``lambdas_``.
    eigenvalues = getattr(kpca, 'eigenvalues_', None)
    if eigenvalues is None:
        eigenvalues = kpca.lambdas_
    np.savetxt(out_dir + '/kpca_variance', eigenvalues)

    # Single-argument print keeps the output identical on Python 2 and 3.
    for label, index in (('first', 0), ('second', 1), ('3rd', 2), ('4th', 3)):
        print('cosine content of %s PC= %s'
              % (label, get_cosine(kpca_reduced, index)))
    return
Example #2
0
def mds(input, type):
    """Run metric or non-metric multidimensional scaling.

    ``input`` is passed to ``MDS.fit_transform`` with
    ``dissimilarity="precomputed"``. The embedding is written out through
    ``write_plots``/``write_fig`` and the cosine content of the first four
    components is printed.

    Args:
        input: Data handed unchanged to ``MDS.fit_transform``.
        type: ``'nm'`` for non-metric MDS or ``'metric'`` for metric MDS.
            Any other value prints an error message and does nothing else.

    Returns:
        None.
    """
    seed = np.random.RandomState(seed=1)

    # Pick the per-variant settings; everything else is shared below.
    if type == 'nm':
        prefix, title = 'nmds_projection', 'nMDS Projection'
        banner = "Performing non-metric MDS.."
        variant_kwargs = {'metric': False}
    elif type == 'metric':
        prefix, title = 'mmds_projection', 'mMDS Projection'
        banner = "Performing metric MDS.."
        variant_kwargs = {}
    else:
        print('ERROR: Please check -mt flag options by running mds.py -h')
        return

    model = MDS(n_components=100,
                max_iter=3000,
                random_state=seed,
                dissimilarity="precomputed",
                **variant_kwargs)
    print(banner)
    embedding = model.fit_transform(input)

    # Write PC plots.
    write_plots(prefix, embedding, out_dir)
    write_fig(prefix, embedding, out_dir, title)

    # Cosine content of the leading four components.
    for label, index in (('first', 0), ('second', 1), ('3rd', 2), ('4th', 3)):
        print('cosine content of %s PC=' % label, get_cosine(embedding, index))

    return
Example #3
0
def tsne(input):
    """Embed ``input`` with t-distributed Stochastic Neighbor Embedding.

    Fits a three-component ``TSNE`` (3000 iterations, PCA initialisation,
    fixed random seed) and writes the embedding through ``write_plots`` and
    ``write_fig`` into the module-level ``out_dir``.

    Args:
        input: Data handed unchanged to ``TSNE.fit_transform``.

    Returns:
        None.
    """
    seed = np.random.RandomState(seed=1)
    # Original author's note: n_components above 3 apparently raises an
    # error in certain cases.
    my_tsne = TSNE(n_components=3, n_iter=3000, random_state=seed, init='pca')
    # Parenthesised single argument prints the same on Python 2 and 3.
    print("Performing TSNE...")
    mpos = my_tsne.fit_transform(input)
    write_plots('tsne_projection', mpos, out_dir)
    write_fig('tsne_projection', mpos, out_dir, 't-SNE Projection')
    return
Example #4
0
def tsne(input):
    """Embed ``input`` with t-SNE using the command-line hyper-parameters.

    Perplexity, iteration count and learning rate are read from the
    module-level ``args`` object. The three-component embedding is written
    through ``write_plots`` and ``write_fig`` into ``out_dir``.

    Args:
        input: Data handed unchanged to ``TSNE.fit_transform``.

    Returns:
        None.
    """
    # Original author's note: n_components above 3 apparently raises an
    # error in certain cases.
    tsne_options = {
        'n_components': 3,
        'perplexity': args.perplexity,
        'n_iter': args.n_iter,
        'learning_rate': args.learning_rate,
        'random_state': np.random.RandomState(seed=1),
        'init': 'pca',
    }
    model = TSNE(**tsne_options)

    print("Performing TSNE...with perplexity", args.perplexity, "n_iter",
          args.n_iter, " and learning_rate", args.learning_rate)

    embedding = model.fit_transform(input)
    out_name = 'tsne_projection'
    write_plots(out_name, embedding, out_dir)
    write_fig(out_name, embedding, out_dir, 't-SNE Projection')

    return
def distance_pca(int_cord1):
    """Run PCA on internal coordinates and report the results.

    Fits a ``PCA`` with ``n_components=comp`` (module-level) on
    ``int_cord1``, writes the projection via ``write_plots``/``write_fig``
    and the fitted model via ``write_pcs`` into ``out_dir``, then prints the
    cosine content of the first four components.

    Args:
        int_cord1: Data handed unchanged to ``PCA.fit`` and
            ``PCA.transform``.

    Returns:
        None.
    """
    pca = PCA(n_components=comp)
    dpca = pca.fit(int_cord1)
    dpca_reduced = dpca.transform(int_cord1)

    write_plots('dpca_projection', dpca_reduced, out_dir)
    write_pcs('dpca_pcs', dpca, out_dir)
    write_fig('dpca_projection', dpca_reduced, out_dir,
              'internal coordinate PCA Projection')

    # Single-argument print keeps the output identical on Python 2 and 3.
    for label, index in (('first', 0), ('second', 1), ('3rd', 2), ('4th', 3)):
        print('cosine content of %s PC= %s'
              % (label, get_cosine(dpca_reduced, index)))
    return
Example #6
0
def svd_pca(svd):
    """Run singular value decomposition based PCA on the selected atoms.

    Superposes the module-level ``pca_traj`` onto its first frame using
    ``sele_grp``, flattens the selected coordinates to one row per frame,
    mean-centers each column and fits ``PCA(n_components=comp)``. Prints the
    trace of the model covariance, writes the projection and the variance
    via ``write_plots``/``write_fig``/``write_pcs`` into ``out_dir`` and
    prints the cosine content of the first four components.

    Args:
        svd: Not used by this function; kept so existing callers keep
            working.

    Returns:
        None.
    """
    # Superpose each conformation in the trajectory upon the first frame.
    pca_traj.superpose(pca_traj, 0, atom_indices=sele_grp)

    # Keep only the selected atoms, then flatten to (n_frames, n_atoms * 3).
    coords = pca_traj.xyz[:, sele_grp, :]
    flat = coords.reshape(pca_traj.n_frames, len(sele_grp) * 3)
    flat = flat.astype(float)  # avoids a numpy conversion error during scaling
    centered = preprocessing.scale(flat, axis=0, with_std=False)  # center to the mean

    pca_sele_traj = PCA(n_components=comp)
    pca_sele_traj.fit(centered)
    pca_sele_traj_reduced = pca_sele_traj.transform(centered)

    # Two spaces after the colon reproduce the Python 2 ``print a, b`` output
    # (the literal ended with a space and print added a separator).
    print('Trace of the covariance matrix is:  %s'
          % (np.trace(pca_sele_traj.get_covariance()),))

    # Write the plots.
    write_plots('pca_projection', pca_sele_traj_reduced, out_dir)
    write_fig('pca_projection', pca_sele_traj_reduced, out_dir,
              'PCA Projection')

    # Write the PCs variance.
    write_pcs('pca_variance', pca_sele_traj, out_dir)

    # Single-argument print keeps the output identical on Python 2 and 3.
    for label, index in (('first', 0), ('second', 1), ('3rd', 2), ('4th', 3)):
        print('cosine content of %s PC= %s'
              % (label, get_cosine(pca_sele_traj_reduced, index)))

    return
Example #7
0
def incremental_pca():
    """Run incremental PCA (IPCA) on the selected atoms of the trajectory.

    Normal PCA is very memory intensive, which can be problematic for large
    datasets because the whole dataset is held in memory; IPCA is typically
    used for such cases.

    Superposes the module-level ``pca_traj`` onto its first frame using
    ``sele_grp``, flattens the selected coordinates to one row per frame,
    mean-centers each column and fits a default ``IncrementalPCA``. The
    projection is written via ``write_plots``/``write_fig`` into ``out_dir``
    and the cosine content of the first four components is printed.

    Returns:
        None.
    """
    # Superpose each conformation in the trajectory upon the first frame.
    pca_traj.superpose(pca_traj, 0, atom_indices=sele_grp)

    # Keep only the selected atoms, then flatten to (n_frames, n_atoms * 3).
    coords = pca_traj.xyz[:, sele_grp, :]
    flat = coords.reshape(pca_traj.n_frames, len(sele_grp) * 3)
    flat = flat.astype(float)  # avoids a numpy conversion error during scaling
    centered = preprocessing.scale(flat, axis=0, with_std=False)  # center to the mean

    ipca = IncrementalPCA()
    ipca = ipca.fit(centered)
    ipca_reduced = ipca.transform(centered)

    # Write plots.
    write_plots('ipca_projection', ipca_reduced, out_dir)
    write_fig('ipca_projection', ipca_reduced, out_dir, 'iPCA Projection')

    # Single-argument print keeps the output identical on Python 2 and 3.
    for label, index in (('first', 0), ('second', 1), ('3rd', 2), ('4th', 3)):
        print('cosine content of %s PC= %s'
              % (label, get_cosine(ipca_reduced, index)))

    return
Example #8
0
def my_pca():
    """Run eigenvalue-decomposition PCA on the selected atoms.

    Superposes the module-level ``pca_traj`` onto its first frame using
    ``sele_grp``, flattens the selected coordinates to one row per frame and
    mean-centers each column. The matrix returned by ``np.corrcoef`` is
    eigen-decomposed, every eigenpair is sanity-checked, and the pairs are
    sorted by descending eigenvalue. The percentage of variance of the first
    100 components is written as an xmgrace file to
    ``<out_dir>/pca_variance.agr``. The data is projected onto those
    components and passed to ``write_plots``/``write_fig``, ``get_rmsf`` is
    called with the sorted eigenvalues, and the cosine content of the first
    four components is printed.

    Returns:
        None.

    Raises:
        AssertionError: If an eigenpair fails the ``A v = lambda v`` check
            to three decimals.
    """
    # Superpose each conformation in the trajectory upon the first frame.
    pca_traj.superpose(pca_traj, 0, atom_indices=sele_grp)

    # Keep only the selected atoms, then flatten to (n_frames, n_atoms * 3).
    coords = pca_traj.xyz[:, sele_grp, :]
    flat = coords.reshape(pca_traj.n_frames, len(sele_grp) * 3)
    flat = flat.astype(float)  # avoids a numpy conversion error during scaling
    arr = preprocessing.scale(flat, axis=0, with_std=False)  # center to the mean

    # ===============================================
    # "covariance" matrix
    # NOTE(review): np.corrcoef returns the correlation matrix, not the
    # covariance matrix the names and messages refer to. Left unchanged so
    # the numerical output stays the same; confirm which one is intended.
    cov_mat = np.corrcoef(arr, rowvar=False)
    trj_eval, trj_evec = np.linalg.eig(cov_mat)

    # Two spaces reproduce the Python 2 ``print a, b`` output exactly.
    print('Trace of cov matrix is  %s' % (np.trace(cov_mat),))

    # =============================
    # Sanity check of the calculated eigenvectors and eigenvalues:
    # matrix * eigenvector must equal eigenvalue * eigenvector.
    for i, eigenvalue in enumerate(trj_eval):
        column = trj_evec.real[:, i].reshape(-1, 1)
        np.testing.assert_array_almost_equal(cov_mat.dot(column),
                                             eigenvalue * column,
                                             decimal=3,
                                             err_msg='',
                                             verbose=True)

    # =============================================
    # Sort the eigenvalues and eigenvectors in descending eigenvalue order.
    # np.linalg.eig stores eigenvector i in COLUMN i, so the columns (not
    # the rows) must be reordered to keep each vector with its eigenvalue.
    sort_idx = trj_eval.argsort()[::-1]
    trj_eval = trj_eval[sort_idx]
    trj_evec = trj_evec[:, sort_idx]

    tot_var = np.sum(trj_eval.real)
    n_comp = 100
    pca = trj_evec.real[:, 0:n_comp]  # keep the first 100 eigenvectors
    variation = [(value / tot_var) * 100 for value in trj_eval.real[0:n_comp]]

    # Write the variance plot: dump the numbers, read them back and rewrite
    # the file with the xmgrace header in front.
    pca_variance_fname = out_dir + '/pca_variance.agr'
    np.savetxt(pca_variance_fname, variation)
    with open(pca_variance_fname, 'r') as ef:
        ef_cont = ef.read()
    header_title = '\tcreated by pca.py\t'
    my_time = strftime("%Y-%m-%d  %a  %H:%M:%S", gmtime())
    legends = '@    title "explained_variance of PCs"\n\
	@    xaxis  label "PCs"\n\
	@    yaxis  label "% Variance"\n\
	@	TYPE xy\n\
	@    s0 symbol 1\n\
	@    s0 symbol size 0.250000\n\
	@    s0 symbol color 1\n\
	@    s0 symbol pattern 1\n\
	@    s0 symbol fill color 1\n\
	@    s0 symbol fill pattern 1\n\
	@    s0 symbol linewidth 1.0\n\
	@    s0 symbol linestyle 1\n\
	@    s0 symbol char 25\n\
	@	s0 symbol fill color 2\n\
	@	s0 symbol color 2\n\
	@    s0 symbol char font 0\n\
	@    s0 symbol skip 0\n'

    with open(pca_variance_fname, 'w') as ef:
        ef.write('#' + header_title + '\ton\t' + my_time + '\n' + legends +
                 '\n' + ef_cont + '\n')

    # ========================================================
    # Transform the input data onto the chosen PCs.
    arr_transformed = arr.dot(pca)
    print(arr_transformed.shape)
    write_plots('pca_projection', arr_transformed, out_dir)
    write_fig('pca_projection', arr_transformed, out_dir, 'PCA Projection')

    # RMSF
    get_rmsf(pca_traj, sele_grp, trj_eval, out_dir)

    # Single-argument print keeps the output identical on Python 2 and 3.
    for label, index in (('first', 0), ('second', 1), ('3rd', 2), ('4th', 3)):
        print('cosine content of %s PC= %s'
              % (label, get_cosine(arr_transformed, index)))

    return