Example #1
0
def get_alanine_data(input_type='coordinates', return_dihedrals=True):
    """Fetch alanine-dipeptide trajectory data via mdshare.

    Parameters
    ----------
    input_type : str
        'coordinates' for heavy-atom positions, 'distances' for
        heavy-atom distances.
    return_dihedrals : bool
        If True, additionally fetch the backbone dihedrals.

    Returns
    -------
    list
        [trajectory] or [trajectory, dihedrals], each a numpy array.

    Raises
    ------
    ValueError
        If ``input_type`` is neither 'coordinates' nor 'distances'.
    """
    # Validate before doing any work: the original fell through both
    # branches on an unknown input_type and crashed later with a
    # NameError on traj_whole.
    if input_type not in ('distances', 'coordinates'):
        raise ValueError(
            "input_type must be 'coordinates' or 'distances', "
            "got {!r}".format(input_type))

    import mdshare

    retval = []

    if input_type == 'distances':
        local_filename = mdshare.fetch(
            'alanine-dipeptide-3x250ns-heavy-atom-distances.npz')
    else:  # input_type == 'coordinates'
        local_filename = mdshare.fetch(
            'alanine-dipeptide-3x250ns-heavy-atom-positions.npz')

    traj_whole = np.load(local_filename)['arr_0']
    retval.append(traj_whole)

    if return_dihedrals:
        dihedral = np.load(
            mdshare.fetch(
                'alanine-dipeptide-3x250ns-backbone-dihedrals.npz'))['arr_0']
        retval.append(dihedral)

    return retval
Example #2
0
def loadData(
):  # Download the dataset from the FTP server (or reuse the cached copy) and return it as an ndarray.
    archive = mdshare.fetch(
        'alanine-dipeptide-3x250ns-heavy-atom-distances.npz')
    with np.load(archive) as npz:
        # Stack all stored trajectories row-wise, in sorted key order.
        stacked = np.vstack([npz[name] for name in sorted(npz.keys())])
    return stacked
Example #3
0
def test(catalogue_file, checksum_file):
    """Smoke-test a repository: fetch every entry into a scratch
    directory, delete each downloaded file, then remove the directory."""
    repo = Repository(catalogue_file, checksum_file)
    workdir = 'mdshare-testing-area'
    os.mkdir(workdir)
    # Plain index entries resolve to exactly one local file each.
    for entry in repo.index:
        downloaded = fetch(
            entry,
            working_directory=workdir,
            repository=repo)
        os.remove(downloaded)
    # Container entries may resolve to a single path or a list of paths.
    for entry in repo.containers:
        downloaded = fetch(
            entry,
            working_directory=workdir,
            repository=repo)
        try:
            os.remove(downloaded)
        except TypeError:
            # os.remove rejects a list — delete the members individually.
            for path in downloaded:
                os.remove(path)
    os.rmdir(workdir)
Example #4
0
def funkcja(n):
    """Fetch the alanine-dipeptide heavy-atom distance data, project its
    first 1000 frames with Funkcja.fun and scatter-plot the 2-D result.

    Parameters
    ----------
    n : passed straight through to ``Funkcja.fun`` — its meaning is
        defined there (presumably an embedding parameter; TODO confirm).
    """
    dataset = mdshare.fetch(
        'alanine-dipeptide-3x250ns-heavy-atom-distances.npz')
    with np.load(dataset) as f:
        X = np.vstack([f[key] for key in sorted(f.keys())])

    # Keep only the first 1000 frames to keep the projection cheap.
    X_new = X[:1000]
    # The second input feature doubles as the colour channel of the plot.
    color = X_new[:, 1]
    Y = Funkcja.fun(X_new, n)

    # (Removed commented-out debug prints/dead code from the original.)
    plt.scatter(Y[:, 0], Y[:, 1], c=color)
    def test_mlmsm_pipeline(self):
        """Fit a TICA -> k-means -> transition-count pipeline on the 2-D
        double-well dataset, build a maximum-likelihood MSM from the
        counts and check that its 2-state PCCA+ coarse-grained matrix
        matches the stored reference (in either state ordering)."""
        file = mdshare.fetch('hmm-doublewell-2d-100k.npz', working_directory='data')

        with np.load(file) as fh:
            data = fh['trajectory']
            # Reference transition matrix shipped with the dataset.
            transition_matrix = fh['transition_matrix']

        pipeline = Pipeline(steps=[
            ('tica', tica.TICA(lagtime=1, dim=1)),
            ('cluster', kmeans.KmeansClustering(n_clusters=2, max_iter=500)),
            ('counts', TransitionCountEstimator(lagtime=1, count_mode="sliding"))
        ])
        pipeline.fit(data)
        # Restrict to the largest connected set of the count matrix.
        counts = pipeline[-1].fetch_model().submodel_largest()
        mlmsm = msm.MaximumLikelihoodMSM().fit(counts).fetch_model()
        P = mlmsm.pcca(2).coarse_grained_transition_matrix
        # The two metastable states may come out in either order, so
        # compare against the reference and its transpose.
        mindist = min(np.linalg.norm(P - transition_matrix), np.linalg.norm(P - transition_matrix.T))
        assert mindist < 0.05
Example #6
0
def funkcja(samp, dim, file_in, file_out):
    """Subsample the dataset, embed it with t-SNE, and save a
    kernel-density plot of the projection to ``file_out``."""
    archive = mdshare.fetch(file_in)
    with np.load(archive) as npz:
        data = np.vstack([npz[key] for key in sorted(npz.keys())])

    # Take every samp-th frame.
    picked = np.arange(0, data.shape[0], samp)
    subset = data[picked, :]

    embedder = manifold.TSNE(n_components=dim, init='pca')
    projected = embedder.fit_transform(subset)

    # Gaussian KDE evaluated on a 100x100 grid spanning the projection.
    density = kde.gaussian_kde(projected.T)
    grid_x, grid_y = np.mgrid[
        projected[:, 0].min():projected[:, 0].max():100 * 1j,
        projected[:, 1].min():projected[:, 1].max():100 * 1j]
    grid_z = density(np.vstack([grid_x.flatten(), grid_y.flatten()]))

    plt.pcolormesh(grid_x,
                   grid_y,
                   grid_z.reshape(grid_x.shape),
                   shading='gouraud',
                   cmap=plt.cm.Blues)
    plt.contour(grid_x, grid_y, grid_z.reshape(grid_x.shape))
    plt.savefig(file_out, transparent=True, bbox_inches='tight')
Example #7
0
def Main():
    """Parse command-line options, load or download the dataset, and fit it.

    Side effects: populates the module-level V_* settings and ``args``,
    prints the argparse help text, then calls ``fit()`` on the stacked data.
    """
    global V_sampling, V_learning_rate, V_n_iter, V_min_grad_norm, V_visible_points, V_pca, args

    # Setting variables from console
    parser = argparse.ArgumentParser(
        prog='project',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
             Marta Preis (285129)
             ----------------------------------------
                 Converting high-dimensional tensor
                 and returns its projection onto
                 a low-dimensional space.
             ----------------------------------------
             '''))
    parser.add_argument(
        '-d',
        '--data',
        metavar='',
        default='alanine-dipeptide-3x250ns-heavy-atom-distances.npz',
        help='Path to data or name (download using mdshare)')
    parser.add_argument('-s',
                        '--sampling',
                        metavar='',
                        type=int,
                        default=500,
                        help='Using only every x sample. (default: 500)')
    parser.add_argument(
        '-l',
        '--learning_rate',
        metavar='',
        type=int,
        default=200,
        help=
        'The learning rate is usually in the range [10.0, 1000.0]. (default: 200)'
    )
    parser.add_argument(
        '-i',
        '--n_iter',
        metavar='',
        type=int,
        default=1000,
        help=
        'Maximum number of iterations for the optimization. Should be at least 250. (default: 1000)'
    )
    parser.add_argument(
        '-n',
        '--min_grad_norm',
        metavar='',
        type=float,
        default=1e-5,
        help=
        'If the gradient norm is below this threshold, the optimization will be stopped. (default: 1e-5)'
    )
    parser.add_argument('-v',
                        '--visible_points',
                        action='store_true',
                        help='Show all points in plot')
    parser.add_argument('-p',
                        '--pca',
                        action='store_true',
                        help='Initialization of embedding pca')
    # Show help
    parser.print_help()
    # Set variables
    args = parser.parse_args()
    V_data = args.data
    V_sampling = args.sampling
    V_learning_rate = args.learning_rate
    V_n_iter = args.n_iter
    V_min_grad_norm = args.min_grad_norm
    V_visible_points = args.visible_points
    V_pca = args.pca

    # Use a local file directly if it exists, otherwise download via mdshare.
    if os.path.isfile(V_data):
        # BUG FIX: the original called yaml.safe_load(V_data), which parses
        # the *path string* as a YAML document instead of using the file path.
        dataset = V_data
    else:
        dataset = mdshare.fetch(V_data)
    with np.load(dataset) as f:
        Y = np.vstack([f[key] for key in sorted(f.keys())])

    # Fitting function
    fit(Y)
Example #8
0
    '-sol',
    '--solver',
    type=str,
    default='auto',
    help=
    "Metoda rozwiazania oparta na SVD: 'auto' - najbardziej wydajna metoda losowa, inne: 'full', 'arpack','randomized'; domyslnie: auto "
)

# Bind the parsed command-line arguments.
args = parser.parse_args()
Dimensions = args.dimensions
Step = args.step
Solver = args.solver

# Download the dataset via the mdshare library (cached after the first fetch).
dataset = mdshare.fetch('alanine-dipeptide-3x250ns-heavy-atom-distances.npz')
with np.load(dataset) as f:
    X = np.vstack([f[key] for key in sorted(f.keys())])

# Standardise the input features to zero mean / unit variance (optional step).
X_std = StandardScaler().fit_transform(X)

if Dimensions == 3:

    # The actual dimensionality reduction.
    Y = PCA(n_components=Dimensions,
            svd_solver=Solver).fit_transform(X_std[::Step])

    # Rescale the first component into the range [-pi, +pi].
    Y[:, 0] = np.interp(Y[:, 0], (Y[:, 0].min(), Y[:, 0].max()),
                        (-np.pi, np.pi))
Example #9
0
#!/usr/bin/env python
'''
@Author ymh
@Email  [email protected]
@Date   2021-08-29 15:03:30
@Web    https://github.com/Aunity
'''

import os, sys
from mdshare import fetch
# Download the pentapeptide topology and all matching 500 ns trajectories
# (the '*' pattern expands to every trajectory in the mdshare catalogue).
fetch('pentapeptide-impl-solv.pdb')
fetch('pentapeptide-*-500ns-impl-solv.xtc')
        X, bioclustering.molecule_distances.crmsd_kabsch)
    # Use the mean pairwise distance as the GROMOS clustering cutoff.
    print('cutoff:', np.mean(D))
    [I, M] = bioclustering.clustering.gromos(D, np.mean(D))
    print(I, M)

    # GROMOS using d-RMSD
    # create distance matrix using d-RMSD
    D = bioclustering.molecule_distances.create_distance_mat(
        X, bioclustering.molecule_distances.drmsd)
    print('cutoff:', np.mean(D))
    [I, M] = bioclustering.clustering.gromos(D, np.mean(D))
    print(I, M)

    # Clustering (e.g. k-means) using atom distances vectors as features
    # create atom distances vectors
    X_atom_dists = []
    for mol in X:
        X_atom_dists.append(
            bioclustering.featurization.atom_distances_vector(mol))

    kmeans = KMeans(n_clusters=5, random_state=0).fit(X_atom_dists)
    print(kmeans.labels_)
    print(kmeans.cluster_centers_)

    # Use PyEmma to download trajectories & extract features using featurizer
    files = mdshare.fetch('alanine-dipeptide-*-250ns-nowater.xtc',
                          working_directory='data')
    pdb = mdshare.fetch('alanine-dipeptide-nowater.pdb',
                        working_directory='data')
    X_load = pyemma.coordinates.load(files, top=pdb)