def get_alanine_data(input_type='coordinates', return_dihedrals=True):
    """Download the alanine-dipeptide 3x250ns dataset via mdshare.

    Parameters
    ----------
    input_type : str
        Either 'coordinates' (heavy-atom positions) or 'distances'
        (heavy-atom distances).
    return_dihedrals : bool
        If True, also download the backbone-dihedral trajectories.

    Returns
    -------
    list
        [trajectory] or [trajectory, dihedrals] as ndarrays.

    Raises
    ------
    ValueError
        If input_type is not 'coordinates' or 'distances'.
    """
    import mdshare

    # Map the requested feature type to its mdshare dataset name.
    datasets = {
        'distances': 'alanine-dipeptide-3x250ns-heavy-atom-distances.npz',
        'coordinates': 'alanine-dipeptide-3x250ns-heavy-atom-positions.npz',
    }
    try:
        dataset_name = datasets[input_type]
    except KeyError:
        # The original fell through to an unbound local (NameError) on an
        # unrecognized input_type; fail explicitly instead.
        raise ValueError(
            "input_type must be 'coordinates' or 'distances', got {!r}"
            .format(input_type)) from None

    local_filename = mdshare.fetch(dataset_name)
    retval = [np.load(local_filename)['arr_0']]

    if return_dihedrals:
        dihedral = np.load(
            mdshare.fetch(
                'alanine-dipeptide-3x250ns-backbone-dihedrals.npz'))['arr_0']
        retval.append(dihedral)
    return retval
def loadData():
    """Return the heavy-atom distance trajectories as one stacked ndarray.

    The archive is downloaded from the mdshare FTP server on first use;
    subsequent calls reuse the locally cached file.
    """
    archive_path = mdshare.fetch(
        'alanine-dipeptide-3x250ns-heavy-atom-distances.npz')
    with np.load(archive_path) as archive:
        stacked = np.vstack([archive[name] for name in sorted(archive.keys())])
    return stacked
def test(catalogue_file, checksum_file):
    """Smoke-test a repository: fetch every indexed file and every
    container into a scratch directory, deleting each downloaded file
    afterwards.

    Parameters
    ----------
    catalogue_file, checksum_file
        Passed straight to Repository(); presumably paths to the
        catalogue/checksum metadata -- TODO confirm against Repository.

    Side effects: creates and removes the 'mdshare-testing-area'
    directory. The final os.rmdir doubles as an assertion that every
    fetched file was actually removed (rmdir fails on a non-empty dir).
    """
    repository = Repository(catalogue_file, checksum_file)
    working_directory = 'mdshare-testing-area'
    os.mkdir(working_directory)
    for file in repository.index:
        local_file = fetch(
            file, working_directory=working_directory, repository=repository)
        os.remove(local_file)
    for file in repository.containers:
        local_files = fetch(
            file, working_directory=working_directory, repository=repository)
        # Container fetches may return a single path or a list of paths.
        # Check the type explicitly instead of catching the TypeError that
        # os.remove(list) happens to raise (exception-based control flow).
        if isinstance(local_files, (list, tuple)):
            for local_file in local_files:
                os.remove(local_file)
        else:
            os.remove(local_files)
    os.rmdir(working_directory)
def funkcja(n):
    """Fetch the heavy-atom distance data, reduce the first 1000 frames
    with Funkcja.fun, and scatter-plot the first two output components.

    Parameters
    ----------
    n
        Forwarded to Funkcja.fun; presumably the target dimensionality
        of the reduction -- TODO confirm against Funkcja.fun.
    """
    dataset = mdshare.fetch(
        'alanine-dipeptide-3x250ns-heavy-atom-distances.npz')
    with np.load(dataset) as f:
        X = np.vstack([f[key] for key in sorted(f.keys())])
    # Only the first 1000 frames are reduced to keep the run fast.
    X_new = X[:1000]
    # Color each point by the second input feature.
    color = X_new[:, 1]
    Y = Funkcja.fun(X_new, n)
    plt.scatter(Y[:, 0], Y[:, 1], c=color)
def test_mlmsm_pipeline(self):
    """End-to-end pipeline check: TICA -> k-means -> transition counts,
    then a maximum-likelihood MSM whose 2-state PCCA+ coarse-grained
    matrix must match the stored reference matrix (in either
    orientation) within 0.05."""
    npz_path = mdshare.fetch('hmm-doublewell-2d-100k.npz',
                             working_directory='data')
    with np.load(npz_path) as archive:
        trajectory = archive['trajectory']
        reference = archive['transition_matrix']

    steps = [
        ('tica', tica.TICA(lagtime=1, dim=1)),
        ('cluster', kmeans.KmeansClustering(n_clusters=2, max_iter=500)),
        ('counts', TransitionCountEstimator(lagtime=1, count_mode="sliding")),
    ]
    pipeline = Pipeline(steps=steps)
    pipeline.fit(trajectory)

    count_model = pipeline[-1].fetch_model().submodel_largest()
    mlmsm = msm.MaximumLikelihoodMSM().fit(count_model).fetch_model()
    coarse = mlmsm.pcca(2).coarse_grained_transition_matrix
    # PCCA state labeling is arbitrary, so accept either orientation.
    deviation = min(np.linalg.norm(coarse - reference),
                    np.linalg.norm(coarse - reference.T))
    assert deviation < 0.05
def funkcja(samp, dim, file_in, file_out):
    """Embed a subsampled mdshare dataset with t-SNE and save a
    KDE-shaded density plot of the embedding.

    Parameters
    ----------
    samp
        Subsampling step: only every samp-th frame is embedded.
    dim
        Number of t-SNE output components.
    file_in
        mdshare dataset name to fetch.
    file_out
        Path the figure is written to.
    """
    archive_path = mdshare.fetch(file_in)
    with np.load(archive_path) as archive:
        stacked = np.vstack([archive[name] for name in sorted(archive.keys())])

    # Keep every samp-th frame so t-SNE stays tractable.
    frame_ids = np.arange(0, stacked.shape[0], samp)
    subsample = stacked[frame_ids, :]

    tsne = manifold.TSNE(n_components=dim, init='pca')
    embedding = tsne.fit_transform(subsample)

    # Gaussian KDE over the embedding, evaluated on a 100x100 grid.
    density = kde.gaussian_kde(embedding.T)
    grid_x, grid_y = np.mgrid[
        embedding[:, 0].min():embedding[:, 0].max():100 * 1j,
        embedding[:, 1].min():embedding[:, 1].max():100 * 1j]
    grid_z = density(np.vstack([grid_x.flatten(), grid_y.flatten()]))

    plt.pcolormesh(grid_x, grid_y, grid_z.reshape(grid_x.shape),
                   shading='gouraud', cmap=plt.cm.Blues)
    plt.contour(grid_x, grid_y, grid_z.reshape(grid_x.shape))
    plt.savefig(file_out, transparent=True, bbox_inches='tight')
def Main():
    """Parse command-line options, load (or download via mdshare) the
    dataset, and run the fitting routine fit().

    Side effects: assigns the module-level configuration globals and
    prints the argparse help text before parsing.
    """
    global V_sampling, V_learning_rate, V_n_iter, V_min_grad_norm, V_visible_points, V_pca, args

    # Setting variables from console
    parser = argparse.ArgumentParser(
        prog='project',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            Marta Preis (285129)
            ----------------------------------------
            Converting high-dimensional tensor and
            returns its projection onto
            a low-dimensional space.
            ----------------------------------------
            '''))
    parser.add_argument(
        '-d', '--data', metavar='',
        default='alanine-dipeptide-3x250ns-heavy-atom-distances.npz',
        help='Path do data or name (download using mdshare)')
    parser.add_argument('-s', '--sampling', metavar='', type=int, default=500,
                        help='Using only every x sample. (default: 500)')
    parser.add_argument(
        '-l', '--learning_rate', metavar='', type=int, default=200,
        help='The learning rate is usually in the range [10.0, 1000.0]. (default: 200)')
    parser.add_argument(
        '-i', '--n_iter', metavar='', type=int, default=1000,
        help='Maximum number of iterations for the optimization. Should be at least 250. (default: 1000)')
    parser.add_argument(
        '-n', '--min_grad_norm', metavar='', type=float, default=1e-5,
        help='If the gradient norm is below this threshold, the optimization will be stopped. (default: 1e-5)')
    parser.add_argument('-v', '--visible_points', action='store_true',
                        help='Show all points in plot')
    parser.add_argument('-p', '--pca', action='store_true',
                        help='Initialization of embedding pca')

    # Show help
    parser.print_help()

    # Set variables
    args = parser.parse_args()
    V_data = args.data
    V_sampling = args.sampling
    V_learning_rate = args.learning_rate
    V_n_iter = args.n_iter
    V_min_grad_norm = args.min_grad_norm
    V_visible_points = args.visible_points
    V_pca = args.pca

    # Use a local file directly when it exists, otherwise download by name.
    # BUG FIX: the original called yaml.safe_load() on the *path string*,
    # which only worked by accident (a plain scalar parses back to itself)
    # and would break on paths containing YAML syntax characters.
    if os.path.isfile(V_data):
        dataset = V_data
    else:
        dataset = mdshare.fetch(V_data)
    with np.load(dataset) as f:
        Y = np.vstack([f[key] for key in sorted(f.keys())])

    # Fitting function
    fit(Y)
'-sol', '--solver', type=str, default='auto', help= "Metoda rozwiazania oparta na SVD: 'auto' - najbardziej wydajna metoda losowa, inne: 'full', 'arpack','randomized'; domyslnie: auto " ) #przypisanie odpowiednich argumentow args = parser.parse_args() Dimensions = args.dimensions Step = args.step Solver = args.solver #pobieranie danych z biblioteki mdshare dataset = mdshare.fetch('alanine-dipeptide-3x250ns-heavy-atom-distances.npz') with np.load(dataset) as f: X = np.vstack([f[key] for key in sorted(f.keys())]) #Standaryzacja danych wejsciowych w celu ujednolicenia danych (można pominąc) X_std = StandardScaler().fit_transform(X) if Dimensions == 3: #wlasciwa redukcja wymiarow Y = PCA(n_components=Dimensions, svd_solver=Solver).fit_transform(X_std[::Step]) #dopasowanie do zakresu od -pi do +pi Y[:, 0] = np.interp(Y[:, 0], (Y[:, 0].min(), Y[:, 0].max()), (-np.pi, np.pi))
#!/usr/bin/env python
'''
@Author ymh
@Email [email protected]
@Date 2021-08-29 15:03:30
@Web https://github.com/Aunity
'''
import os, sys
from mdshare import fetch

# Download the pentapeptide topology plus every matching trajectory file.
for dataset in ('pentapeptide-impl-solv.pdb',
                'pentapeptide-*-500ns-impl-solv.xtc'):
    fetch(dataset)
X, bioclustering.molecule_distances.crmsd_kabsch) print('cutoff:', np.mean(D)) [I, M] = bioclustering.clustering.gromos(D, np.mean(D)) print(I, M) # GROMOS using d-RMSD # create distance matrix using d-RMSD D = bioclustering.molecule_distances.create_distance_mat( X, bioclustering.molecule_distances.drmsd) print('cutoff:', np.mean(D)) [I, M] = bioclustering.clustering.gromos(D, np.mean(D)) print(I, M) # Clustering (e.g. k-means) using atom distances vectors as features # create atom distances vectors X_atom_dists = [] for mol in X: X_atom_dists.append( bioclustering.featurization.atom_distances_vector(mol)) kmeans = KMeans(n_clusters=5, random_state=0).fit(X_atom_dists) print(kmeans.labels_) print(kmeans.cluster_centers_) # Use PyEmma to download trajectories & extract features using featurizer files = mdshare.fetch('alanine-dipeptide-*-250ns-nowater.xtc', working_directory='data') pdb = mdshare.fetch('alanine-dipeptide-nowater.pdb', working_directory='data') X_load = pyemma.coordinates.load(files, top=pdb)