def pca(self, u, sele):
    """Project the atoms selected by ``sele`` onto their principal components.

    A PCA is run on universe ``u`` restricted to the selection string
    ``sele``.  The selected atoms are then transformed into principal
    component space and the result is stored as a ``pandas.DataFrame`` in
    ``self.df``, which is also returned.
    """
    from MDAnalysis.analysis import pca as mda_pca

    analysis = mda_pca.PCA(u, select=sele)
    analysis.run()

    # Position of the first cumulated-variance entry above 0.95.
    # NOTE(review): this is an index, yet it is handed to ``transform`` as a
    # component count -- confirm the intended number of components.
    above_threshold = np.where(analysis.cumulated_variance > 0.95)[0]
    n_pcs = above_threshold[0]

    projection = analysis.transform(u.select_atoms(sele), n_components=n_pcs)
    self.df = pd.DataFrame(projection)
    return self.df
def analyze_pca(u: mda.Universe, n_dimensions=40):
    """Fetch PCA component contribution values for a single trajectory.

    Runs a PCA on the ``'backbone'`` selection of ``u``, prints the cosine
    content of component 0 of a three-component projection, and returns a
    two-item list: the variance and the cumulated variance, each cut to the
    first ``n_dimensions`` entries.
    """
    backbone = 'backbone'
    fitted = pca.PCA(u, select=backbone).run()

    # Three-component projection, used only for the cosine-content printout.
    projected = fitted.transform(u.select_atoms(backbone), 3)
    print(pca.cosine_content(projected, 0))

    contributions = fitted.variance[:n_dimensions]
    cumulative = fitted.cumulated_variance[:n_dimensions]
    return [contributions, cumulative]
def test_cosine_content():
    """Cosine content of component 0 of the random-walk data is ~0.99."""
    walk = MDAnalysis.Universe(RANDOM_WALK_TOPO, RANDOM_WALK)
    fitted = pca.PCA(walk).run()
    projection = fitted.transform(walk.atoms)
    # Compared to one decimal place.
    assert_almost_equal(pca.cosine_content(projection, 0), .99, 1)
def test_transform_universe():
    """A PCA fitted on one universe can transform a second, separately loaded one."""
    fitted_on = MDAnalysis.Universe(waterPSF, waterDCD)
    applied_to = MDAnalysis.Universe(waterPSF, waterDCD)
    # Passing simply means the call below does not raise.
    pca.PCA(fitted_on).run().transform(applied_to)
def setUp(self):
    """Load the PSF/DCD universe into memory and run an unaligned CA PCA.

    Sets ``self.u`` (the universe), ``self.pca`` (the PCA after ``run()``)
    and ``self.n_atoms`` (number of atoms matched by the CA selection).
    """
    ca_selection = 'backbone and name CA'

    universe = MDAnalysis.Universe(PSF, DCD)
    self.u = universe
    universe.transfer_to_memory()

    self.pca = pca.PCA(universe, select=ca_selection, align=False)
    self.pca.run()

    self.n_atoms = universe.select_atoms(ca_selection).n_atoms
#print("The lentgh of the trajectory consist in:{} frames.".format(len(obj1.trajectory))) #firstly i need to align the trajectory: ___----->not true probably print("Aligning trajectories...") #aligner1 = align.AlignTraj(obj1, obj1, select="name CA", in_memory=True, verbose=True).run() aligner2 = align.AlignTraj(obj2, obj2, select="name CA", in_memory=True, verbose=True).run() #then perform the Principal components analysis: print("Extracting principal components...") #pc_obj1 = pca.PCA(obj1, select="name CA", align=False, mean = None, n_components= None, verbose=True).run() pc_obj2 = pca.PCA(obj2, select="name CA", align=False, mean=None, n_components=None, verbose=True).run() #first trj """ print("Total variance of MUTATED trajectory is:{}".format(sum(pc_obj1.cumulated_variance))) print("Plotting the cumulative variance of MUTATED principal components:") #plotting cumulated vanriance per Principal component: plot(pc_obj1.cumulated_variance[:10]) xlabel("Principal component") ylabel("Cumulative variance") show() #plotting atom pos in pc space print("Projecting atom position in the space of firsts 8 MUTATED principal components:")
import sys

import MDAnalysis as mda
import MDAnalysis.analysis.pca as pca
import numpy as np

if __name__ == "__main__":
    # Both positional arguments are required; stop with a usage hint rather
    # than an IndexError traceback when one of them is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: {} TOPOLOGY TRAJECTORY".format(sys.argv[0]))

    # reading structure and trajectory from command line
    topology = sys.argv[1]
    trajectory = sys.argv[2]

    # initialization input
    u = mda.Universe(topology, trajectory)

    # running PCA (default selection)
    input_pca = pca.PCA(u)
    input_pca.run()

    # Position of the first cumulated-variance entry above 0.95.
    # NOTE(review): this index is used below as a component count --
    # confirm the intended number of components.
    n_pcs = np.where(input_pca.cumulated_variance > 0.95)[0][0]
    print(n_pcs)

    # Project the whole universe onto the retained components.
    pca_space = input_pca.transform(u, n_components=n_pcs)
    print(pca_space)
def _plot_energy_profile(energies, gnorms):
    """Plot the energies and the gradient norms against the frame index."""
    fig, ax = plt.subplots(2)
    ax[0].plot(energies)
    ax[0].set_xlabel("frame")
    ax[0].set_ylabel("energy (kcal/mol)")
    ax[1].plot(gnorms)
    ax[1].set_xlabel("frame")
    ax[1].set_ylabel("grad. norm (Eh/a0)")
    plt.show()


def _report_pca(u, selection, selection_text):
    """Run a PCA restricted to the current selection and print the results."""
    p = pca.PCA(u, select=selection_text)
    p.run()
    # Position of the first cumulated-variance entry above 0.95.
    # NOTE(review): this index is used below as a component count --
    # confirm the intended number of components.
    n_pcs = np.where(p.cumulated_variance > 0.95)[0][0]
    print(n_pcs)
    print(p.cumulated_variance[0:n_pcs])
    pca_space = p.transform(selection, n_components=n_pcs)
    print(pca_space)
    print(pca.cosine_content(pca_space, 0))


def _plot_geometry(u, selection, energies):
    """Plot a bond length, angle or dihedral of ``selection`` per frame.

    The number of selected atoms decides the quantity (2: bond length,
    3: bond angle, 4: dihedral angle).  Any other count prints a message and
    returns without plotting.
    """
    n = len(selection)
    if n == 2:
        data_label, dx = "bond length (Å)", 0.1
    elif n == 3:
        data_label, dx = "bond angle (°)", 10.0
    elif n == 4:
        data_label, dx = "dihedral angle (°)", 10.0
    else:
        print("too few or too many indices")
        return

    quantity = data_label.split("(")[0][:-1]  # label text without the unit
    unit = data_label[-2]  # the character just before the closing ")"
    last_frame = len(u.trajectory) - 1

    data = []
    for i, (e, ts) in enumerate(zip(energies, u.trajectory)):
        # Positions are read inside the loop because they follow the frame
        # the trajectory iterator is currently on.
        positions = [atom.position for atom in selection]
        if n == 2:
            d = distances.calc_bonds(*positions)
        elif n == 3:
            d = np.degrees(distances.calc_angles(*positions))
        else:
            d = np.degrees(distances.calc_dihedrals(*positions))
        data.append(d)

        # Progress report every 100th frame and on the last one.
        if i % 100 == 0 or i == last_frame:
            print(
                f"frame = {ts.frame:4d}: e = {e:5.1f} kcal/mol, {quantity} = {d:7.3f} {unit}"
            )
    data = np.array(data)

    fig, ax = plt.subplots(1, 2)
    ax[0].plot(data)
    ax[0].set_xlabel("frame")
    ax[0].set_ylabel(data_label)

    ax[1].plot(energies, data, "o", label="data points")
    ax[1].set_xlabel("energy (kcal/mol)")
    ax[1].set_ylabel(data_label)

    # One bin per ``dx`` of spread, capped at 25.  The count is made an
    # explicit integer of at least 1 so a spread narrower than ``dx`` still
    # yields a usable bin.
    n_bins = max(1, int(min(25, (data.max() - data.min()) / dx)))
    res = stats.binned_statistic(data, energies, "min", n_bins)

    # Bins without data come back as NaN; fill them by linear interpolation
    # between the populated bins.
    mask = np.isnan(res.statistic)
    res.statistic[mask] = np.interp(
        np.flatnonzero(mask), np.flatnonzero(~mask), res.statistic[~mask]
    )

    ax[1].barh(
        (res.bin_edges[:-1] + res.bin_edges[1:]) / 2,
        res.statistic,
        align="center",
        height=res.bin_edges[1:] - res.bin_edges[:-1],
        alpha=0.25,
        label="binned min. energies",
    )
    ax[1].legend()
    plt.show()


def main():
    """Run main procedure.

    Reads the trajectory files named on the command line with ``read_xyz``,
    takes the energy from field 1 and the gradient norm from field 3 of each
    comment line, builds an in-memory universe from the coordinates and then
    enters an interactive prompt:

    * ``q``   -- leave the prompt
    * ``h``   -- list the entries of ``commands``
    * ``e``   -- plot energies and gradient norms
    * ``s``   -- print the current selection
    * ``pca`` -- PCA on the current selection
    * ``p``   -- plot the geometry defined by the current selection

    Anything else is taken as an atom selection string.
    """
    # TODO(schneiderfelipe): accept multiple files
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("traj_files", nargs="+")
    args = parser.parse_args()

    gnorms = []
    energies = []
    all_atomnos = []
    all_atomcoords = []
    for traj_file in args.traj_files:
        atomnos, comments, atomcoords = read_xyz(traj_file)
        all_atomnos.extend(atomnos)
        all_atomcoords.extend(atomcoords)
        for comment in comments:
            fields = comment.split()
            gnorms.append(float(fields[3]))
            energies.append(float(fields[1]))

    # Energies relative to the minimum, scaled by the same factor as the
    # original (plotted as kcal/mol).
    energies = np.array(energies)
    energies -= energies.min()
    energies *= hartree * N_A / (kilo * calorie)

    # The first frame's atom numbers define the topology for all frames.
    u = mda.Universe.empty(n_atoms=len(all_atomnos[0]), trajectory=True)
    u.add_TopologyAttr("type", [element[i] for i in all_atomnos[0]])
    u.load_new(all_atomcoords, order="fac")
    print(u)

    selection = None
    selection_text = None
    print("(enter 'q' for exit, 'h' for help)")
    while True:
        code = input("select> ").strip().split()
        if not code:
            # Blank input: prompt again instead of failing on code[0].
            continue

        command = code[0]
        if command == "q":
            break
        elif command == "h":
            for key in commands:
                print(f"{key:15s}: {commands[key]}")
        elif command == "e":
            _plot_energy_profile(energies, gnorms)
        elif command == "s":
            print(selection)
            if selection is not None:
                print(selection_text)
        elif command == "pca":
            if selection is None:
                print("empty selection, doing nothing")
                continue
            _report_pca(u, selection, selection_text)
        elif command == "p":
            if selection is None:
                print("empty selection, doing nothing")
                continue
            _plot_geometry(u, selection, energies)
        else:
            # Treat the whole line as a selection string.  The stored text
            # and atom group are replaced together, and only on success, so
            # a rejected string leaves the previous selection intact.
            candidate_text = " ".join(code)
            try:
                candidate = u.select_atoms(candidate_text)
            except mda.exceptions.SelectionError as e:
                print(e)
            else:
                selection_text = candidate_text
                selection = candidate
    print("bye")
parser.add_argument('--idcd', help='input dcd') parser.add_argument('--ipdb', help='input pdb') parser.add_argument('--icomponents', help='number of principle components') parser.add_argument('--iindex', help='index of the PC') parser.add_argument('--output', help='output') parser.add_argument('--cosout', help='cosine output') return parser.parse_args() args = parse_command_line(sys.argv) u = mda.Universe(args.ipdb, args.idcd, topology_format="PDB", format="DCD") components = int(args.icomponents) pca_index = int(args.iindex) PSF_pca = pca.PCA(u, select='backbone') PSF_pca.run() n_pcs = np.where(PSF_pca.cumulated_variance > 0.95)[0][0] atomgroup = u.select_atoms('backbone') pca_space = PSF_pca.transform(atomgroup, n_components = components) cosine = mda.analysis.pca.cosine_content(pca_space, pca_index) PCA = list(pca_space) with open(args.output, 'w') as f: writer = csv.writer(f, delimiter='\t') writer.writerows(PCA) with open(args.cosout, 'w') as f1: f1.write(str(cosine))