def _setup_frames(self, trajectory, start=None, stop=None, step=None):
    """Store the trajectory and work out which frames will be analysed.

    Parameters
    ----------
    trajectory : mda.Reader
        A trajectory Reader
    start : int, optional
        start frame of analysis
    stop : int, optional
        stop frame of analysis
    step : int, optional
        number of frames to skip between each analysed frame

    Notes
    -----
    Sets ``start``, ``stop``, ``step``, ``n_frames`` and the progress
    meter ``_pm`` on the instance.
    """
    self._trajectory = trajectory

    # TODO: Remove once start/stop/step are deprecated from init
    # Values already stored on the instance take precedence over the
    # arguments passed to this method.
    requested = (
        getattr(self, 'start', start),
        getattr(self, 'stop', stop),
        getattr(self, 'step', step),
    )
    start, stop, step = trajectory.check_slice_indices(*requested)
    self.start, self.stop, self.step = start, stop, step
    self.n_frames = len(range(start, stop, step))

    # One hundredth of the frame count, but never an interval of zero.
    interval = max(int(self.n_frames // 100), 1)
    self._pm = ProgressMeter(self.n_frames or 1,
                             interval=interval,
                             verbose=getattr(self, '_verbose', False))
def _prepare(self):
    """Allocate the covariance matrix and, if needed, compute the mean.

    The reference coordinates are shifted so that their centre of geometry
    sits at the origin.  When ``self._calc_mean`` is set, the selected
    frames are iterated once and the atom coordinates are accumulated into
    ``self.mean``, which is then divided by ``self.n_frames``.
    """
    n_dim = self._n_atoms * 3
    self.cov = np.zeros((n_dim, n_dim))
    self._ref_atom_positions = self._reference.positions
    self._ref_cog = self._reference.center_of_geometry()
    self._ref_atom_positions -= self._ref_cog

    if self._calc_mean:
        interval = int(self.n_frames // 100)
        interval = interval if interval > 0 else 1
        # Named pm_format so the builtin ``format`` is not shadowed.
        pm_format = ("Mean Calculation Step"
                     "%(step)5d/%(numsteps)d [%(percentage)5.1f%%]\r")
        mean_pm = ProgressMeter(self.n_frames if self.n_frames else 1,
                                interval=interval, verbose=self._verbose,
                                format=pm_format)
        frames = self._u.trajectory[self.start:self.stop:self.step]
        for i, ts in enumerate(frames):
            if self.align:
                mobile_cog = self._atoms.center_of_geometry()
                # NOTE(review): assumes the first value returned by _fit_to
                # is the superimposed atoms -- confirm against its docs.
                mobile_atoms, _ = _fit_to(self._atoms.positions,
                                          self._ref_atom_positions,
                                          self._atoms,
                                          mobile_com=mobile_cog,
                                          ref_com=self._ref_cog)
                # Previously nothing was accumulated in this branch, so the
                # mean stayed at zero whenever alignment was requested.
                self.mean += mobile_atoms.positions.ravel()
            else:
                self.mean += self._atoms.positions.ravel()
            mean_pm.echo(i)
        self.mean /= self.n_frames

    # NOTE(review): mean_atoms is the same object as _atoms, so the second
    # statement assigns the positions to themselves -- confirm the intent.
    self.mean_atoms = self._atoms
    self.mean_atoms.positions = self._atoms.positions
def _prepare(self):
    """Allocate the covariance matrix and, if needed, compute the mean.

    The reference coordinates are shifted so that their centre of geometry
    sits at the origin.  When ``self._calc_mean`` is set, the selected
    frames are iterated once and the atom coordinates are accumulated into
    ``self.mean``, which is then divided by ``self.n_frames``.
    """
    n_dim = self._n_atoms * 3
    self.cov = np.zeros((n_dim, n_dim))
    self._ref_atom_positions = self._reference.positions
    self._ref_cog = self._reference.center_of_geometry()
    self._ref_atom_positions -= self._ref_cog

    if self._calc_mean:
        interval = int(self.n_frames // 100)
        interval = interval if interval > 0 else 1
        # Named pm_format so the builtin ``format`` is not shadowed.
        pm_format = ("Mean Calculation Step"
                     "%(step)5d/%(numsteps)d [%(percentage)5.1f%%]\r")
        mean_pm = ProgressMeter(self.n_frames if self.n_frames else 1,
                                interval=interval, verbose=self._verbose,
                                format=pm_format)
        frames = self._u.trajectory[self.start:self.stop:self.step]
        for i, ts in enumerate(frames):
            if self.align:
                mobile_cog = self._atoms.center_of_geometry()
                # NOTE(review): assumes the first value returned by _fit_to
                # is the superimposed atoms -- confirm against its docs.
                mobile_atoms, _ = _fit_to(self._atoms.positions,
                                          self._ref_atom_positions,
                                          self._atoms,
                                          mobile_com=mobile_cog,
                                          ref_com=self._ref_cog)
                # Previously nothing was accumulated in this branch, so the
                # mean stayed at zero whenever alignment was requested.
                self.mean += mobile_atoms.positions.ravel()
            else:
                self.mean += self._atoms.positions.ravel()
            mean_pm.echo(i)
        self.mean /= self.n_frames

    # NOTE(review): mean_atoms is the same object as _atoms, so the second
    # statement assigns the positions to themselves -- confirm the intent.
    self.mean_atoms = self._atoms
    self.mean_atoms.positions = self._atoms.positions
def _selection_serial(self, universe, selection_str):
    """Evaluate *selection_str* once per trajectory frame.

    Parameters
    ----------
    universe
        object providing ``trajectory`` and ``select_atoms``
    selection_str : str
        selection string handed to ``universe.select_atoms``

    Returns
    -------
    list
        one selection result per frame, in iteration order
    """
    progress = ProgressMeter(universe.trajectory.n_frames,
                             interval=10, verbose=True)
    per_frame = []
    for ts in universe.trajectory:
        atoms = universe.select_atoms(selection_str)
        per_frame.append(atoms)
        progress.echo(ts.frame)
    return per_frame
def test_output(self, capsys):
    """The last progress update written to stderr reports the final step."""
    n_steps = 10
    meter = ProgressMeter(n_steps, interval=1)
    for step in range(n_steps):
        meter.echo(step)

    captured_out, captured_err = capsys.readouterr()
    # Updates are separated by carriage returns; keep only the last one.
    last_update = captured_err.strip().split('\r')[-1]
    assert last_update == 'Step 10/10 [100.0%]'
def write(self, filename, start=None, step=None, delta=None, load=True):
    """Write hopping trajectory as standard dcd file.

    write('TAP')

    :Arguments:

    load = True
        Immediately loads the trajectory so that further calls to next()
        will use the computed trajectory and don't use expensive mapping.

    Ignore the other options and leave them at the defaults. Currently,
    only the whole trajectory is written. All atoms in the original
    trajectory are written to the output so you should be able to use your
    original psf file.

    NOTE: Fixed atoms are possibly not accounted for properly.

    Note that it is your responsibility to load the TAP trajectory and the
    appropriate psf together as there is very limited information stored in
    the dcd itself.
    """
    set_verbosity(self.verbosity)  # this is stupid

    # psfname is not used below; the call is kept because self.filename()
    # is opaque from here and may have side effects.
    psfname = self.filename(filename, 'psf')
    dcdname = self.filename(filename, 'dcd')

    # see MDAnalysis/src/dcd/dcd.c for explanations
    if start is None:
        start = self.traj.start_timestep  # starting time step for DCD file
    if step is None:
        step = self.traj.skip_timestep  # NSAVC (# ts between written DCD frames)
    if delta is None:
        delta = self.traj.delta  # length of ts (AKMA units)

    # Build the progress meter first so that nothing can fail between
    # opening the writer and entering the try block.
    pm = ProgressMeter(
        self.n_frames, interval=10,
        format="Mapping TAP frame %(step)5d/%(numsteps)6d [%(percentage)5.1f%%]\r")
    dcdwriter = MDAnalysis.DCD.DCDWriter(dcdname, self.ts.n_atoms,
                                         start, step, delta,
                                         remarks='TAP trajectory')
    try:
        for ts in self.map_dcd():
            dcdwriter.write_next_timestep(ts)
            pm.echo(ts.frame)
    finally:
        # Close the writer even when mapping or writing a frame fails.
        dcdwriter.close()
    logger.info("TAPTrajectory.write(): wrote TAP traj %r.", dcdname)

    if load is True:
        self.TAPtraj = MDAnalysis.DCD.DCDReader(dcdname)
        self.trajectory = self.TAPtraj
def _selection_serial(self, universe, selection_str):
    """Evaluate *selection_str* for every frame in ``[self.t0, self.tf)``.

    Parameters
    ----------
    universe
        object providing ``trajectory`` and ``select_atoms``
    selection_str : str
        selection string handed to ``universe.select_atoms``

    Returns
    -------
    list
        one selection result per visited frame, in iteration order
    """
    progress = ProgressMeter(self.tf - self.t0, interval=10,
                             verbose=True, offset=-self.t0)
    per_frame = []
    for ts in universe.trajectory[self.t0:self.tf]:
        atoms = universe.select_atoms(selection_str)
        per_frame.append(atoms)
        progress.echo(ts.frame)
    return per_frame
def run(self, start=0, stop=-1, step=1, progout=10, quiet=False):
    """Calculate RMSF of given atoms across a trajectory.

    This method implements an algorithm for computing sums of squares while
    avoiding overflows and underflows [Welford1962]_.  The result is stored
    in ``self._rmsf``.

    Parameters
    ----------
    start : int (optional)
        starting frame [0]
    stop : int (optional)
        stopping frame [-1]
    step : int (optional)
        step between frames [1]
    progout : int (optional)
        number of frames to iterate through between updates to progress
        output; ``None`` for no updates [10]
    quiet : bool (optional)
        if ``True``, suppress all output (implies *progout* = ``None``)
        [``False``]

    Raises
    ------
    ValueError
        if ``trajectory[start:stop:step]`` yields no frames, or if any
        computed RMSF value is not ``>= 0``.

    References
    ----------
    [Welford1962] B. P. Welford (1962). "Note on a Method for Calculating
       Corrected Sums of Squares and Products." Technometrics 4(3):419-420.
    """
    atoms = self.atomgroup
    trajectory = atoms.universe.trajectory

    sumsquares = np.zeros((atoms.n_atoms, 3))
    means = np.array(sumsquares)

    if quiet:
        progout = None

    # set up progress output
    if progout:
        percentage = ProgressMeter(trajectory.n_frames, interval=progout)
    else:
        percentage = ProgressMeter(trajectory.n_frames, quiet=True)

    # Sentinel: stays at -1 when the slice is empty, so the failure below is
    # an explicit ValueError rather than an UnboundLocalError on ``k``.
    k = -1
    for k, ts in enumerate(trajectory[start:stop:step]):
        positions = atoms.positions
        sumsquares += (k / (k + 1.0)) * (positions - means)**2
        means = (k * means + positions) / (k + 1)
        percentage.echo(ts.frame)

    if k < 0:
        raise ValueError("No frames selected by start={0!r}, stop={1!r}, "
                         "step={2!r}; cannot compute RMSF".format(
                             start, stop, step))

    rmsf = np.sqrt(sumsquares.sum(axis=1) / (k + 1))

    if not (rmsf >= 0).all():
        raise ValueError("Some RMSF values negative; overflow " +
                         "or underflow occurred")

    self._rmsf = rmsf
def write(self, filename, start=None, step=None, delta=None, load=True):
    """Write hopping trajectory as standard dcd file, together with a minimal psf.

    write('hop')

    Arguments:

    load = True
        Immediately loads the trajectory so that further calls to next()
        will use the computed trajectory and don't use expensive mapping.

    Ignore the other options and leave them at the defaults. Currently,
    only the whole trajectory is written. For visualization one also needs
    the dummy psf of the group.

    Results:

    filename.trajectory and filename.psf

    Note that it is your responsibility to load the hopping trajectory and
    the appropriate psf together as there is very limited information
    stored in the dcd itself.
    """
    set_verbosity(self.verbosity)  # this is stupid

    psfname = self.filename(filename, 'psf')
    dcdname = self.filename(filename, 'dcd')

    progress_format = "Mapping frame %(step)5d/%(numsteps)6d [%(percentage)5.1f%%]\r"
    pm = ProgressMeter(self.n_frames, interval=10, format=progress_format)

    writer = MDAnalysis.Writer(
        dcdname,
        n_atoms=self.ts.n_atoms,
        dt=self.traj.dt,
        remarks='Hopping trajectory: x=site y=orbit_site z=0')
    with writer as dcdwriter:
        for mapped_ts in self.map_dcd():
            dcdwriter.write_next_timestep(mapped_ts)
            pm.echo(mapped_ts.frame)
    logger.info("HoppingTrajectory.write(): wrote hoptraj %r.", dcdname)

    self.write_psf(psfname)
    logger.info("HoppingTrajectory.write(): wrote hoppsf %r.", psfname)

    if load is not True:
        return
    # Re-initialise this object from the files that were just written.
    self.__init__(filename=filename, verbosity=self.verbosity)
def write(self, filename, start=None, step=None, delta=None, load=True):
    """Write hopping trajectory as standard dcd file.

    write('TAP')

    :Arguments:

    load = True
        Immediately loads the trajectory so that further calls to next()
        will use the computed trajectory and don't use expensive mapping.

    Ignore the other options and leave them at the defaults. Currently,
    only the whole trajectory is written. All atoms in the original
    trajectory are written to the output so you should be able to use your
    original psf file.

    NOTE: Fixed atoms are possibly not accounted for properly.

    Note that it is your responsibility to load the TAP trajectory and the
    appropriate psf together as there is very limited information stored in
    the dcd itself.
    """
    set_verbosity(self.verbosity)  # this is stupid

    # psfname is not used below; the call is kept because self.filename()
    # is opaque from here and may have side effects.
    psfname = self.filename(filename, 'psf')
    dcdname = self.filename(filename, 'dcd')

    # see MDAnalysis/src/dcd/dcd.c for explanations
    if start is None:
        start = self.traj.start_timestep  # starting time step for DCD file
    if step is None:
        step = self.traj.skip_timestep  # NSAVC (# ts between written DCD frames)
    if delta is None:
        delta = self.traj.delta  # length of ts (AKMA units)

    # Build the progress meter first so that nothing can fail between
    # opening the writer and entering the try block.
    pm = ProgressMeter(
        self.n_frames, interval=10,
        format="Mapping TAP frame %(step)5d/%(numsteps)6d [%(percentage)5.1f%%]\r")
    dcdwriter = MDAnalysis.DCD.DCDWriter(dcdname, self.ts.n_atoms,
                                         start, step, delta,
                                         remarks='TAP trajectory')
    try:
        for ts in self.map_dcd():
            dcdwriter.write_next_timestep(ts)
            pm.echo(ts.frame)
    finally:
        # Close the writer even when mapping or writing a frame fails.
        dcdwriter.close()
    logger.info("TAPTrajectory.write(): wrote TAP traj %r.", dcdname)

    if load is True:
        self.TAPtraj = MDAnalysis.DCD.DCDReader(dcdname)
        self.trajectory = self.TAPtraj
def _prepare(self):
    """Select the atoms, allocate the covariance matrix and set up the mean.

    The trajectory is moved to ``self.start`` and the atoms matching
    ``self._select`` are selected there as both reference and mobile group.
    If no mean was supplied (``self._mean is None``) the selected frames
    are iterated once and the coordinates are accumulated into
    ``self.mean``, which is then divided by ``self.n_frames``.

    Raises
    ------
    ValueError
        if exactly one frame is selected.
    """
    # access start index
    self._u.trajectory[self.start]
    # reference will be start index
    self._reference = self._u.select_atoms(self._select)
    self._atoms = self._u.select_atoms(self._select)
    self._n_atoms = self._atoms.n_atoms

    if self._mean is None:
        self.mean = np.zeros(self._n_atoms * 3)
        self._calc_mean = True
    else:
        self.mean = self._mean.positions
        self._calc_mean = False

    if self.n_frames == 1:
        # The trailing space after "a" was missing, which produced
        # "from asingle trajectory frame".
        raise ValueError('No covariance information can be gathered from a '
                         'single trajectory frame.\n')

    n_dim = self._n_atoms * 3
    self.cov = np.zeros((n_dim, n_dim))
    self._ref_atom_positions = self._reference.positions
    self._ref_cog = self._reference.center_of_geometry()
    self._ref_atom_positions -= self._ref_cog

    if self._calc_mean:
        interval = int(self.n_frames // 100)
        interval = interval if interval > 0 else 1
        # Named pm_format so the builtin ``format`` is not shadowed.
        pm_format = ("Mean Calculation Step"
                     "%(step)5d/%(numsteps)d [%(percentage)5.1f%%]")
        mean_pm = ProgressMeter(self.n_frames if self.n_frames else 1,
                                interval=interval, verbose=self._verbose,
                                format=pm_format)
        frames = self._u.trajectory[self.start:self.stop:self.step]
        for i, ts in enumerate(frames):
            if self.align:
                mobile_cog = self._atoms.center_of_geometry()
                # NOTE(review): assumes the first value returned by _fit_to
                # is the superimposed atoms -- confirm against its docs.
                mobile_atoms, _ = _fit_to(self._atoms.positions,
                                          self._ref_atom_positions,
                                          self._atoms,
                                          mobile_com=mobile_cog,
                                          ref_com=self._ref_cog)
                # Previously nothing was accumulated in this branch, so the
                # mean stayed at zero whenever alignment was requested.
                self.mean += mobile_atoms.positions.ravel()
            else:
                self.mean += self._atoms.positions.ravel()
            mean_pm.echo(i)
        self.mean /= self.n_frames

    # NOTE(review): mean_atoms is the same object as _atoms, so the second
    # statement assigns the positions to themselves -- confirm the intent.
    self.mean_atoms = self._atoms
    self.mean_atoms.positions = self._atoms.positions
def test_deprecated(self, capsys):
    """Creating a ProgressMeter emits exactly one DeprecationWarning."""
    with warnings.catch_warnings(record=True) as caught:
        # Make sure no warning is suppressed by an existing filter.
        warnings.simplefilter("always")
        # Instantiation is what should trigger the warning.
        meter = ProgressMeter(10)
        # Exactly one warning, of the right kind, naming the replacement.
        assert len(caught) == 1
        latest = caught[-1]
        assert issubclass(latest.category, DeprecationWarning)
        assert "MDAnalysis.lib.log.ProgressBar" in str(latest.message)
def write(self, filename, start=None, step=None, delta=None, load=True):
    """Write hopping trajectory as standard dcd file, together with a minimal psf.

    write('hop')

    Arguments:

    load = True
        Immediately loads the trajectory so that further calls to next()
        will use the computed trajectory and don't use expensive mapping.

    Ignore the other options and leave them at the defaults. Currently,
    only the whole trajectory is written. For visualization one also needs
    the dummy psf of the group.

    Results:

    filename.trajectory and filename.psf

    Note that it is your responsibility to load the hopping trajectory and
    the appropriate psf together as there is very limited information
    stored in the dcd itself.
    """
    set_verbosity(self.verbosity)  # this is stupid

    psfname = self.filename(filename, 'psf')
    dcdname = self.filename(filename, 'dcd')

    progress_format = "Mapping frame %(step)5d/%(numsteps)6d [%(percentage)5.1f%%]\r"
    pm = ProgressMeter(self.n_frames, interval=10, format=progress_format)

    writer = MDAnalysis.Writer(
        dcdname,
        n_atoms=self.ts.n_atoms,
        dt=self.traj.dt,
        remarks='Hopping trajectory: x=site y=orbit_site z=0')
    with writer as dcdwriter:
        for mapped_ts in self.map_dcd():
            dcdwriter.write_next_timestep(mapped_ts)
            pm.echo(mapped_ts.frame)
    logger.info("HoppingTrajectory.write(): wrote hoptraj %r.", dcdname)

    self.write_psf(psfname)
    logger.info("HoppingTrajectory.write(): wrote hoppsf %r.", psfname)

    if load is not True:
        return
    # Re-initialise this object from the files that were just written.
    self.__init__(filename=filename, verbosity=self.verbosity)
def run(self, force=False):
    """Run all the required passes and average their results.

    :Keywords:
      *force*
        Will overwrite previous results if they exist

    The averaged values are stored in ``self.solution['results']`` and the
    matching time axis in ``self.solution['time']``.
    """
    # if results exist, don't waste any time
    if not (self.solution['results'] is None or force):
        return

    # The first start/stop window defines the length of the accumulator.
    first_window = numpy.arange(self._starts[0], self._stops[0], self._skip)
    master_results = numpy.zeros_like(first_window, dtype=numpy.float32)
    # for normalising later
    counter = numpy.zeros_like(master_results, dtype=numpy.float32)

    pm = ProgressMeter(self.nruns, interval=1,
                       format="Performing run %(step)5d/%(numsteps)d"
                              "[%(percentage)5.1f%%]\r")

    windows = izip(self._starts, self._stops)
    for run_number, (start, stop) in enumerate(windows, 1):
        pm.echo(run_number)
        # needed else trj seek thinks a numpy.int64 isn't an int?
        results = self._single_run(int(start), int(stop))

        nresults = len(results)
        if nresults != len(master_results):
            # Only the leading entries are covered by this run.
            master_results[:nresults] += results
            counter[:nresults] += 1.0
        else:
            master_results += results
            counter += 1.0

    master_results /= counter

    frame_index = numpy.arange(len(master_results), dtype=numpy.float32)
    self.solution['time'] = frame_index * self.u.trajectory.dt * self._skip
    self.solution['results'] = master_results
def run(self, force=False):
    """Run all the required passes and average their results.

    Parameters
    ----------
    force : bool, optional
        Will overwrite previous results if they exist

    Notes
    -----
    The averaged values are stored in ``self.solution['results']`` and the
    matching time axis in ``self.solution['time']``.
    """
    # if results exist, don't waste any time
    if not (self.solution['results'] is None or force):
        return

    # The first start/stop window defines the length of the accumulator.
    first_window = np.arange(self._starts[0], self._stops[0], self._skip)
    master_results = np.zeros_like(first_window, dtype=np.float32)
    # for normalising later
    counter = np.zeros_like(master_results, dtype=np.float32)

    pm = ProgressMeter(self.nruns, interval=1,
                       format="Performing run %(step)5d/%(numsteps)d"
                              "[%(percentage)5.1f%%]\r")

    windows = zip(self._starts, self._stops)
    for run_number, (start, stop) in enumerate(windows, 1):
        pm.echo(run_number)
        # needed else trj seek thinks a np.int64 isn't an int?
        results = self._single_run(int(start), int(stop))

        nresults = len(results)
        if nresults != len(master_results):
            # Only the leading entries are covered by this run.
            master_results[:nresults] += results
            counter[:nresults] += 1.0
        else:
            master_results += results
            counter += 1.0

    master_results /= counter

    frame_index = np.arange(len(master_results), dtype=np.float32)
    self.solution['time'] = frame_index * self.u.trajectory.dt * self._skip
    self.solution['results'] = master_results
def _setup_frames(self, trajectory, start=None, stop=None, step=None):
    """Store the trajectory and work out which frames will be analysed.

    Parameters
    ----------
    trajectory : mda.Reader
        A trajectory Reader
    start : int, optional
        start frame of analysis
    stop : int, optional
        stop frame of analysis
    step : int, optional
        number of frames to skip between each analysed frame

    Notes
    -----
    Sets ``start``, ``stop`` and ``step`` to the values returned by
    ``trajectory.check_slice_indices``, plus ``n_frames`` and the progress
    meter ``_pm``.
    """
    self._trajectory = trajectory

    start, stop, step = trajectory.check_slice_indices(start, stop, step)
    self.start, self.stop, self.step = start, stop, step
    self.n_frames = len(range(start, stop, step))

    # One hundredth of the frame count, but never an interval of zero.
    interval = max(int(self.n_frames // 100), 1)

    # ensure _verbose is set when __init__ wasn't called, this is to not
    # break pre 0.16.0 API usage of AnalysisBase
    if not hasattr(self, '_verbose'):
        if not hasattr(self, '_quiet'):
            self._verbose = True
            self._quiet = not self._verbose
        else:
            # Here, we are in the odd case where a children class defined
            # self._quiet without going through AnalysisBase.__init__.
            warnings.warn(
                "The *_quiet* attribute of analyses is "
                "deprecated (from 0.16)use *_verbose* instead.",
                DeprecationWarning)
            self._verbose = not self._quiet

    self._pm = ProgressMeter(self.n_frames or 1,
                             interval=interval,
                             verbose=self._verbose)
def run(self, start=0, stop=-1, step=1, progout=10, quiet=False):
    """Calculate RMSF of given atoms across a trajectory.

    This method implements an algorithm for computing sums of squares while
    avoiding overflows and underflows [Welford1962]_.  The result is stored
    in ``self._rmsf``.

    Parameters
    ----------
    start : int (optional)
        starting frame [0]
    stop : int (optional)
        stopping frame [-1]
    step : int (optional)
        step between frames [1]
    progout : int (optional)
        number of frames to iterate through between updates to progress
        output; ``None`` for no updates [10]
    quiet : bool (optional)
        if ``True``, suppress all output (implies *progout* = ``None``)
        [``False``]

    Raises
    ------
    ValueError
        if ``trajectory[start:stop:step]`` yields no frames, or if any
        computed RMSF value is not ``>= 0``.

    References
    ----------
    [Welford1962] B. P. Welford (1962). "Note on a Method for Calculating
       Corrected Sums of Squares and Products." Technometrics 4(3):419-420.
    """
    atoms = self.atomgroup
    trajectory = atoms.universe.trajectory

    sumsquares = np.zeros((atoms.n_atoms, 3))
    means = np.array(sumsquares)

    if quiet:
        progout = None

    # set up progress output
    if progout:
        percentage = ProgressMeter(trajectory.n_frames, interval=progout)
    else:
        percentage = ProgressMeter(trajectory.n_frames, quiet=True)

    # Sentinel: stays at -1 when the slice is empty, so the failure below is
    # an explicit ValueError rather than an UnboundLocalError on ``k``.
    k = -1
    for k, ts in enumerate(trajectory[start:stop:step]):
        positions = atoms.positions
        sumsquares += (k / (k + 1.0)) * (positions - means)**2
        means = (k * means + positions) / (k + 1)
        percentage.echo(ts.frame)

    if k < 0:
        raise ValueError("No frames selected by start={0!r}, stop={1!r}, "
                         "step={2!r}; cannot compute RMSF".format(
                             start, stop, step))

    rmsf = np.sqrt(sumsquares.sum(axis=1) / (k + 1))

    if not (rmsf >= 0).all():
        raise ValueError("Some RMSF values negative; overflow " +
                         "or underflow occurred")

    self._rmsf = rmsf
def _setup_frames(self, trajectory, start=None, stop=None, step=None):
    """Store the trajectory and work out which frames will be analysed.

    Parameters
    ----------
    trajectory : mda.Reader
        A trajectory Reader
    start : int, optional
        start frame of analysis
    stop : int, optional
        stop frame of analysis
    step : int, optional
        number of frames to skip between each analysed frame

    Notes
    -----
    ``start``, ``stop`` and ``step`` are stored on the instance exactly as
    passed in; only ``n_frames`` is computed from the values returned by
    ``trajectory.check_slice_indices``.
    """
    self._trajectory = trajectory
    # Keep the caller's raw values on the instance.
    self.start, self.stop, self.step = start, stop, step

    checked = trajectory.check_slice_indices(start, stop, step)
    start, stop, step = checked
    self.n_frames = len(range(start, stop, step))

    # One hundredth of the frame count, but never an interval of zero.
    interval = max(int(self.n_frames // 100), 1)

    # ensure _verbose is set when __init__ wasn't called, this is to not
    # break pre 0.16.0 API usage of AnalysisBase
    if not hasattr(self, '_verbose'):
        if not hasattr(self, '_quiet'):
            self._verbose = True
            self._quiet = not self._verbose
        else:
            # Here, we are in the odd case where a children class defined
            # self._quiet without going through AnalysisBase.__init__.
            warnings.warn("The *_quiet* attribute of analyses is "
                          "deprecated (from 0.16)use *_verbose* instead.",
                          DeprecationWarning)
            self._verbose = not self._quiet

    self._pm = ProgressMeter(self.n_frames or 1,
                             interval=interval,
                             verbose=self._verbose)
def _helanal_append_row(filename, frame, values):
    """Append one line to *filename*: *frame* followed by each of *values*."""
    with open(filename, "a") as output:
        print(frame, end='', file=output)
        for value in values:
            print(value, end='', file=output)
        print("", file=output)


def helanal_trajectory(universe, selection="name CA", start=None, end=None,
                       begin=None, finish=None,
                       matrix_filename="bending_matrix.dat",
                       origin_pdbfile="origin.pdb",
                       summary_filename="summary.txt",
                       screw_filename="screw.xvg",
                       tilt_filename="local_tilt.xvg",
                       fitted_tilt_filename="fit_tilt.xvg",
                       bend_filename="local_bend.xvg",
                       twist_filename="unit_twist.xvg",
                       prefix="helanal_", ref_axis=None, quiet=False):
    """Perform HELANAL_ helix analysis on all frames in *universe*.

    .. Note::

       Only a single helix is analyzed. Use the selection to specify the
       helix, e.g. with "name CA and resid 1:20" or use start=1, end=20.

    :Arguments:
       *universe*
          :class:`~MDAnalysis.core.AtomGroup.Universe`

    :Keywords:
       *selection*
          selection string that selects Calpha atoms [``"name CA"``]
       *start*
          start residue resid
       *end*
          end residue resid
       *begin*
          start analysing for time (ps) >= *begin*; ``None`` starts from the
          beginning [``None``]
       *finish*
          stop analysis for time (ps) <= *finish*; ``None`` goes to the
          end of the trajectory [``None``]
       *matrix_filename*
          Output file- mean, SD and ABDEV of the angles between all pairs of
          local helix axes [``"bending_matrix.dat"``]
       *origin_pdbfile*
          Output file- origin pdb file [``"origin.pdb"``]
       *summary_filename*
          Output file- all of the basic data [``"summary.txt"``]
       *screw_filename*
          Output file- local screw angles, one line per frame
          [``"screw.xvg"``]
       *tilt_filename*
          Output file- angle between each local helix axis and *ref_axis*,
          one line per frame [``"local_tilt.xvg"``]
       *fitted_tilt_filename*
          Output file- tilt of the line of best fit through the origins,
          one line per frame [``"fit_tilt.xvg"``]
       *bend_filename*
          Output file- local bend angles between successive local helix
          axes [``"local_bend.xvg"``]
       *twist_filename*
          Output file- local unit twist between successive helix turns
          [``"unit_twist.xvg"``]
       *prefix*
          Prefix to add to all output file names; set to ``None`` to disable
          [``"helanal_"``]
       *ref_axis*
          Calculate tilt angle relative to the axis; if ``None`` then
          ``[0,0,1]`` is chosen [``None``]
       *quiet*
          Suppress most diagnostic output.

    :Raises:
       FinishTimeException
          If the specified finish time precedes the current time stamp of
          the trajectory object.

    .. versionchanged:: 0.13.0
       New *quiet* keyword to silence frame progress output and most of the
       output that used to be printed to stdout is now logged to the logger
       *MDAnalysis.analysis.helanal* (at logelevel *INFO*).
    """
    if ref_axis is None:
        ref_axis = np.array([0., 0., 1.])
    else:
        # enable MDA API so that one can use a tuple of atoms or AtomGroup
        # with two atoms
        ref_axis = np.asarray(ref_axis)

    if not (start is None and end is None):
        if start is None:
            start = universe.atoms[0].resid
        if end is None:
            end = universe.atoms[-1].resid
        selection += " and resid {start:d}:{end:d}".format(start=start,
                                                           end=end)
    ca = universe.select_atoms(selection)
    trajectory = universe.trajectory

    if finish is not None:
        if trajectory.ts.time > finish:
            # you'd be starting with a finish time (in ps) that has already
            # passed or not available
            raise FinishTimeException(
                'The input finish time ({finish} ps) precedes the current trajectory time of {traj_time} ps.'
                .format(finish=finish, traj_time=trajectory.time))

    # NOTE(review): start and end are either both None or both set by the
    # time we get here, so only the first branch can fire -- confirm whether
    # the messages were meant to reflect the caller's original arguments.
    if start is not None and end is not None:
        logger.info("Analysing from residue %d to %d", start, end)
    elif start is not None and end is None:
        logger.info("Analysing from residue %d to the C termini", start)
    elif start is None and end is not None:
        logger.info("Analysing from the N termini to %d", end)
    logger.info("Analysing %d/%d residues", ca.n_atoms,
                universe.atoms.n_residues)

    if prefix is not None:
        prefix = str(prefix)
        matrix_filename = prefix + matrix_filename
        origin_pdbfile = prefix + origin_pdbfile
        summary_filename = prefix + summary_filename
        screw_filename = prefix + screw_filename
        tilt_filename = prefix + tilt_filename
        fitted_tilt_filename = prefix + fitted_tilt_filename
        bend_filename = prefix + bend_filename
        twist_filename = prefix + twist_filename
    # Same order as before: every output file is handed to backup_file()
    # before anything is written to it.
    for output_name in (matrix_filename, origin_pdbfile, summary_filename,
                        screw_filename, tilt_filename, fitted_tilt_filename,
                        bend_filename, twist_filename):
        backup_file(output_name)

    global_height = []
    global_twist = []
    global_rnou = []
    global_bending = []
    global_bending_matrix = []
    global_fitted_tilts = []

    pm = ProgressMeter(trajectory.n_frames, quiet=quiet,
                       format="Frame %(step)10d: %(time)20.1f ps\r")
    for ts in trajectory:
        pm.echo(ts.frame, time=ts.time)
        frame = ts.frame
        if begin is not None:
            if trajectory.time < begin:
                continue
        if finish is not None:
            if trajectory.time > finish:
                break

        (twist, bending_angles, height, rnou, origins, local_helix_axes,
         local_screw_angles) = main_loop(ca.positions, ref_axis=ref_axis)

        origin_pdb(origins, origin_pdbfile)

        # calculate local bending matrix (it is looking at all i, j
        # combinations); only the upper triangle (j > i) is filled
        n_axes = len(local_helix_axes)
        if not global_bending_matrix:
            global_bending_matrix = [[[] for _ in range(n_axes)]
                                     for _ in range(n_axes)]
        for i in range(n_axes):
            for j in range(i + 1, n_axes):
                angle = np.rad2deg(
                    np.arccos(np.dot(local_helix_axes[i],
                                     local_helix_axes[j])))
                global_bending_matrix[i][j].append(angle)

        _, fit_tilt = vector_of_best_fit(origins)
        global_height += height
        global_twist += twist
        global_rnou += rnou
        global_fitted_tilts.append(np.rad2deg(fit_tilt))

        # print out rotations across the helix to a file
        _helanal_append_row(twist_filename, frame, twist)
        _helanal_append_row(bend_filename, frame, bending_angles)
        _helanal_append_row(screw_filename, frame, local_screw_angles)
        _helanal_append_row(
            tilt_filename, frame,
            (np.rad2deg(mdamath.angle(axis, ref_axis))
             for axis in local_helix_axes))

        with open(fitted_tilt_filename, "a") as tilt_output:
            print(frame, np.rad2deg(fit_tilt), file=tilt_output)

        if not global_bending:
            global_bending = [[] for _ in bending_angles]
        for store, tmp in zip(global_bending, bending_angles):
            store.append(tmp)

    twist_mean, twist_sd, twist_abdev = stats(global_twist)
    height_mean, height_sd, height_abdev = stats(global_height)
    rnou_mean, rnou_sd, rnou_abdev = stats(global_rnou)
    ftilt_mean, ftilt_sd, ftilt_abdev = stats(global_fitted_tilts)

    bending_statistics = [stats(item) for item in global_bending]
    bending_statistics_matrix = [[stats(col) for col in row]
                                 for row in global_bending_matrix]

    # Three sections (mean, SD, ABDEV); each picks one entry of the stats
    # tuple for every matrix cell.
    with open(matrix_filename, 'w') as mat_output:
        for title, stat_index in (("Mean", 0), ('\nSD', 1), ("\nABDEV", 2)):
            print(title, file=mat_output)
            for row in bending_statistics_matrix:
                for col in row:
                    formatted_angle = "{0:6.1f}".format(col[stat_index])
                    print(formatted_angle, end='', file=mat_output)
                print('', file=mat_output)

    logger.info("Height: %g SD: %g ABDEV: %g (Angstroem)",
                height_mean, height_sd, height_abdev)
    logger.info("Twist: %g SD: %g ABDEV: %g",
                twist_mean, twist_sd, twist_abdev)
    logger.info("Residues/turn: %g SD: %g ABDEV: %g",
                rnou_mean, rnou_sd, rnou_abdev)
    logger.info("Fitted tilt: %g SD: %g ABDEV: %g",
                ftilt_mean, ftilt_sd, ftilt_abdev)
    logger.info("Local bending angles:")

    # zip() returns an iterator on Python 3; it is indexed and iterated
    # twice below, so materialise it once.
    residue_statistics = list(zip(*bending_statistics))
    measure_names = ["Mean ", "SD ", "ABDEV"]
    first_resid = 4 if start is None else start + 3
    resids = range(first_resid, len(residue_statistics[0]) + first_resid)

    output = " ".join(["{0:8d}".format(item) for item in resids])
    logger.info("ResID %s", output)
    for measure, name in zip(residue_statistics, measure_names):
        output = str(name) + " "
        output += " ".join(["{0:8.1f}".format(residue)
                            for residue in measure])
        logger.info(output)

    with open(summary_filename, 'w') as summary_output:
        print("Height:", height_mean, "SD", height_sd, "ABDEV",
              height_abdev, '(nm)', file=summary_output)
        print("Twist:", twist_mean, "SD", twist_sd, "ABDEV", twist_abdev,
              file=summary_output)
        print("Residues/turn:", rnou_mean, "SD", rnou_sd, "ABDEV",
              rnou_abdev, file=summary_output)
        print("Local bending angles:", file=summary_output)
        print("ResID", end='', file=summary_output)
        for item in resids:
            print("{0:8d}".format(item), end='', file=summary_output)
        print('', file=summary_output)
        for measure, name in zip(residue_statistics, measure_names):
            print(name, end='', file=summary_output)
            for residue in measure:
                print("{0:8.1f}".format(residue), end='',
                      file=summary_output)
            print('', file=summary_output)
def density_from_Universe(universe, delta=1.0, select='name OH2',
                          start=None, stop=None, step=None,
                          metadata=None, padding=2.0, cutoff=0,
                          soluteselection=None, use_kdtree=True,
                          update_selection=False, verbose=False,
                          interval=1, quiet=None, parameters=None,
                          gridcenter=None, xdim=None, ydim=None, zdim=None):
    """Create a density grid from a :class:`MDAnalysis.Universe` object.

    The trajectory is read, frame by frame, and the atoms selected with
    `select` are histogrammed on a grid with spacing `delta`. A physical
    density of units [Angstrom^{-3}] is returned (see :class:`Density` for
    more details).

    Parameters
    ----------
    universe : MDAnalysis.Universe
        :class:`MDAnalysis.Universe` object with a trajectory
    select : str (optional)
        selection string (MDAnalysis syntax) for the species to be analyzed
        ["name OH2"]
    delta : float (optional)
        bin size for the density grid in Angstrom (same in x,y,z) [1.0]
    start : int (optional)
    stop : int (optional)
    step : int (optional)
        Slice the trajectory as ``trajectory[start:stop:step]``; default
        is to read the whole trajectory.
    metadata : dict (optional)
        `dict` of additional data to be saved with the object; the entries
        are copied into the metadata of the returned :class:`Density`
        (the dictionary passed in is not modified).
    padding : float (optional)
        increase histogram dimensions by padding (on top of initial box size)
        in Angstrom. Padding is ignored when setting a user defined grid.
        [2.0]
    soluteselection : str (optional)
        MDAnalysis selection for the solute, e.g. "protein" [``None``]
    cutoff : float (optional)
        With `cutoff`, select "<atomsel> NOT WITHIN <cutoff> OF
        <soluteselection>" (Special routines that are faster than the
        standard ``AROUND`` selection); any value that evaluates to ``False``
        (such as the default 0) disables this special selection.
    use_kdtree : bool (optional)
        Passed on to the special "not within" selection routine; only used
        when both `cutoff` and `soluteselection` are set. [``True``]
    update_selection : bool (optional)
        Should the selection of atoms be updated for every step? [``False``]

        - ``True``: atom selection is updated for each frame, can be slow
        - ``False``: atoms are only selected at the beginning
    verbose : bool (optional)
        Print status update to the screen for every *interval* frame?
        [``False``]

        - ``False``: no status updates when a new frame is processed
        - ``True``: status update every frame (including number of atoms
          processed, which is interesting with ``update_selection=True``)
    interval : int (optional)
        Show status update every `interval` frame [1]
    quiet : bool (optional)
        Deprecated in favor of `verbose`. The keyword is still accepted for
        backward compatibility but its value is not used. [``None``]
    parameters : dict (optional)
        `dict` with some special parameters for :class:`Density` (see docs);
        the dictionary passed in is not modified.
    gridcenter : numpy ndarray, float32 (optional)
        3 element numpy array detailing the x, y and z coordinates of the
        center of a user defined grid box in Angstrom [``None``]
    xdim : float (optional)
        User defined x dimension box edge in Angstrom; ignored if gridcenter
        is ``None``
    ydim : float (optional)
        User defined y dimension box edge in Angstrom; ignored if gridcenter
        is ``None``
    zdim : float (optional)
        User defined z dimension box edge in Angstrom; ignored if gridcenter
        is ``None``

    Returns
    -------
    :class:`Density`
        A :class:`Density` instance with the histogrammed data together
        with associated metadata.

    Notes
    -----
    By default, the `select` is static, i.e., atoms are only selected once at
    the beginning. If you want *dynamically changing selections* (such as
    "name OW and around 4.0 (protein and not name H*)", i.e., the water oxygen
    atoms that are within 4 Å of the protein heavy atoms) then set
    ``update_selection=True``. For the special case of calculating a density
    of the "bulk" solvent away from a solute use the optimized selections
    with keywords *cutoff* and *soluteselection* (see Examples below).

    Examples
    --------
    Basic use for creating a water density (just using the water oxygen
    atoms "OW")::

      density = density_from_Universe(universe, delta=1.0, select='name OW')

    If you are only interested in water within a certain region, e.g., within
    a vicinity around a binding site, you can use a selection that updates
    every step by setting the `update_selection` keyword argument::

      site_density = density_from_Universe(
          universe, delta=1.0,
          select='name OW and around 5 (resid 156 157 305)',
          update_selection=True)

    A special case for an updating selection is to create the "bulk density",
    i.e., the water outside the immediate solvation shell of a protein: Select
    all water oxygen atoms that are *farther away* than a given cut-off (say,
    4 Å) from the solute (here, heavy atoms of the protein)::

      bulk = density_from_Universe(universe, delta=1.0, select='name OW',
                                   soluteselection="protein and not name H*",
                                   cutoff=4)

    (Using the special case for the bulk with `soluteselection` and `cutoff`
    improves performance over the simple `update_selection` approach.)

    If you are interested in explicitly setting a grid box of a given edge
    size and origin, you can use the gridcenter and x/y/zdim arguments. For
    example to plot the density of waters within 5 Å of a ligand (in this case
    the ligand has been assigned the residue name "LIG") in a cubic grid with
    20 Å edges which is centered on the centre of mass (COM) of the ligand::

      # Create a selection based on the ligand
      ligand_selection = universe.select_atoms("resname LIG")

      # Extract the COM of the ligand
      ligand_COM = ligand_selection.center_of_mass()

      # Generate a density of waters on a cubic grid centered on the ligand
      # COM. In this case, we update the atom selection as shown above.
      water_density = density_from_Universe(
          universe, delta=1.0,
          select='name OW around 5 resname LIG',
          update_selection=True,
          gridcenter=ligand_COM,
          xdim=20.0, ydim=20.0, zdim=20.0)

    (It should be noted that the `padding` keyword is not used when a user
    defined grid is assigned).

    As detailed above, the :class:`Density` object returned contains a
    physical density in units of Angstrom^{-3}. If you are interested in
    recovering the underlying probability density, simply divide by the sum::

      physical_density = density_from_Universe(universe, delta=1.0,
                                               select='name OW')
      probability_density = physical_density / physical_density.grid.sum()

    Similarly, if you would like to recover a grid containing a histogram of
    atom counts, simply multiply by the volume::

      # Here we assume that numpy is imported as np
      volume = np.prod(physical_density.delta)
      atom_count_histogram = physical_density * volume


    .. versionchanged:: 0.21.0
       Warns users that `padding` value is not used in user defined grids
    .. versionchanged:: 0.20.0
       ProgressMeter now iterates over the number of frames analysed.
    .. versionchanged:: 0.19.0
       *gridcenter*, *xdim*, *ydim* and *zdim* keywords added to allow for
       user defined boxes
    .. versionchanged:: 0.13.0
       *update_selection* and *quiet* keywords added
    .. deprecated:: 0.16
       The keyword argument *quiet* is deprecated in favor of *verbose*.
    .. versionchanged:: 0.21.0
       time_unit and length_unit default to ps and Angstrom now flags have
       been removed (same as previous flag defaults)

    """
    u = universe

    if cutoff > 0 and soluteselection is not None:
        # special fast selection for
        # '<atomsel> not within <cutoff> of <solutesel>'
        notwithin_coordinates = notwithin_coordinates_factory(
            u, select, soluteselection, cutoff,
            use_kdtree=use_kdtree, updating_selection=update_selection)

        def current_coordinates():
            return notwithin_coordinates()
    else:
        group = u.select_atoms(select, updating=update_selection)

        def current_coordinates():
            return group.positions

    coord = current_coordinates()
    logger.info(
        "Selected {0:d} atoms out of {1:d} atoms ({2!s}) from {3:d} total."
        "".format(coord.shape[0], len(u.select_atoms(select)),
                  select, len(u.atoms))
    )

    # mild warning; typically this is run on RMS-fitted trajectories and
    # so the box information is rather meaningless
    angles = u.trajectory.ts.dimensions[3:]
    if tuple(angles) != (90., 90., 90.):
        msg = ("Non-orthorhombic unit-cell --- "
               "make sure that it has been remapped properly!")
        warnings.warn(msg)
        logger.warning(msg)

    if gridcenter is not None:
        # Issue 2372: padding is ignored, defaults to 2.0 therefore warn
        if padding > 0:
            msg = ("Box padding (currently set at {0}) "
                   "is not used in user defined grids.".format(padding))
            warnings.warn(msg)
            logger.warning(msg)
        # Generate a copy of smin/smax from coords to later check if the
        # defined box might be too small for the selection
        smin = np.min(coord, axis=0)
        smax = np.max(coord, axis=0)
        # Overwrite smin/smax with user defined values
        smin, smax = _set_user_grid(gridcenter, xdim, ydim, zdim, smin, smax)
    else:
        # Make the box bigger to avoid as much as possible 'outlier'. This
        # is important if the sites are defined at a high density: in this
        # case the bulk regions don't have to be close to 1 * n0 but can
        # be less. It's much more difficult to deal with outliers.  The
        # ideal solution would use images: implement 'looking across the
        # periodic boundaries' but that gets complicate when the box
        # rotates due to RMS fitting.
        smin = np.min(coord, axis=0) - padding
        smax = np.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    arange = np.transpose(np.vstack((BINS['min'], BINS['max'])))
    bins = BINS['Nbins']

    # create empty grid with the right dimensions (and get the edges);
    # the un-normalised histogram is the default, so no `normed` keyword
    # (which newer NumPy releases no longer accept) is passed.
    grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange)
    grid *= 0.0
    h = grid.copy()

    start, stop, step = u.trajectory.check_slice_indices(start, stop, step)
    n_frames = len(range(start, stop, step))

    pm = ProgressMeter(n_frames, interval=interval,
                       verbose=verbose,
                       format="Histogramming %(n_atoms)6d atoms in frame "
                       "%(step)5d/%(numsteps)d  [%(percentage)5.1f%%]")

    for index, ts in enumerate(u.trajectory[start:stop:step]):
        coord = current_coordinates()

        pm.echo(index, n_atoms=len(coord))
        if len(coord) == 0:
            continue

        # rebind `edges` rather than assigning into it so that this works
        # regardless of the container type returned for the edges
        h[:], edges = np.histogramdd(coord, bins=bins, range=arange)
        grid += h  # accumulate average histogram

    grid /= float(n_frames)

    # work on copies so that dictionaries owned by the caller are untouched
    metadata = dict(metadata) if metadata is not None else {}
    metadata['psf'] = u.filename
    metadata['dcd'] = u.trajectory.filename
    metadata['select'] = select
    metadata['n_frames'] = n_frames
    metadata['totaltime'] = round(u.trajectory.n_frames * u.trajectory.dt, 3)
    metadata['dt'] = u.trajectory.dt
    metadata['time_unit'] = 'ps'
    try:
        metadata['trajectory_skip'] = u.trajectory.skip_timestep  # frames
    except AttributeError:
        metadata['trajectory_skip'] = 1  # seems to not be used..
    try:
        metadata['trajectory_delta'] = u.trajectory.delta  # in native units
    except AttributeError:
        metadata['trajectory_delta'] = 1
    if cutoff > 0 and soluteselection is not None:
        metadata['soluteselection'] = soluteselection
        metadata['cutoff'] = cutoff  # in Angstrom

    parameters = dict(parameters) if parameters is not None else {}
    parameters['isDensity'] = False  # must override

    g = Density(grid=grid, edges=edges, units={'length': 'Angstrom'},
                parameters=parameters, metadata=metadata)
    g.make_density()
    logger.info("Density completed (initial density in Angstrom**-3)")

    return g
def density_from_Universe(universe, delta=1.0, atomselection='name OH2',
                          start=None, stop=None, step=None,
                          metadata=None, padding=2.0, cutoff=0,
                          soluteselection=None, use_kdtree=True,
                          update_selection=False, quiet=False, interval=1,
                          **kwargs):
    """Create a density grid from a :class:`MDAnalysis.Universe` object.

    The trajectory is read, frame by frame, and the atoms selected with
    *atomselection* are histogrammed on a grid with spacing *delta*::

      density_from_Universe(universe, delta=1.0, atomselection='name OH2', ...) --> density

    .. Note:: By default, the *atomselection* is static, i.e., atoms are only
              selected once at the beginning. If you want dynamically changing
              selections (such as "name OW and around 4.0 (protein and not
              name H*)") then set ``update_selection=True``. For the special
              case of calculating a density of the "bulk" solvent away from a
              solute use the optimized selections with keywords *cutoff* and
              *soluteselection*.

    :Arguments:
      universe
            :class:`MDAnalysis.Universe` object with a trajectory

    :Keywords:
      atomselection
            selection string (MDAnalysis syntax) for the species to be
            analyzed ["name OH2"]
      delta
            bin size for the density grid in Angstroem (same in x,y,z) [1.0]
      start, stop, step
            Slice the trajectory as ``trajectory[start:stop:step]``; default
            is to read the whole trajectory.
      metadata
            dictionary of additional data to be saved with the object; its
            entries are copied (the dictionary passed in is not modified)
      padding
            increase histogram dimensions by padding (on top of initial box
            size) in Angstroem [2.0]
      soluteselection
            MDAnalysis selection for the solute, e.g. "protein" [``None``]
      cutoff
            With *cutoff*, select "<atomsel> NOT WITHIN <cutoff> OF
            <soluteselection>" (Special routines that are faster than the
            standard ``AROUND`` selection) [0]
      use_kdtree
            passed on to the special "not within" selection routine; only
            used when both *cutoff* and *soluteselection* are set [``True``]
      update_selection
            Should the selection of atoms be updated for every step?
            [``False``]

            - ``True``: atom selection is updated for each frame, can be slow
            - ``False``: atoms are only selected at the beginning
      quiet
            Print status update to the screen for every *interval* frame?
            [``False``]

            - ``True``: no status updates when a new frame is processed
            - ``False``: status update every frame (including number of atoms
              processed, which is interesting with ``update_selection=True``)
      interval
            Show status update every *interval* frame [1]
      parameters
            dict with some special parameters for :class:`Density` (see doc);
            read from *kwargs* and copied before being modified
      kwargs
            only *parameters* is used; all other keyword arguments are
            discarded

    :Returns: :class:`Density`

    .. versionchanged:: 0.13.0
       *update_selection* and *quiet* keywords added

    """
    try:
        universe.select_atoms('all')
        universe.trajectory.ts
    except AttributeError:
        raise TypeError(
            "The universe must be a proper MDAnalysis.Universe instance.")
    u = universe

    if cutoff > 0 and soluteselection is not None:
        # special fast selection for
        # '<atomsel> not within <cutoff> of <solutesel>'
        notwithin_coordinates = notwithin_coordinates_factory(
            u, atomselection, soluteselection, cutoff, use_kdtree=use_kdtree)

        def current_coordinates():
            return notwithin_coordinates()
    else:
        group = u.select_atoms(atomselection)

        def current_coordinates():
            return group.positions

    coord = current_coordinates()
    logger.info(
        "Selected {0:d} atoms out of {1:d} atoms ({2!s}) from {3:d} total."
        .format(coord.shape[0], len(u.select_atoms(atomselection)),
                atomselection, len(u.atoms)))

    # mild warning; typically this is run on RMS-fitted trajectories and
    # so the box information is rather meaningless
    angles = u.trajectory.ts.dimensions[3:]
    if tuple(angles) != (90., 90., 90.):
        msg = "Non-orthorhombic unit-cell --- make sure that it has been remapped properly!"
        warnings.warn(msg)
        logger.warning(msg)

    # Make the box bigger to avoid as much as possible 'outlier'. This
    # is important if the sites are defined at a high density: in this
    # case the bulk regions don't have to be close to 1 * n0 but can
    # be less. It's much more difficult to deal with outliers.  The
    # ideal solution would use images: implement 'looking across the
    # periodic boundaries' but that gets complicate when the box
    # rotates due to RMS fitting.
    smin = np.min(coord, axis=0) - padding
    smax = np.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    # materialise the (min, max) pairs: the range is reused for every frame
    # and a bare zip() would be exhausted after its first use on Python 3
    arange = list(zip(BINS['min'], BINS['max']))
    bins = BINS['Nbins']

    # create empty grid with the right dimensions (and get the edges);
    # the un-normalised histogram is the default, so no `normed` keyword
    # (which newer NumPy releases no longer accept) is passed.
    grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange)
    grid *= 0.0
    h = grid.copy()

    pm = ProgressMeter(u.trajectory.n_frames, interval=interval, quiet=quiet,
                       format="Histogramming %(n_atoms)6d atoms in frame "
                       "%(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")
    start, stop, step = u.trajectory.check_slice_indices(start, stop, step)
    for ts in u.trajectory[start:stop:step]:
        if update_selection:
            # NOTE(review): this re-selects `atomselection` only, so the
            # special cutoff/soluteselection routine is bypassed whenever
            # update_selection=True -- confirm that this is intended.
            coord = u.select_atoms(atomselection).positions
        else:
            coord = current_coordinates()

        pm.echo(ts.frame, n_atoms=len(coord))
        if len(coord) == 0:
            continue

        # rebind `edges` rather than assigning into it so that this works
        # regardless of the container type returned for the edges
        h[:], edges = np.histogramdd(coord, bins=bins, range=arange)
        grid += h  # accumulate average histogram

    n_frames = len(range(start, stop, step))
    grid /= float(n_frames)

    # `metadata` is a named parameter and therefore never part of **kwargs;
    # use it directly (as a copy) so that user supplied entries are kept.
    metadata = dict(metadata) if metadata is not None else {}
    metadata['psf'] = u.filename
    metadata['dcd'] = u.trajectory.filename
    metadata['atomselection'] = atomselection
    metadata['n_frames'] = n_frames
    metadata['totaltime'] = round(u.trajectory.n_frames * u.trajectory.dt, 3)
    metadata['dt'] = u.trajectory.dt
    metadata['time_unit'] = MDAnalysis.core.flags['time_unit']
    try:
        metadata['trajectory_skip'] = u.trajectory.skip_timestep  # frames
    except AttributeError:
        metadata['trajectory_skip'] = 1  # seems to not be used..
    try:
        metadata['trajectory_delta'] = u.trajectory.delta  # in native units
    except AttributeError:
        metadata['trajectory_delta'] = 1
    if cutoff > 0 and soluteselection is not None:
        metadata['soluteselection'] = soluteselection
        metadata['cutoff'] = cutoff  # in Angstrom

    # copy so that a dictionary owned by the caller is not modified
    parameters = dict(kwargs.pop('parameters', None) or {})
    parameters['isDensity'] = False  # must override

    # all other kwargs are discarded

    g = Density(grid=grid, edges=edges,
                units={'length': MDAnalysis.core.flags['length_unit']},
                parameters=parameters, metadata=metadata)
    g.make_density()
    logger.info("Density completed (initial density in Angstrom**-3)")

    return g
class AnalysisBase(object):
    """Base class for defining multi frame analysis

    The class it is designed as a template for creating multiframe analyses.
    This class will automatically take care of setting up the trajectory
    reader for iterating, and it offers to show a progress meter.

    To define a new Analysis, `AnalysisBase` needs to be subclassed
    `_single_frame` must be defined. It is also possible to define
    `_prepare` and `_conclude` for pre and post processing. See the example
    below.

    .. code-block:: python

       class NewAnalysis(AnalysisBase):
           def __init__(self, atomgroup, parameter, **kwargs):
               super(NewAnalysis, self).__init__(atomgroup.universe.trajectory,
                                                 **kwargs)
               self._parameter = parameter
               self._ag = atomgroup

           def _prepare(self):
               # OPTIONAL
               # Called before iteration on the trajectory has begun.
               # Data structures can be set up at this time
               self.result = []

           def _single_frame(self):
               # REQUIRED
               # Called after the trajectory is moved onto each new frame.
               # store result of `some_function` for a single frame
               self.result.append(some_function(self._ag, self._parameter))

           def _conclude(self):
               # OPTIONAL
               # Called once iteration on the trajectory is finished.
               # Apply normalisation and averaging to results here.
               self.result = np.asarray(self.result) / np.sum(self.result)

    Afterwards the new analysis can be run like this.

    .. code-block:: python

       na = NewAnalysis(u.select_atoms('name CA'), 35).run(start=10, stop=20)
       print(na.result)

    """

    def __init__(self, trajectory, verbose=False, **kwargs):
        """
        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        verbose : bool, optional
            Turn on more logging and debugging, default ``False``
        **kwargs
            Only the deprecated `start`, `stop` and `step` keywords are
            read; each one that is not ``None`` is stored as an attribute of
            the same name and triggers a :exc:`DeprecationWarning`.
        """
        self._trajectory = trajectory
        self._verbose = verbose
        # do deprecated kwargs
        # remove in 1.0
        deps = []
        for arg in ['start', 'stop', 'step']:
            if arg in kwargs and kwargs[arg] is not None:
                deps.append(arg)
                setattr(self, arg, kwargs[arg])
        if deps:
            warnings.warn('Setting the following kwargs should be '
                          'done in the run() method: {}'.format(
                              ', '.join(deps)),
                          DeprecationWarning)

    def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """
        Pass a Reader object and define the desired iteration pattern
        through the trajectory

        Sets the attributes `start`, `stop`, `step`, `n_frames` and the
        progress meter `_pm`.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        """
        self._trajectory = trajectory
        # TODO: Remove once start/stop/step are deprecated from init
        # See if these have been set as class attributes, and use that
        # NOTE(review): start/stop/step are also stored as attributes below,
        # so on a second run() of the same instance the values of the
        # previous run are found here and take precedence over the new
        # arguments.
        start = getattr(self, 'start', start)
        stop = getattr(self, 'stop', stop)
        step = getattr(self, 'step', step)
        start, stop, step = trajectory.check_slice_indices(start, stop, step)
        self.start = start
        self.stop = stop
        self.step = step
        self.n_frames = len(range(start, stop, step))
        # report progress roughly every 1% of the frames, but at least
        # every frame
        interval = int(self.n_frames // 100)
        if interval == 0:
            interval = 1

        verbose = getattr(self, '_verbose', False)
        # ProgressMeter is given at least one step even for an empty slice
        self._pm = ProgressMeter(self.n_frames if self.n_frames else 1,
                                 interval=interval, verbose=verbose)

    def _single_frame(self):
        """Calculate data from a single frame of trajectory

        Don't worry about normalising, just deal with a single frame.

        Raises
        ------
        NotImplementedError
            always; subclasses must override this method
        """
        raise NotImplementedError("Only implemented in child classes")

    def _prepare(self):
        """Set things up before the analysis loop begins"""
        pass

    def _conclude(self):
        """Finalise the results you've gathered.

        Called at the end of the run() method to finish everything up.
        """
        pass

    def run(self, start=None, stop=None, step=None, verbose=None):
        """Perform the calculation

        Parameters
        ----------
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        verbose : bool, optional
            Turn on verbosity for this run; if ``None`` (default) the value
            given at construction is used

        Returns
        -------
        self
            the instance itself, so that calls can be chained
        """
        logger.info("Choosing frames to analyze")
        # if verbose unchanged, use class default
        default_verbose = getattr(self, '_verbose', False)
        verbose = default_verbose if verbose is None else verbose

        # _setup_frames() (and subclass hooks) read self._verbose, so the
        # per-run setting is made visible there for the duration of the run
        # and the instance default is restored afterwards.
        self._verbose = verbose
        try:
            self._setup_frames(self._trajectory, start, stop, step)
            logger.info("Starting preparation")
            self._prepare()
            for i, ts in enumerate(
                    self._trajectory[self.start:self.stop:self.step]):
                self._frame_index = i
                self._ts = ts
                # logger.info("--> Doing frame {} of {}".format(i+1, self.n_frames))
                self._single_frame()
                self._pm.echo(self._frame_index)
            logger.info("Finishing up")
            self._conclude()
        finally:
            self._verbose = default_verbose
        return self
def rms_fit_trj(traj, reference, select='all', filename=None, rmsdfile=None,
                prefix='rmsfit_', mass_weighted=False, tol_mass=0.1,
                strict=False, force=True, quiet=False, **kwargs):
    """RMS-fit trajectory to a reference structure using a selection.

    Both *reference* and trajectory *traj* must be
    :class:`MDAnalysis.Universe` instances. If they contain a trajectory then
    it is used. The output file format is determined by the file extension of
    *filename*. One can also use the same universe if one wants to fit to the
    current frame.

    :Arguments:
      *traj*
         trajectory, :class:`MDAnalysis.Universe` object
      *reference*
         reference coordinates; :class:`MDAnalysis.Universe` object
         (uses the current time step of the object)
      *select*
         1. any valid selection string for
            :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` that
            produces identical selections in *mobile* and *reference*; or
         2. a dictionary ``{'mobile':sel1, 'reference':sel2}`` (the
            :func:`fasta2select` function returns such a
            dictionary based on a ClustalW_ or STAMP_ sequence alignment); or
         3. a tuple ``(sel1, sel2)``

         When using 2. or 3. with *sel1* and *sel2* then these selections can
         also each be a list of selection strings (to generate a AtomGroup
         with defined atom order as described under
         :ref:`ordered-selections-label`).
      *filename*
         file name for the RMS-fitted trajectory or pdb; defaults to the
         original trajectory filename (from *traj*) with *prefix* prepended
      *rmsdfile*
         file name for writing the RMSD timeseries [``None``]
      *prefix*
         prefix for autogenerating the new output filename
      *mass_weighted*
         do a mass-weighted RMSD fit
      *tol_mass*
         Reject match if the atomic masses for matched atoms differ by more
         than *tol_mass* [0.1]
      *strict*
         Default: ``False``

         - ``True``: Will raise :exc:`SelectionError` if a single atom does
           not match between the two selections.
         - ``False``: Will try to prepare a matching selection by dropping
           residues with non-matching atoms. See :func:`get_matching_atoms`
           for details.
      *force*
         - ``True``: Overwrite an existing output trajectory (default)
         - ``False``: simply return if the file already exists
      *quiet*
         - ``True``: suppress progress and logging for levels INFO and below.
         - ``False``: show all status messages and do not change the logging
           level (default)

         .. Note:: If *quiet* is ``True`` then logging is disabled globally
                   with :func:`logging.disable` while the function runs; it
                   is re-enabled when the function returns or raises.
      *kwargs*
         All other keyword arguments are passed on the trajectory
         :class:`~MDAnalysis.coordinates.base.Writer`; this allows
         manipulating/fixing trajectories on the fly (e.g. change the output
         format by changing the extension of *filename* and setting different
         parameters as described for the corresponding writer).

    :Returns: *filename* (either provided or auto-generated)

    .. _ClustalW: http://www.clustal.org/
    .. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/

    .. versionchanged:: 0.8
       Added *kwargs* to be passed to the trajectory
       :class:`~MDAnalysis.coordinates.base.Writer` and *filename* is
       returned.

    .. versionchanged:: 0.10.0
       Uses :func:`get_matching_atoms` to work with incomplete selections
       and new *strict* keyword. The new default is to be lenient whereas
       the old behavior was the equivalent of *strict* = ``True``.
    """
    frames = traj.trajectory

    if quiet:
        logging.disable(logging.WARN)
    try:
        kwargs.setdefault('remarks', 'RMS fitted trajectory to reference')
        if filename is None:
            path, fn = os.path.split(frames.filename)
            filename = os.path.join(path, prefix + fn)
            _Writer = frames.Writer
        else:
            _Writer = frames.OtherWriter
        if os.path.exists(filename) and not force:
            logger.warning("{0} already exists and will NOT be overwritten; use force=True if you want this".format(filename))
            return filename

        writer = _Writer(filename, **kwargs)
        try:
            select = rms._process_selection(select)
            ref_atoms = reference.select_atoms(*select['reference'])
            traj_atoms = traj.select_atoms(*select['mobile'])

            ref_atoms, traj_atoms = get_matching_atoms(
                ref_atoms, traj_atoms, tol_mass=tol_mass, strict=strict)
            # count the atoms only after matching: with strict=False atoms
            # may have been dropped and the fit must use the final number
            natoms = traj_atoms.n_atoms

            logger.info("RMS-fitting on {0:d} atoms.".format(len(ref_atoms)))
            if mass_weighted:
                # if performing a mass-weighted alignment/rmsd calculation
                weight = ref_atoms.masses / ref_atoms.masses.mean()
            else:
                weight = None

            # reference centre of mass system
            ref_com = ref_atoms.center_of_mass()
            ref_coordinates = ref_atoms.positions - ref_com

            # allocate the array for selection atom coords
            traj_coordinates = traj_atoms.positions.copy()

            # RMSD timeseries
            nframes = len(frames)
            rmsd = np.zeros((nframes,))

            # R: rotation matrix that aligns r-r_com, x~-x~com
            #    (x~: selected coordinates, x: all coordinates)
            # Final transformed traj coordinates: x' = (x-x~_com)*R + ref_com
            rot = np.zeros(9, dtype=np.float64)  # allocate space for calculation

            percentage = ProgressMeter(
                nframes, interval=10, quiet=quiet,
                format="Fitted frame %(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")

            for k, ts in enumerate(frames):
                # shift coordinates for rotation fitting
                # selection is updated with the time frame
                x_com = traj_atoms.center_of_mass().astype(np.float32)
                traj_coordinates[:] = traj_atoms.positions - x_com

                # Need to transpose coordinates such that the coordinate
                # array is 3xN instead of Nx3. Also qcp requires that the
                # dtype be float64
                # (I think we swapped the position of ref and traj in
                # CalcRMSDRotationalMatrix so that R acts **to the left** and
                # can be broadcasted; we're saving one transpose. [orbeckst])
                rmsd[k] = qcp.CalcRMSDRotationalMatrix(
                    ref_coordinates.T.astype(np.float64),
                    traj_coordinates.T.astype(np.float64),
                    natoms, rot, weight)
                R = rot.reshape(3, 3)

                # Transform each atom in the trajectory (use inplace ops to
                # avoid copying arrays)
                ts.positions -= x_com
                # R acts to the left & is broadcasted N times.
                ts.positions[:] = np.dot(ts.positions, R)
                ts.positions += ref_com

                writer.write(traj.atoms)  # write whole input trajectory system
                percentage.echo(ts.frame)
        finally:
            # release the output file even if fitting failed part-way
            writer.close()

        logger.info("Wrote %d RMS-fitted coordinate frames to file %r",
                    frames.n_frames, filename)

        if rmsdfile is not None:
            np.savetxt(rmsdfile, rmsd)
            logger.info("Wrote RMSD timeseries to file %r", rmsdfile)
    finally:
        # always restore logging, including on the early return above and
        # when an exception is raised
        if quiet:
            logging.disable(logging.NOTSET)

    return filename
def density_from_Universe(universe, delta=1.0, atomselection='name OH2',
                          start=None, stop=None, step=None,
                          metadata=None, padding=2.0, cutoff=0,
                          soluteselection=None, use_kdtree=True,
                          update_selection=False, quiet=False, interval=1,
                          **kwargs):
    """Create a density grid from a :class:`MDAnalysis.Universe` object.

    The trajectory is read, frame by frame, and the atoms selected with
    *atomselection* are histogrammed on a grid with spacing *delta*::

      density_from_Universe(universe, delta=1.0, atomselection='name OH2', ...) --> density

    .. Note:: By default, the *atomselection* is static, i.e., atoms are only
              selected once at the beginning. If you want dynamically changing
              selections (such as "name OW and around 4.0 (protein and not
              name H*)") then set ``update_selection=True``. For the special
              case of calculating a density of the "bulk" solvent away from a
              solute use the optimized selections with keywords *cutoff* and
              *soluteselection*.

    :Arguments:
      universe
            :class:`MDAnalysis.Universe` object with a trajectory

    :Keywords:
      atomselection
            selection string (MDAnalysis syntax) for the species to be
            analyzed ["name OH2"]
      delta
            bin size for the density grid in Angstroem (same in x,y,z) [1.0]
      start, stop, step
            Slice the trajectory as ``trajectory[start:stop:step]``; default
            is to read the whole trajectory. The histogram is averaged over
            the frames that are actually read.
      metadata
            dictionary of additional data to be saved with the object; its
            entries are copied (the dictionary passed in is not modified)
      padding
            increase histogram dimensions by padding (on top of initial box
            size) in Angstroem [2.0]
      soluteselection
            MDAnalysis selection for the solute, e.g. "protein" [``None``]
      cutoff
            With *cutoff*, select "<atomsel> NOT WITHIN <cutoff> OF
            <soluteselection>" (Special routines that are faster than the
            standard ``AROUND`` selection) [0]
      use_kdtree
            passed on to the special "not within" selection routine; only
            used when both *cutoff* and *soluteselection* are set [``True``]
      update_selection
            Should the selection of atoms be updated for every step?
            [``False``]

            - ``True``: atom selection is updated for each frame, can be slow
            - ``False``: atoms are only selected at the beginning
      quiet
            Print status update to the screen for every *interval* frame?
            [``False``]

            - ``True``: no status updates when a new frame is processed
            - ``False``: status update every frame (including number of atoms
              processed, which is interesting with ``update_selection=True``)
      interval
            Show status update every *interval* frame [1]
      parameters
            dict with some special parameters for :class:`Density` (see doc);
            read from *kwargs* and copied before being modified
      kwargs
            only *parameters* is used; all other keyword arguments are
            discarded

    :Returns: :class:`Density`

    .. versionchanged:: 0.13.0
       *update_selection* and *quiet* keywords added

    """
    try:
        universe.select_atoms('all')
        universe.trajectory.ts
    except AttributeError:
        raise TypeError(
            "The universe must be a proper MDAnalysis.Universe instance.")
    u = universe

    if cutoff > 0 and soluteselection is not None:
        # special fast selection for
        # '<atomsel> not within <cutoff> of <solutesel>'
        notwithin_coordinates = notwithin_coordinates_factory(
            u, atomselection, soluteselection, cutoff, use_kdtree=use_kdtree)

        def current_coordinates():
            return notwithin_coordinates()
    else:
        group = u.select_atoms(atomselection)

        def current_coordinates():
            return group.positions

    coord = current_coordinates()
    logger.info(
        "Selected {0:d} atoms out of {1:d} atoms ({2!s}) from {3:d} total.".
        format(coord.shape[0], len(u.select_atoms(atomselection)),
               atomselection, len(u.atoms)))

    # mild warning; typically this is run on RMS-fitted trajectories and
    # so the box information is rather meaningless
    angles = u.trajectory.ts.dimensions[3:]
    if tuple(angles) != (90., 90., 90.):
        msg = "Non-orthorhombic unit-cell --- make sure that it has been remapped properly!"
        warnings.warn(msg)
        logger.warning(msg)

    # Make the box bigger to avoid as much as possible 'outlier'. This
    # is important if the sites are defined at a high density: in this
    # case the bulk regions don't have to be close to 1 * n0 but can
    # be less. It's much more difficult to deal with outliers.  The
    # ideal solution would use images: implement 'looking across the
    # periodic boundaries' but that gets complicate when the box
    # rotates due to RMS fitting.
    smin = np.min(coord, axis=0) - padding
    smax = np.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    # materialise the (min, max) pairs: the range is reused for every frame
    # and a bare zip() would be exhausted after its first use on Python 3
    arange = list(zip(BINS['min'], BINS['max']))
    bins = BINS['Nbins']

    # create empty grid with the right dimensions (and get the edges);
    # the un-normalised histogram is the default, so no `normed` keyword
    # (which newer NumPy releases no longer accept) is passed.
    grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange)
    grid *= 0.0
    h = grid.copy()

    pm = ProgressMeter(u.trajectory.n_frames, interval=interval, quiet=quiet,
                       format="Histogramming %(n_atoms)6d atoms in frame "
                       "%(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")

    # number of frames actually read; the average must be taken over these
    # and not over the full trajectory when start/stop/step select a subset
    n_frames = 0
    for ts in u.trajectory[start:stop:step]:
        n_frames += 1
        if update_selection:
            # NOTE(review): this re-selects `atomselection` only, so the
            # special cutoff/soluteselection routine is bypassed whenever
            # update_selection=True -- confirm that this is intended.
            coord = u.select_atoms(atomselection).positions
        else:
            coord = current_coordinates()

        pm.echo(ts.frame, n_atoms=len(coord))
        if len(coord) == 0:
            continue

        # rebind `edges` rather than assigning into it so that this works
        # regardless of the container type returned for the edges
        h[:], edges = np.histogramdd(coord, bins=bins, range=arange)
        grid += h  # accumulate average histogram

    if n_frames > 0:
        # an empty slice leaves the (all zero) grid untouched
        grid /= float(n_frames)

    # `metadata` is a named parameter and therefore never part of **kwargs;
    # use it directly (as a copy) so that user supplied entries are kept.
    metadata = dict(metadata) if metadata is not None else {}
    metadata['psf'] = u.filename
    metadata['dcd'] = u.trajectory.filename
    metadata['atomselection'] = atomselection
    metadata['n_frames'] = n_frames
    metadata['totaltime'] = round(u.trajectory.n_frames * u.trajectory.dt, 3)
    metadata['dt'] = u.trajectory.dt
    metadata['time_unit'] = MDAnalysis.core.flags['time_unit']
    try:
        metadata['trajectory_skip'] = u.trajectory.skip_timestep  # frames
    except AttributeError:
        metadata['trajectory_skip'] = 1  # seems to not be used..
    try:
        metadata['trajectory_delta'] = u.trajectory.delta  # in native units
    except AttributeError:
        metadata['trajectory_delta'] = 1
    if cutoff > 0 and soluteselection is not None:
        metadata['soluteselection'] = soluteselection
        metadata['cutoff'] = cutoff  # in Angstrom

    # copy so that a dictionary owned by the caller is not modified
    parameters = dict(kwargs.pop('parameters', None) or {})
    parameters['isDensity'] = False  # must override

    # all other kwargs are discarded

    g = Density(grid=grid, edges=edges,
                units={'length': MDAnalysis.core.flags['length_unit']},
                parameters=parameters, metadata=metadata)
    g.make_density()
    logger.info("Density completed (initial density in Angstrom**-3)")

    return g
def density_from_Universe(universe, delta=1.0, atomselection='name OH2',
                          start=None, stop=None, step=None,
                          metadata=None, padding=2.0, cutoff=0,
                          soluteselection=None, use_kdtree=True,
                          update_selection=False, verbose=None, interval=1,
                          quiet=None, parameters=None):
    """Create a density grid from a :class:`MDAnalysis.Universe` object.

    The trajectory is read, frame by frame, and the atoms selected with
    `atomselection` are histogrammed on a grid with spacing `delta`.

    Parameters
    ----------
    universe : MDAnalysis.Universe
            :class:`MDAnalysis.Universe` object with a trajectory
    atomselection : str (optional)
            selection string (MDAnalysis syntax) for the species to be analyzed
            ["name OH2"]
    delta : float (optional)
            bin size for the density grid in Angstroem (same in x,y,z) [1.0]
    start : int (optional)
    stop : int (optional)
    step : int (optional)
            Slice the trajectory as ``trajectory[start:stop:step]``; default
            is to read the whole trajectory.
    metadata : dict. optional
            `dict` of additional data to be saved with the object; the meta
            data are passed through as they are. The dictionary is copied, so
            the caller's object is not modified.
    padding : float (optional)
            increase histogram dimensions by padding (on top of initial box
            size) in Angstroem [2.0]
    soluteselection : str (optional)
            MDAnalysis selection for the solute, e.g. "protein" [``None``]
    cutoff : float (optional)
            With `cutoff`, select "<atomsel> NOT WITHIN <cutoff> OF
            <soluteselection>" (Special routines that are faster than the
            standard ``AROUND`` selection); any value that evaluates to
            ``False`` (such as the default 0) disables this special selection.
    use_kdtree : bool (optional)
            Passed on to :func:`notwithin_coordinates_factory` when the
            `cutoff`/`soluteselection` special selection is active [``True``]
    update_selection : bool (optional)
            Should the selection of atoms be updated for every step? [``False``]

            - ``True``: atom selection is updated for each frame, can be slow
            - ``False``: atoms are only selected at the beginning
    verbose : bool (optional)
            Print status update to the screen for every *interval* frame?
            [``True``]

            - ``False``: no status updates when a new frame is processed
            - ``True``: status update every frame (including number of atoms
              processed, which is interesting with ``update_selection=True``)
    interval : int (optional)
           Show status update every `interval` frame [1]
    quiet : bool (optional)
           Deprecated counterpart of `verbose`; handed to the progress meter
           unchanged.
    parameters : dict (optional)
            `dict` with some special parameters for :class:`Density` (see docs).
            The dictionary is copied, so the caller's object is not modified;
            ``isDensity`` is always overridden with ``False``.

    Returns
    -------
    :class:`Density`
            A :class:`Density` instance with the histogrammed data together
            with associated metadata.


    Notes
    -----

    By default, the `atomselection` is static, i.e., atoms are only selected
    once at the beginning. If you want *dynamically changing selections* (such
    as "name OW and around 4.0 (protein and not name H*)", i.e., the water
    oxygen atoms that are within 4 Å of the protein heavy atoms) then set
    ``update_selection=True``. For the special case of calculating a density of
    the "bulk" solvent away from a solute use the optimized selections with
    keywords *cutoff* and *soluteselection* (see Examples below).

    Examples
    --------
    Basic use for creating a water density (just using the water oxygen atoms "OW")::

      density = density_from_Universe(universe, delta=1.0, atomselection='name OW')

    If you are only interested in water within a certain region, e.g., within a
    vicinity around a binding site, you can use a selection that updates every
    step by setting the `update_selection` keyword argument::

      site_density = density_from_Universe(universe, delta=1.0,
                                           atomselection='name OW and around 5 (resid 156 157 305)',
                                           update_selection=True)

    A special case for an updating selection is to create the "bulk density",
    i.e., the water outside the immediate solvation shell of a protein: Select
    all water oxygen atoms that are *farther away* than a given cut-off (say, 4
    Å) from the solute (here, heavy atoms of the protein)::

      bulk = density_from_Universe(universe, delta=1.0, atomselection='name OW',
                                   soluteselection="protein and not name H*",
                                   cutoff=4)

    (Using the special case for the bulk with `soluteselection` and `cutoff`
    improves performance over the simple `update_selection` approach.)

    .. versionchanged:: 0.13.0
       *update_selection* and *quiet* keywords added

    .. deprecated:: 0.16
       The keyword argument *quiet* is deprecated in favor of *verbose*.

    """
    u = universe

    # Any falsy cutoff (0, None, ...) disables the special selection; testing
    # truthiness first avoids comparing ``None > 0`` on Python 3.
    use_bulk_selection = (bool(cutoff) and cutoff > 0
                          and soluteselection is not None)

    if use_bulk_selection:
        # special fast selection for '<atomsel> not within <cutoff> of <solutesel>'
        notwithin_coordinates = notwithin_coordinates_factory(
            u, atomselection, soluteselection, cutoff,
            use_kdtree=use_kdtree, updating_selection=update_selection)

        def current_coordinates():
            return notwithin_coordinates()
    else:
        group = u.select_atoms(atomselection, updating=update_selection)

        def current_coordinates():
            return group.positions

    coord = current_coordinates()
    logger.info(
        "Selected {0:d} atoms out of {1:d} atoms ({2!s}) from {3:d} total."
        "".format(coord.shape[0], len(u.select_atoms(atomselection)),
                  atomselection, len(u.atoms))
    )

    # mild warning; typically this is run on RMS-fitted trajectories and
    # so the box information is rather meaningless
    angles = u.trajectory.ts.dimensions[3:]
    if tuple(angles) != (90., 90., 90.):
        msg = "Non-orthorhombic unit-cell --- make sure that it has been remapped properly!"
        warnings.warn(msg)
        logger.warning(msg)

    # Make the box bigger to avoid as much as possible 'outlier'. This
    # is important if the sites are defined at a high density: in this
    # case the bulk regions don't have to be close to 1 * n0 but can
    # be less. It's much more difficult to deal with outliers.  The
    # ideal solution would use images: implement 'looking across the
    # periodic boundaries' but that gets complicate when the box
    # rotates due to RMS fitting.
    smin = np.min(coord, axis=0) - padding
    smax = np.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    # one (min, max) pair per dimension, as expected by histogramdd's `range`
    arange = np.transpose(np.vstack((BINS['min'], BINS['max'])))
    bins = BINS['Nbins']

    # create empty grid with the right dimensions (and get the edges);
    # the un-normalised histogram is numpy's default, so no `normed`
    # keyword (which newer NumPy no longer accepts) is passed.
    grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange)
    grid *= 0.0
    h = grid.copy()

    pm = ProgressMeter(u.trajectory.n_frames, interval=interval,
                       verbose=verbose, quiet=quiet,
                       format="Histogramming %(n_atoms)6d atoms in frame "
                       "%(step)5d/%(numsteps)d [%(percentage)5.1f%%]\r")
    start, stop, step = u.trajectory.check_slice_indices(start, stop, step)
    for ts in u.trajectory[start:stop:step]:
        coord = current_coordinates()

        pm.echo(ts.frame, n_atoms=len(coord))
        if len(coord) == 0:
            continue

        h[:], edges[:] = np.histogramdd(coord, bins=bins, range=arange)
        grid += h  # accumulate average histogram

    # average over the frames selected by the slice, not the whole trajectory
    n_frames = len(range(start, stop, step))
    grid /= float(n_frames)

    # work on copies so that dictionaries supplied by the caller stay untouched
    metadata = dict(metadata) if metadata is not None else {}
    metadata['psf'] = u.filename
    metadata['dcd'] = u.trajectory.filename
    metadata['atomselection'] = atomselection
    metadata['n_frames'] = n_frames
    metadata['totaltime'] = round(u.trajectory.n_frames * u.trajectory.dt, 3)
    metadata['dt'] = u.trajectory.dt
    metadata['time_unit'] = MDAnalysis.core.flags['time_unit']
    try:
        metadata['trajectory_skip'] = u.trajectory.skip_timestep  # frames
    except AttributeError:
        metadata['trajectory_skip'] = 1  # seems to not be used..
    try:
        metadata['trajectory_delta'] = u.trajectory.delta  # in native units
    except AttributeError:
        metadata['trajectory_delta'] = 1
    if use_bulk_selection:
        metadata['soluteselection'] = soluteselection
        metadata['cutoff'] = cutoff  # in Angstrom

    parameters = dict(parameters) if parameters is not None else {}
    parameters['isDensity'] = False  # must override

    g = Density(grid=grid, edges=edges,
                units={'length': MDAnalysis.core.flags['length_unit']},
                parameters=parameters, metadata=metadata)
    g.make_density()
    logger.info("Density completed (initial density in Angstrom**-3)")

    return g
def run(self, start=None, stop=None, step=None, verbose=None, debug=None):
    """Analyze trajectory and produce timeseries.

    Stores the water bridge data per frame as
    :attr:`WaterBridgeAnalysis.timeseries` (see there for output format).

    For every analysed frame exactly one entry is appended to each of
    ``self.timesteps``, ``self._timeseries`` and ``self._water_network``,
    so the three lists can be iterated in lockstep.

    Parameters
    ----------
    start : int (optional)
        starting frame-index for analysis, ``None`` is the first one, 0.
        `start` and `stop` are 0-based frame indices and are used to slice
        the trajectory (if supported) [``None``]
    stop : int (optional)
        last trajectory frame for analysis, ``None`` is the last one
        [``None``]
    step : int (optional)
        read every `step` between `start` (included) and `stop` (excluded),
        ``None`` selects 1. [``None``]
    verbose : bool (optional)
         toggle progress meter output
         :class:`~MDAnalysis.lib.log.ProgressMeter`; ``None`` falls back to
         the instance's ``_verbose`` setting [``True``]
    debug : bool (optional)
         enable detailed logging of debugging information; this can create
         *very big* log files so it is disabled (``False``) by default;
         setting `debug` toggles the debug status for
         :class:`WaterBridgeAnalysis`, namely the value of
         :attr:`WaterBridgeAnalysis.debug`.

    See Also
    --------
    :meth:`WaterBridgeAnalysis.generate_table` :
               processing the data into a different format.
    """
    self._setup_frames(self.u.trajectory, start, stop, step)

    logger.info("WBridge analysis: starting")
    logger.debug("WBridge analysis: donors    %r", self.donors)
    logger.debug("WBridge analysis: acceptors %r", self.acceptors)
    logger.debug("WBridge analysis: water bridge %r", self.water_selection)

    if debug is not None and debug != self.debug:
        self.debug = debug
        logger.debug("Toggling debug to %r", self.debug)
    if not self.debug:
        logger.debug("WBridge analysis: For full step-by-step debugging output use debug=True")

    self._timeseries = []
    self.timesteps = []
    self._water_network = []

    if verbose is None:
        verbose = self._verbose
    pm = ProgressMeter(self.n_frames,
                       format="WBridge frame {current_step:5d}: {step:5d}/{numsteps} [{percentage:5.1f}%]\r",
                       verbose=verbose)

    logger.info("Starting analysis (frame index start=%d stop=%d, step=%d)",
                self.start, self.stop, self.step)

    def record_hbonds(donors, donors_h, acceptors, results, message,
                      donor_first, water_is_donor):
        # Test every hydrogen in `donors_h` (index -> bonded hydrogens of
        # donors[index]) against the neighbouring `acceptors` and store each
        # pair that passes the distance and angle criteria in `results`,
        # keyed by the (resname, resid) of the water-side atom.
        #   donor_first    -- order of the two indices in the debug message
        #   water_is_donor -- whether the hydrogen (True) or the acceptor
        #                     (False) is the water atom used as the key
        neighbour_search = AtomNeighborSearch(acceptors)
        for i, donor_h_set in donors_h.items():
            d = donors[i]
            for h in donor_h_set:
                for a in neighbour_search.search(h, self.distance):
                    # 'heavy' measures donor heavy atom -> acceptor,
                    # anything else measures hydrogen -> acceptor
                    donor_atom = h if self.distance_type != 'heavy' else d
                    dist = distances.calc_bonds(donor_atom.position,
                                                a.position)
                    if dist <= self.distance:
                        angle = np.rad2deg(
                            distances.calc_angles(d.position, h.position,
                                                  a.position))
                        if angle >= self.angle:
                            first, second = (h, a) if donor_first else (a, h)
                            self.logger_debug(
                                message.format(first.index, second.index,
                                               dist, angle))
                            water_atom = h if water_is_donor else a
                            results[(water_atom.resname,
                                     water_atom.resid)].append(
                                (h.index, a.index,
                                 (h.resname, h.resid, h.name),
                                 (a.resname, a.resid, a.name),
                                 dist, angle))

    for progress, ts in enumerate(self.u.trajectory[self.start:self.stop:self.step]):
        frame = ts.frame
        timestep = ts.time
        self.timesteps.append(timestep)
        pm.echo(progress, current_step=frame)
        self.logger_debug("Analyzing frame %(frame)d, timestep %(timestep)f ps",
                          {'frame': frame, 'timestep': timestep})

        if self.update_selection1:
            self._update_selection_1()
        if self.update_selection2:
            self._update_selection_2()
        if self.update_water_selection:
            self._update_water_selection()

        # Hydrogen bonds between selection 1 and water, grouped per water
        s1_frame_results_dict = defaultdict(list)
        if (self.selection1_type in ('donor', 'both') and
                self._water_acceptors):
            self.logger_debug("Selection 1 Donors <-> Water Acceptors")
            record_hbonds(self._s1_donors, self._s1_donors_h,
                          self._water_acceptors, s1_frame_results_dict,
                          "S1-D: {0!s} <-> W-A: {1!s} {2:f} A, {3:f} DEG",
                          donor_first=True, water_is_donor=False)

        if (self.selection1_type in ('acceptor', 'both') and
                self._s1_acceptors):
            self.logger_debug("Selection 1 Acceptors <-> Water Donors")
            record_hbonds(self._water_donors, self._water_donors_h,
                          self._s1_acceptors, s1_frame_results_dict,
                          "S1-A: {0!s} <-> W-D: {1!s} {2:f} A, {3:f} DEG",
                          donor_first=False, water_is_donor=True)

        # Narrow down the water selection
        selection_resn_id = list(s1_frame_results_dict.keys())
        if not selection_resn_id:
            # No water is bonded to selection 1 in this frame. Record an
            # empty entry in *both* per-frame lists so that they stay
            # aligned with self.timesteps.
            self._timeseries.append([])
            self._water_network.append({})
            continue
        selection_resn_id = ['(resname {} and resid {})'.format(
            resname, resid) for resname, resid in selection_resn_id]
        water_bridges = self._water.select_atoms(' or '.join(selection_resn_id))
        self.logger_debug("Size of water bridge selection: {0} atoms".format(len(water_bridges)))
        if not water_bridges:
            logger.warning("No water forming hydrogen bonding with selection 1.")
        water_bridges_donors = water_bridges.select_atoms(
            'name {0}'.format(' '.join(self.donors)))
        water_bridges_donors_h = {}
        for i, d in enumerate(water_bridges_donors):
            tmp = self._get_bonded_hydrogens(d)
            if tmp:
                water_bridges_donors_h[i] = tmp
        self.logger_debug("water bridge donors: {0}".format(len(water_bridges_donors)))
        self.logger_debug("water bridge donor hydrogens: {0}".format(len(water_bridges_donors_h)))
        water_bridges_acceptors = water_bridges.select_atoms(
            'name {0}'.format(' '.join(self.acceptors)))
        self.logger_debug("water bridge: {0}".format(len(water_bridges_acceptors)))

        # Finding the hydrogen bonds between water bridge and selection 2
        s2_frame_results_dict = defaultdict(list)
        if self._s2_acceptors:
            self.logger_debug("Water bridge Donors <-> Selection 2 Acceptors")
            record_hbonds(water_bridges_donors, water_bridges_donors_h,
                          self._s2_acceptors, s2_frame_results_dict,
                          "WB-D: {0!s} <-> S2-A: {1!s} {2:f} A, {3:f} DEG",
                          donor_first=True, water_is_donor=True)

        if water_bridges_acceptors:
            self.logger_debug("Selection 2 Donors <-> Water bridge Acceptors")
            record_hbonds(self._s2_donors, self._s2_donors_h,
                          water_bridges_acceptors, s2_frame_results_dict,
                          "WB-A: {0!s} <-> S2-D: {1!s} {2:f} A, {3:f} DEG",
                          donor_first=False, water_is_donor=False)

        # Generate the water network
        water_network = {}
        for key in s2_frame_results_dict:
            s1_frame_results = set(s1_frame_results_dict[key])
            s2_frame_results = set(s2_frame_results_dict[key])
            if len(s1_frame_results.union(s2_frame_results)) > 1:
                # Thus if selection 1 and selection 2 are the same and both
                # only form a single hydrogen bond with a water, this entry
                # won't be included.
                water_network[key] = [
                    s1_frame_results,
                    s2_frame_results.difference(s1_frame_results)]

        # Generate frame_results
        frame_results = []
        for s1_frame_results, s2_frame_results in water_network.values():
            frame_results.extend(list(s1_frame_results))
            frame_results.extend(list(s2_frame_results))

        self._timeseries.append(frame_results)
        self._water_network.append(water_network)

    logger.info("WBridge analysis: complete; timeseries %s.timeseries",
                self.__class__.__name__)
def helanal_trajectory(universe, selection="name CA", start=None, end=None, begin=None, finish=None,
                       matrix_filename="bending_matrix.dat", origin_pdbfile="origin.pdb",
                       summary_filename="summary.txt", screw_filename="screw.xvg",
                       tilt_filename="local_tilt.xvg", fitted_tilt_filename="fit_tilt.xvg",
                       bend_filename="local_bend.xvg", twist_filename="unit_twist.xvg",
                       prefix="helanal_", ref_axis=None, quiet=False):
    """Perform HELANAL_ helix analysis on all frames in *universe*.

    .. Note::

       Only a single helix is analyzed. Use the selection to specify the
       helix, e.g. with "name CA and resid 1:20" or use start=1, end=20.

    :Arguments:
       *universe*
          :class:`~MDAnalysis.core.AtomGroup.Universe`

    :Keywords:
       *selection*
          selection string that selects Calpha atoms [``"name CA"``]
       *start*
          start residue resid
       *end*
          end residue resid
       *begin*
          start analysing for time (ps) >= *begin*; ``None`` starts from the
          beginning [``None``]
       *finish*
          stop analysis for time (ps) =< *finish*; ``None`` goes to the
          end of the trajectory [``None``]
       *matrix_filename*
          Output file- bending matrix [``"bending_matrix.dat"``]
       *origin_pdbfile*
          Output file- origin pdb file [``"origin.pdb"``]
       *summary_filename*
          Output file- all of the basic data [``"summary.txt"``]
       *screw_filename*
          Output file- local screw angles, one row per frame
          [``"screw.xvg"``]
       *tilt_filename*
          Output file- angle between each local helix axis and *ref_axis*,
          one row per frame [``"local_tilt.xvg"``]
       *fitted_tilt_filename*
          Output file- tilt of the line of best fit through the origins,
          one row per frame [``"fit_tilt.xvg"``]
       *bend_filename*
          Output file- local bend angles between successive local helix axes
          [``"local_bend.xvg"``]
       *twist_filename*
          Output file- local unit twist between successive helix turns
          [``"unit_twist.xvg"``]
       *prefix*
          Prefix to add to all output file names; set to ``None`` to disable
          [``"helanal_"``]
       *ref_axis*
          Calculate tilt angle relative to the axis; if ``None`` then ``[0,0,1]``
          is chosen [``None``]
       *quiet*
          Suppress most diagnostic output.

    Every per-frame ``.xvg`` file receives one whitespace-separated row per
    analysed frame: the frame number followed by the values for that frame.

    :Raises:
       FinishTimeException
        If the specified finish time precedes the specified start time or
        current time stamp of trajectory object.

    .. versionchanged:: 0.13.0
       New *quiet* keyword to silence frame progress output and most of the
       output that used to be printed to stdout is now logged to the logger
       *MDAnalysis.analysis.helanal* (at logelevel *INFO*).

    """
    if ref_axis is None:
        ref_axis = np.array([0., 0., 1.])
    else:
        # enable MDA API so that one can use a tuple of atoms or AtomGroup with
        # two atoms
        ref_axis = np.asarray(ref_axis)

    if not (start is None and end is None):
        # only one bound given: fill in the other from the universe
        if start is None:
            start = universe.atoms[0].resid
        if end is None:
            end = universe.atoms[-1].resid
        selection += " and resid {start:d}:{end:d}".format(start=start, end=end)
    ca = universe.select_atoms(selection)
    trajectory = universe.trajectory

    if finish is not None:
        if trajectory.ts.time > finish:
            # you'd be starting with a finish time (in ps) that has already passed or not
            # available
            raise FinishTimeException(
                'The input finish time ({finish} ps) precedes the current trajectory time of {traj_time} ps.'.format(
                    finish=finish, traj_time=trajectory.time))

    if start is not None and end is not None:
        logger.info("Analysing from residue %d to %d", start, end)
    elif start is not None and end is None:
        logger.info("Analysing from residue %d to the C termini", start)
    elif start is None and end is not None:
        logger.info("Analysing from the N termini to %d", end)
    logger.info("Analysing %d/%d residues", ca.n_atoms, universe.atoms.n_residues)

    if prefix is not None:
        prefix = str(prefix)
        matrix_filename = prefix + matrix_filename
        origin_pdbfile = prefix + origin_pdbfile
        summary_filename = prefix + summary_filename
        screw_filename = prefix + screw_filename
        tilt_filename = prefix + tilt_filename
        fitted_tilt_filename = prefix + fitted_tilt_filename
        bend_filename = prefix + bend_filename
        twist_filename = prefix + twist_filename
    # the per-frame files are opened in append mode below, so existing
    # files are handed to backup_file() first
    backup_file(matrix_filename)
    backup_file(origin_pdbfile)
    backup_file(summary_filename)
    backup_file(screw_filename)
    backup_file(tilt_filename)
    backup_file(fitted_tilt_filename)
    backup_file(bend_filename)
    backup_file(twist_filename)

    def append_row(filename, label, values):
        # Append "<label> v1 v2 ..." as one whitespace-separated line.
        with open(filename, "a") as output:
            print(label, *values, file=output)

    global_height = []
    global_twist = []
    global_rnou = []
    global_bending = []
    global_bending_matrix = []
    global_fitted_tilts = []

    pm = ProgressMeter(trajectory.n_frames, quiet=quiet,
                       format="Frame %(step)10d: %(time)20.1f ps\r")
    for ts in trajectory:
        pm.echo(ts.frame, time=ts.time)
        frame = ts.frame
        if begin is not None:
            if trajectory.time < begin:
                continue
        if finish is not None:
            if trajectory.time > finish:
                break

        ca_positions = ca.positions
        twist, bending_angles, height, rnou, origins, local_helix_axes, local_screw_angles = \
            main_loop(ca_positions, ref_axis=ref_axis)

        origin_pdb(origins, origin_pdbfile)

        # calculate local bending matrix (it is looking at all i, j
        # combinations); only the upper triangle (i < j) is filled
        n_axes = len(local_helix_axes)
        if len(global_bending_matrix) == 0:
            global_bending_matrix = [[[] for _col in range(n_axes)]
                                     for _row in range(n_axes)]

        for i in range(n_axes):
            for j in range(i + 1, n_axes):
                angle = np.rad2deg(np.arccos(np.dot(local_helix_axes[i], local_helix_axes[j])))
                global_bending_matrix[i][j].append(angle)

        fit_tilt = vector_of_best_fit(origins)[1]
        global_height += height
        global_twist += twist
        global_rnou += rnou
        global_fitted_tilts.append(np.rad2deg(fit_tilt))

        # per-frame rows: frame number followed by the values of this frame
        append_row(twist_filename, frame, twist)
        append_row(bend_filename, frame, bending_angles)
        append_row(screw_filename, frame, local_screw_angles)
        append_row(tilt_filename, frame,
                   [np.rad2deg(mdamath.angle(axis, ref_axis))
                    for axis in local_helix_axes])
        append_row(fitted_tilt_filename, frame, [np.rad2deg(fit_tilt)])

        if len(global_bending) == 0:
            global_bending = [[] for item in bending_angles]
        for store, tmp in zip(global_bending, bending_angles):
            store.append(tmp)

    twist_mean, twist_sd, twist_abdev = stats(global_twist)
    height_mean, height_sd, height_abdev = stats(global_height)
    rnou_mean, rnou_sd, rnou_abdev = stats(global_rnou)
    ftilt_mean, ftilt_sd, ftilt_abdev = stats(global_fitted_tilts)

    bending_statistics = [stats(item) for item in global_bending]
    bending_statistics_matrix = [[stats(col) for col in row] for row in global_bending_matrix]

    # three stacked matrices: index 0/1/2 of each stats() result
    with open(matrix_filename, 'w') as mat_output:
        for title, column in (("Mean", 0), ('\nSD', 1), ("\nABDEV", 2)):
            print(title, file=mat_output)
            for row in bending_statistics_matrix:
                for col in row:
                    formatted_angle = "{0:6.1f}".format(col[column])
                    print(formatted_angle, end='', file=mat_output)
                print('', file=mat_output)

    logger.info("Height: %g SD: %g ABDEV: %g (Angstroem)", height_mean, height_sd, height_abdev)
    logger.info("Twist: %g SD: %g ABDEV: %g", twist_mean, twist_sd, twist_abdev)
    logger.info("Residues/turn: %g SD: %g ABDEV: %g", rnou_mean, rnou_sd, rnou_abdev)
    logger.info("Fitted tilt: %g SD: %g ABDEV: %g", ftilt_mean, ftilt_sd, ftilt_abdev)
    logger.info("Local bending angles:")

    # Transpose to one sequence per measure (mean, SD, ABDEV). zip() must be
    # materialised because it is indexed below (an iterator on Python 3).
    residue_statistics = list(zip(*bending_statistics))
    measure_names = ["Mean ", "SD ", "ABDEV"]
    # first labelled residue is 4 by default, start + 3 when start is given
    first_resid = 4 if start is None else start + 3
    resids = range(first_resid, len(residue_statistics[0]) + first_resid)

    output = " ".join(["{0:8d}".format(item) for item in resids])
    logger.info("ResID %s", output)
    for measure, name in zip(residue_statistics, measure_names):
        output = str(name) + " "
        output += " ".join(["{0:8.1f}".format(residue) for residue in measure])
        logger.info(output)

    with open(summary_filename, 'w') as summary_output:
        # NOTE(review): the log line above labels the height "(Angstroem)"
        # while this file says "(nm)" -- confirm which unit is correct.
        print("Height:", height_mean, "SD", height_sd, "ABDEV", height_abdev, '(nm)', file=summary_output)
        print("Twist:", twist_mean, "SD", twist_sd, "ABDEV", twist_abdev,
              file=summary_output)
        print("Residues/turn:", rnou_mean, "SD", rnou_sd, "ABDEV", rnou_abdev,
              file=summary_output)
        print("Local bending angles:", file=summary_output)
        print("ResID", end='', file=summary_output)
        for item in resids:
            output = "{0:8d}".format(item)
            print(output, end='', file=summary_output)
        print('', file=summary_output)

        for measure, name in zip(residue_statistics, measure_names):
            print(name, end='', file=summary_output)
            for residue in measure:
                output = "{0:8.1f}".format(residue)
                print(output, end='', file=summary_output)
            print('', file=summary_output)
class AnalysisBase(object):
    """Base class for defining multi frame analysis

    The class it is designed as a template for creating multiframe analyses.
    This class will automatically take care of setting up the trajectory
    reader for iterating, and it offers to show a progress meter.

    To define a new Analysis, `AnalysisBase` needs to be subclassed
    `_single_frame` must be defined. It is also possible to define
    `_prepare` and `_conclude` for pre and post processing. See the example
    below.

    .. code-block:: python

       class NewAnalysis(AnalysisBase):
           def __init__(self, atomgroup, parameter, **kwargs):
               super(NewAnalysis, self).__init__(atomgroup.universe.trajectory,
                                                 **kwargs)
               self._parameter = parameter
               self._ag = atomgroup

           def _prepare(self):
               # OPTIONAL
               # Called before iteration on the trajectory has begun.
               # Data structures can be set up at this time
               self.result = []

           def _single_frame(self):
               # REQUIRED
               # Called after the trajectory is moved onto each new frame.
               # store result of `some_function` for a single frame
               self.result.append(some_function(self._ag, self._parameter))

           def _conclude(self):
               # OPTIONAL
               # Called once iteration on the trajectory is finished.
               # Apply normalisation and averaging to results here.
               self.result = np.asarray(self.result) / np.sum(self.result)

    Afterwards the new analysis can be run like this.

    .. code-block:: python

       na = NewAnalysis(u.select_atoms('name CA'), 35).run(start=10, stop=20)
       print(na.result)

    """

    def __init__(self, trajectory, verbose=False, **kwargs):
        """
        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        verbose : bool, optional
           Turn on more logging and debugging, default ``False``
        **kwargs
           The deprecated keywords ``start``, ``stop`` and ``step`` are
           still accepted; each one that is not ``None`` is stored as an
           instance attribute and a :class:`DeprecationWarning` is issued.
           Other keywords are ignored.
        """
        self._trajectory = trajectory
        self._verbose = verbose
        # do deprecated kwargs
        # remove in 1.0
        deps = []
        for arg in ['start', 'stop', 'step']:
            if arg in kwargs and kwargs[arg] is not None:
                deps.append(arg)
                setattr(self, arg, kwargs[arg])
        if deps:
            warnings.warn('Setting the following kwargs should be '
                          'done in the run() method: {}'.format(
                              ', '.join(deps)),
                          DeprecationWarning)

    def _setup_frames(self, trajectory, start=None, stop=None, step=None,
                      verbose=None):
        """
        Pass a Reader object and define the desired iteration pattern
        through the trajectory

        Sets ``self.start``, ``self.stop``, ``self.step``, ``self.n_frames``
        and creates the progress meter ``self._pm``.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        verbose : bool, optional
            verbosity of the progress meter; ``None`` (default) uses the
            instance's ``_verbose`` setting (``False`` if it was never set)
        """
        self._trajectory = trajectory
        # TODO: Remove once start/stop/step are deprecated from init
        # See if these have been set as class attributes, and use that
        # NOTE(review): attributes stored by an earlier call of this method
        # are picked up here as well, so they take precedence over the
        # arguments of a later call -- confirm whether that is intended.
        start = getattr(self, 'start', start)
        stop = getattr(self, 'stop', stop)
        step = getattr(self, 'step', step)
        start, stop, step = trajectory.check_slice_indices(start, stop, step)
        self.start = start
        self.stop = stop
        self.step = step
        self.n_frames = len(range(start, stop, step))
        # report roughly every percent, but at least every frame
        interval = int(self.n_frames // 100)
        if interval == 0:
            interval = 1

        if verbose is None:
            verbose = getattr(self, '_verbose', False)
        # the meter is always created with at least one step
        self._pm = ProgressMeter(self.n_frames if self.n_frames else 1,
                                 interval=interval, verbose=verbose)

    def _single_frame(self):
        """Calculate data from a single frame of trajectory

        Don't worry about normalising, just deal with a single frame.
        """
        raise NotImplementedError("Only implemented in child classes")

    def _prepare(self):
        """Set things up before the analysis loop begins"""
        pass

    def _conclude(self):
        """Finalise the results you've gathered.

        Called at the end of the run() method to finish everything up.
        """
        pass

    def run(self, start=None, stop=None, step=None, verbose=None):
        """Perform the calculation

        Calls :meth:`_prepare`, then :meth:`_single_frame` once for every
        frame of the selected trajectory slice (with ``self._frame_index``
        and ``self._ts`` set), and finally :meth:`_conclude`.

        Parameters
        ----------
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        verbose : bool, optional
            Turn on verbosity of the progress meter for this run; ``None``
            (default) uses the value given to the constructor

        Returns
        -------
        self
            the analysis instance, so that calls can be chained
        """
        logger.info("Choosing frames to analyze")
        # if verbose unchanged, use class default
        verbose = getattr(self, '_verbose',
                          False) if verbose is None else verbose

        # hand the per-run verbosity to the progress meter
        self._setup_frames(self._trajectory, start, stop, step,
                           verbose=verbose)
        logger.info("Starting preparation")
        self._prepare()
        for i, ts in enumerate(
                self._trajectory[self.start:self.stop:self.step]):
            self._frame_index = i
            self._ts = ts
            self._single_frame()
            self._pm.echo(self._frame_index)
        logger.info("Finishing up")
        self._conclude()
        return self
def density_from_Universe(universe, delta=1.0, atomselection='name OH2',
                          start=None, stop=None, step=None,
                          metadata=None, padding=2.0, cutoff=0,
                          soluteselection=None, use_kdtree=True,
                          update_selection=False, verbose=False, interval=1,
                          quiet=None, parameters=None, gridcenter=None,
                          xdim=None, ydim=None, zdim=None):
    """Create a density grid from a :class:`MDAnalysis.Universe` object.

    The trajectory is read, frame by frame, and the atoms selected with
    `atomselection` are histogrammed on a grid with spacing `delta`. The
    accumulated histogram is divided by the number of analysed frames and
    converted to a density with :meth:`Density.make_density`.

    Parameters
    ----------
    universe : MDAnalysis.Universe
        :class:`MDAnalysis.Universe` object with a trajectory
    atomselection : str (optional)
        selection string (MDAnalysis syntax) for the species to be analyzed
        ["name OH2"]
    delta : float (optional)
        bin size for the density grid in Angstroem (same in x,y,z) [1.0]
    start : int (optional)
    stop : int (optional)
    step : int (optional)
        Slice the trajectory as ``trajectory[start:stop:step]``; default
        is to read the whole trajectory.
    metadata : dict (optional)
        `dict` of additional data to be saved with the object; the meta data
        are passed through as they are. Note that the bookkeeping entries
        written by this function (``'psf'``, ``'dcd'``, ``'n_frames'``, ...)
        are added to the very same dictionary object.
    padding : float (optional)
        increase histogram dimensions by padding (on top of initial box size)
        in Angstroem. Padding is ignored when setting a user defined grid.
        [2.0]
    soluteselection : str (optional)
        MDAnalysis selection for the solute, e.g. "protein" [``None``]
    cutoff : float (optional)
        With `cutoff`, select "<atomsel> NOT WITHIN <cutoff> OF
        <soluteselection>" (Special routines that are faster than the
        standard ``AROUND`` selection); any value that evaluates to
        ``False`` (such as the default 0) disables this special selection.
    use_kdtree : bool (optional)
        Passed on to the "not within" coordinate factory that is used when
        `cutoff` and `soluteselection` are set [``True``]
    update_selection : bool (optional)
        Should the selection of atoms be updated for every step? [``False``]

        - ``True``: atom selection is updated for each frame, can be slow
        - ``False``: atoms are only selected at the beginning
    verbose : bool (optional)
        Print status update to the screen for every *interval* frame?
        [``False``]

        - ``False``: no status updates when a new frame is processed
        - ``True``: status update every frame (including number of atoms
          processed, which is interesting with ``update_selection=True``)
    interval : int (optional)
        Show status update every `interval` frame [1]
    quiet : bool (optional)
        Deprecated in favor of `verbose`; the value is accepted but not
        used by this function [``None``]
    parameters : dict (optional)
        `dict` with some special parameters for :class:`Density` (see docs);
        the entry ``'isDensity'`` is always overwritten with ``False``.
    gridcenter : numpy ndarray, float32 (optional)
        3 element numpy array detailing the x, y and z coordinates of the
        center of a user defined grid box in Angstroem [``None``]
    xdim : float (optional)
        User defined x dimension box edge in ångström; ignored if
        gridcenter is ``None``
    ydim : float (optional)
        User defined y dimension box edge in ångström; ignored if
        gridcenter is ``None``
    zdim : float (optional)
        User defined z dimension box edge in ångström; ignored if
        gridcenter is ``None``

    Returns
    -------
    :class:`Density`
        A :class:`Density` instance with the histogrammed data together
        with associated metadata.

    Notes
    -----
    By default, the `atomselection` is static, i.e., atoms are only selected
    once at the beginning. If you want *dynamically changing selections* (such
    as "name OW and around 4.0 (protein and not name H*)", i.e., the water
    oxygen atoms that are within 4 Å of the protein heavy atoms) then set
    ``update_selection=True``. For the special case of calculating a density of
    the "bulk" solvent away from a solute use the optimized selections with
    keywords *cutoff* and *soluteselection* (see Examples below).

    The progress meter counts the frames that are actually analysed, i.e.,
    the frames of ``trajectory[start:stop:step]``.

    Examples
    --------
    Basic use for creating a water density (just using the water oxygen
    atoms "OW")::

      density = density_from_Universe(universe, delta=1.0,
                                      atomselection='name OW')

    If you are only interested in water within a certain region, e.g., within
    a vicinity around a binding site, you can use a selection that updates
    every step by setting the `update_selection` keyword argument::

      site_density = density_from_Universe(
          universe, delta=1.0,
          atomselection='name OW and around 5 (resid 156 157 305)',
          update_selection=True)

    A special case for an updating selection is to create the "bulk density",
    i.e., the water outside the immediate solvation shell of a protein: Select
    all water oxygen atoms that are *farther away* than a given cut-off (say, 4
    Å) from the solute (here, heavy atoms of the protein)::

      bulk = density_from_Universe(
          universe, delta=1.0, atomselection='name OW',
          soluteselection="protein and not name H*", cutoff=4)

    (Using the special case for the bulk with `soluteselection` and `cutoff`
    improves performance over the simple `update_selection` approach.)

    If you are interested in explicitly setting a grid box of a given edge size
    and origin, you can use the gridcenter and x/y/zdim arguments. For example
    to plot the density of waters within 5 Å of a ligand (in this case the
    ligand has been assigned the residue name "LIG") in a cubic grid with 20 Å
    edges which is centered on the centre of mass (COM) of the ligand::

      # Create a selection based on the ligand
      ligand_selection = universe.select_atoms("resname LIG")

      # Extract the COM of the ligand
      ligand_COM = ligand_selection.center_of_mass()

      # Generate a density of waters on a cubic grid centered on the ligand COM
      # In this case, we update the atom selection as shown above.
      water_density = density_from_Universe(
          universe, delta=1.0,
          atomselection='name OW and around 5 resname LIG',
          update_selection=True, gridcenter=ligand_COM,
          xdim=20.0, ydim=20.0, zdim=20.0)

    (It should be noted that the `padding` keyword is not used when a user
    defined grid is assigned).

    .. versionchanged:: 0.19.0
       *gridcenter*, *xdim*, *ydim* and *zdim* keywords added to allow for
       user defined boxes
    .. versionchanged:: 0.13.0
       *update_selection* and *quiet* keywords added
    .. deprecated:: 0.16
       The keyword argument *quiet* is deprecated in favor of *verbose*.
    """
    u = universe

    if cutoff > 0 and soluteselection is not None:
        # special fast selection for
        # '<atomsel> not within <cutoff> of <solutesel>'
        notwithin_coordinates = notwithin_coordinates_factory(
            u, atomselection, soluteselection, cutoff,
            use_kdtree=use_kdtree, updating_selection=update_selection)

        def current_coordinates():
            return notwithin_coordinates()
    else:
        group = u.select_atoms(atomselection, updating=update_selection)

        def current_coordinates():
            return group.positions

    coord = current_coordinates()
    logger.info(
        "Selected {0:d} atoms out of {1:d} atoms ({2!s}) from {3:d} total."
        "".format(coord.shape[0], len(u.select_atoms(atomselection)),
                  atomselection, len(u.atoms))
    )

    # mild warning; typically this is run on RMS-fitted trajectories and
    # so the box information is rather meaningless
    angles = u.trajectory.ts.dimensions[3:]
    if tuple(angles) != (90., 90., 90.):
        msg = "Non-orthorhombic unit-cell --- make sure that it has been remapped properly!"
        warnings.warn(msg)
        logger.warning(msg)

    if gridcenter is not None:
        # Generate a copy of smin/smax from coords to later check if the
        # defined box might be too small for the selection
        smin = np.min(coord, axis=0)
        smax = np.max(coord, axis=0)
        # Overwrite smin/smax with user defined values
        smin, smax = _set_user_grid(gridcenter, xdim, ydim, zdim, smin, smax)
    else:
        # Make the box bigger to avoid as much as possible 'outlier'. This
        # is important if the sites are defined at a high density: in this
        # case the bulk regions don't have to be close to 1 * n0 but can
        # be less. It's much more difficult to deal with outliers. The
        # ideal solution would use images: implement 'looking across the
        # periodic boundaries' but that gets complicate when the box
        # rotates due to RMS fitting.
        smin = np.min(coord, axis=0) - padding
        smax = np.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    arange = np.transpose(np.vstack((BINS['min'], BINS['max'])))
    bins = BINS['Nbins']

    # create empty grid with the right dimensions (and get the edges);
    # np.histogramdd returns raw counts by default, so the deprecated
    # ``normed`` keyword is not needed
    grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange)
    grid *= 0.0
    h = grid.copy()

    start, stop, step = u.trajectory.check_slice_indices(start, stop, step)
    n_frames = len(range(start, stop, step))

    # The meter is sized by (and fed with the index of) the frames that are
    # actually analysed so that the percentage is right for sliced runs.
    pm = ProgressMeter(n_frames if n_frames else 1, interval=interval,
                       verbose=verbose,
                       format="Histogramming %(n_atoms)6d atoms in frame "
                       "%(step)5d/%(numsteps)d [%(percentage)5.1f%%]\r")
    for index, ts in enumerate(u.trajectory[start:stop:step]):
        coord = current_coordinates()
        pm.echo(index, n_atoms=len(coord))
        if len(coord) == 0:
            # nothing selected in this frame; it still counts towards the
            # average through n_frames
            continue
        h[:], edges[:] = np.histogramdd(coord, bins=bins, range=arange)
        grid += h  # accumulate average histogram

    grid /= float(n_frames)

    metadata = metadata if metadata is not None else {}
    metadata['psf'] = u.filename
    metadata['dcd'] = u.trajectory.filename
    metadata['atomselection'] = atomselection
    metadata['n_frames'] = n_frames
    metadata['totaltime'] = round(u.trajectory.n_frames * u.trajectory.dt, 3)
    metadata['dt'] = u.trajectory.dt
    metadata['time_unit'] = MDAnalysis.core.flags['time_unit']
    try:
        metadata['trajectory_skip'] = u.trajectory.skip_timestep  # frames
    except AttributeError:
        metadata['trajectory_skip'] = 1  # seems to not be used..
    try:
        metadata['trajectory_delta'] = u.trajectory.delta  # in native units
    except AttributeError:
        metadata['trajectory_delta'] = 1
    if cutoff > 0 and soluteselection is not None:
        metadata['soluteselection'] = soluteselection
        metadata['cutoff'] = cutoff  # in Angstrom

    parameters = parameters if parameters is not None else {}
    parameters['isDensity'] = False  # must override

    g = Density(grid=grid, edges=edges,
                units={'length': MDAnalysis.core.flags['length_unit']},
                parameters=parameters, metadata=metadata)
    g.make_density()
    logger.info("Density completed (initial density in Angstrom**-3)")

    return g
def run(self, **kwargs):
    """Perform RMSD analysis on the trajectory.

    A number of parameters can be changed from the defaults. The
    result is stored as the array :attr:`RMSD.rmsd`: one row per analysed
    frame with the columns *frame*, *time*, *RMSD of the main selection*,
    followed by one RMSD column per entry in ``groupselections``.

    :Keywords:
      *start*, *stop*, *step*
         start and stop frame index with step size: analyse
         ``trajectory[start:stop:step]`` [``None``]
      *mass_weighted*
         do a mass-weighted RMSD fit (defaults to the value stored on
         the instance)
      *tol_mass*
         Reject match if the atomic masses for matched atoms differ by more than
         *tol_mass* (not read by this method)
      *ref_frame*
         frame index to select frame from *reference* (defaults to the
         value stored on the instance)

    The progress meter is advanced with the running index of the analysed
    frames so that it is consistent with the number of frames in
    ``trajectory[start:stop:step]``.
    """
    start = kwargs.pop('start', None)
    stop = kwargs.pop('stop', None)
    step = kwargs.pop('step', None)
    mass_weighted = kwargs.pop('mass_weighted', self.mass_weighted)
    ref_frame = kwargs.pop('ref_frame', self.ref_frame)

    natoms = self.traj_atoms.n_atoms
    trajectory = self.universe.trajectory
    traj_atoms = self.traj_atoms

    if mass_weighted:
        # if performing a mass-weighted alignment/rmsd calculation
        weight = self.ref_atoms.masses / self.ref_atoms.masses.mean()
    else:
        weight = None

    # reference centre of mass system
    # NOTE(review): the "- 1" presumably converts a 1-based frame number
    # into a 0-based index; if ts.frame is already 0-based the reference
    # trajectory is restored to the wrong frame below -- confirm.
    current_frame = self.reference.trajectory.ts.frame - 1
    try:
        # Move to the ref_frame
        # (coordinates MUST be stored in case the ref traj is advanced
        # elsewhere or if ref == mobile universe)
        self.reference.trajectory[ref_frame]
        ref_com = self.ref_atoms.center_of_mass()
        ref_coordinates = self.ref_atoms.positions - ref_com  # makes a copy
        if self.groupselections_atoms:
            groupselections_ref_coords_T_64 = [
                self.reference.select_atoms(*s['reference']).positions.T.astype(np.float64)
                for s in self.groupselections]
    finally:
        # Move back to the original frame
        self.reference.trajectory[current_frame]
    ref_coordinates_T_64 = ref_coordinates.T.astype(np.float64)

    # allocate the array for selection atom coords
    traj_coordinates = traj_atoms.coordinates().copy()

    if self.groupselections_atoms:
        # Only carry out a rotation if we want to calculate secondary RMSDs.
        # R: rotation matrix that aligns r-r_com, x~-x~com
        #    (x~: selected coordinates, x: all coordinates)
        # Final transformed traj coordinates: x' = (x-x~_com)*R + ref_com
        rot = np.zeros(9, dtype=np.float64)  # allocate space for calculation
        R = np.matrix(rot.reshape(3, 3))
    else:
        rot = None

    # RMSD timeseries
    nframes = len(np.arange(0, len(trajectory))[start:stop:step])
    rmsd = np.zeros((nframes, 3 + len(self.groupselections_atoms)))

    percentage = ProgressMeter(
        nframes, interval=10,
        format="RMSD %(rmsd)5.2f A at frame %(step)5d/%(numsteps)d [%(percentage)5.1f%%]\r")

    for k, ts in enumerate(trajectory[start:stop:step]):
        # shift coordinates for rotation fitting
        # selection is updated with the time frame
        x_com = traj_atoms.center_of_mass().astype(np.float32)
        traj_coordinates[:] = traj_atoms.coordinates() - x_com

        rmsd[k, :2] = ts.frame, trajectory.time

        if self.groupselections_atoms:
            # 1) superposition structures
            # Need to transpose coordinates such that the coordinate array is
            # 3xN instead of Nx3. Also qcp requires that the dtype be float64
            # (I think we swapped the position of ref and traj in
            # CalcRMSDRotationalMatrix so that R acts **to the left** and can
            # be broadcasted; we're saving one transpose. [orbeckst])
            rmsd[k, 2] = qcp.CalcRMSDRotationalMatrix(
                ref_coordinates_T_64,
                traj_coordinates.T.astype(np.float64),
                natoms, rot, weight)
            R[:, :] = rot.reshape(3, 3)

            # Transform each atom in the trajectory (use inplace ops to
            # avoid copying arrays)
            # (Marginally (~3%) faster than
            # "ts.positions[:] = (ts.positions - x_com) * R + ref_com".)
            ts.positions -= x_com
            # R acts to the left & is broadcasted N times.
            ts.positions[:] = ts.positions * R
            ts.positions += ref_com

            # 2) calculate secondary RMSDs
            # (builtin zip instead of itertools.izip, which does not exist
            # on Python 3)
            for igroup, (refpos, atoms) in enumerate(
                    zip(groupselections_ref_coords_T_64,
                        self.groupselections_atoms), 3):
                rmsd[k, igroup] = qcp.CalcRMSDRotationalMatrix(
                    refpos,
                    atoms['mobile'].positions.T.astype(np.float64),
                    atoms['mobile'].n_atoms, None, weight)
        else:
            # only calculate RMSD by setting the Rmatrix to None
            # (no need to carry out the rotation as we already get the
            # optimum RMSD)
            rmsd[k, 2] = qcp.CalcRMSDRotationalMatrix(
                ref_coordinates_T_64,
                traj_coordinates.T.astype(np.float64),
                natoms, None, weight)

        # report progress relative to the analysed frames (0..nframes-1),
        # not the absolute frame number of the trajectory
        percentage.echo(k, rmsd=rmsd[k, 2])

    self.rmsd = rmsd
def helanal_trajectory(universe, select="name CA",
                       begin=None, finish=None,
                       matrix_filename="bending_matrix.dat",
                       origin_pdbfile="origin.pdb",
                       summary_filename="summary.txt",
                       screw_filename="screw.xvg",
                       tilt_filename="local_tilt.xvg",
                       fitted_tilt_filename="fit_tilt.xvg",
                       bend_filename="local_bend.xvg",
                       twist_filename="unit_twist.xvg",
                       prefix="helanal_", ref_axis=None,
                       verbose=False):
    """Perform HELANAL helix analysis on all frames in `universe`.

    Parameters
    ----------
    universe : Universe
    select : str (optional)
        selection string that selects Calpha atoms [``"name CA"``]
    begin : float (optional)
        start analysing for time (ps) >= *begin*; ``None`` starts from the
        beginning [``None``]
    finish : float (optional)
        stop analysis for time (ps) =< *finish*; ``None`` goes to the
        end of the trajectory [``None``]
    matrix_filename : str (optional)
        Output file- bending matrix [``"bending_matrix.dat"``]
    origin_pdbfile : str (optional)
        Output file- origin pdb file [``"origin.pdb"``]
    summary_filename : str (optional)
        Output file- all of the basic data [``"summary.txt"``]
    screw_filename : str (optional)
        Output file- local screw angles, one row per frame
        [``"screw.xvg"``]
    tilt_filename : str (optional)
        Output file- angle (degrees) between each local helix axis and
        `ref_axis`, one row per frame [``"local_tilt.xvg"``]
    fitted_tilt_filename : str (optional)
        Output file- tilt (degrees) of the line of best fit through the
        origins, one row per frame [``"fit_tilt.xvg"``]
    bend_filename : str (optional)
        Output file- local bend angles between successive local helix axes
        [``"local_bend.xvg"``]
    twist_filename : str (optional)
        Output file- local unit twist between successive helix turns
        [``"unit_twist.xvg"``]
    prefix : str (optional)
        Prefix to add to all output file names; set to ``None`` to disable
        [``"helanal_"``]
    ref_axis : array_like (optional)
        Calculate tilt angle relative to the axis; if ``None`` then
        ``[0,0,1]`` is chosen [``None``]
    verbose : bool (optional)
        Toggle diagnostic outputs. [``False``]

    Raises
    ------
    ValueError
        If the specified start (begin) time occurs after the end of the
        trajectory object.
        If the specified finish time precedes the specified start time or
        current time stamp of trajectory object.

    Notes
    -----
    Only a single helix is analyzed. Use the selection to specify the
    helix, e.g. with "name CA and resid 1:20".

    Each row of the per-frame ``.xvg`` output files consists of the frame
    number followed by the values for that frame, separated by single
    spaces.


    .. versionchanged:: 0.13.0
       New `quiet` keyword to silence frame progress output and most of the
       output that used to be printed to stdout is now logged to the logger
       *MDAnalysis.analysis.helanal* (at logelevel *INFO*).

    .. versionchanged:: 0.16.0
       Removed the `start` and `end` keywords for selecting residues because
       this can be accomplished more transparently with `select`. The
       first and last resid are directly obtained from the selection.

    .. deprecated:: 0.16.0
       The `quiet` keyword argument is deprecated in favor of the new
       `verbose` one.

    .. versionchanged:: 0.20.0
       ProgressMeter now iterates over the number of frames analysed.

    .. versionchanged:: 1.0.0
       Changed `selection` keyword to `select`
    """
    if ref_axis is None:
        ref_axis = np.array([0., 0., 1.])
    else:
        # enable MDA API so that one can use a tuple of atoms or AtomGroup with
        # two atoms
        ref_axis = np.asarray(ref_axis)

    ca = universe.select_atoms(select)
    start, end = ca.resids[[0, -1]]
    trajectory = universe.trajectory

    # Validate user supplied begin / end times
    traj_end_time = trajectory.ts.time + trajectory.totaltime

    if begin is not None:
        if traj_end_time < begin:
            # Begin occurs after the end of the trajectory, throw error
            msg = ("The input begin time ({0} ps) occurs after the end "
                   "of the trajectory ({1} ps)".format(begin, traj_end_time))
            raise ValueError(msg)
        elif trajectory.ts.time > begin:
            # Begin occurs before trajectory start, warn and reset
            msg = ("The input begin time ({0} ps) precedes the starting "
                   "trajectory time --- Setting starting frame to 0".format(
                       begin))
            warnings.warn(msg)
            logger.warning(msg)
            start_frame = None
        else:
            start_frame = int(
                np.ceil((begin - trajectory.ts.time) / trajectory.ts.dt))
    else:
        start_frame = None

    if finish is not None:
        if (begin is not None) and (begin > finish):
            # finish occurs before begin time
            msg = ("The input finish time ({0} ps) precedes the input begin "
                   "time ({1} ps)".format(finish, begin))
            raise ValueError(msg)
        elif trajectory.ts.time > finish:
            # you'd be starting with a finish time(in ps) that has already
            # passed or is not available
            msg = ("The input finish time ({0} ps) precedes the current "
                   "trajectory time ({1} ps)".format(finish, trajectory.time))
            raise ValueError(msg)
        elif traj_end_time < finish:
            # finish time occurs after the end of trajectory, warn
            msg = ("The input finish time ({0} ps) occurs after the end of "
                   "the trajectory ({1} ps). Finish time will be set to the "
                   "end of the trajectory".format(finish, traj_end_time))
            warnings.warn(msg)
            logger.warning(msg)
            end_frame = None
        else:
            # To replicate the original behaviour of break when
            # trajectory.time > finish, we add 1 here.
            end_frame = int(
                np.floor((finish - trajectory.ts.time) // trajectory.ts.dt) + 1)
    else:
        end_frame = None

    start_frame, end_frame, frame_step = trajectory.check_slice_indices(
        start_frame, end_frame, 1)
    n_frames = len(range(start_frame, end_frame, frame_step))

    if start is not None and end is not None:
        logger.info("Analysing from residue %d to %d", start, end)
    elif start is not None and end is None:
        logger.info("Analysing from residue %d to the C termini", start)
    elif start is None and end is not None:
        logger.info("Analysing from the N termini to %d", end)
    logger.info("Analysing %d/%d residues", ca.n_atoms,
                universe.atoms.n_residues)

    if prefix is not None:
        prefix = str(prefix)
        matrix_filename = prefix + matrix_filename
        origin_pdbfile = prefix + origin_pdbfile
        summary_filename = prefix + summary_filename
        screw_filename = prefix + screw_filename
        tilt_filename = prefix + tilt_filename
        fitted_tilt_filename = prefix + fitted_tilt_filename
        bend_filename = prefix + bend_filename
        twist_filename = prefix + twist_filename

    # The per-frame files are opened in append mode below, so existing files
    # are moved out of the way first.
    backup_file(matrix_filename)
    backup_file(origin_pdbfile)
    backup_file(summary_filename)
    backup_file(screw_filename)
    backup_file(tilt_filename)
    backup_file(fitted_tilt_filename)
    backup_file(bend_filename)
    backup_file(twist_filename)

    def append_row(filename, frame, values):
        # Append "frame v1 v2 ..." as one space-separated line. Writing the
        # fields with end='' would glue the frame number and all values
        # together into a single unparsable token.
        with open(filename, "a") as output:
            print(frame, *values, file=output)

    global_height = []
    global_twist = []
    global_rnou = []
    global_bending = []
    global_bending_matrix = []
    global_fitted_tilts = []

    pm = ProgressMeter(n_frames, verbose=verbose,
                       format="Frame {step:5d}/{numsteps} "
                              " [{percentage:5.1f}%]")

    for index, ts in enumerate(trajectory[start_frame:end_frame:frame_step]):
        pm.echo(index)
        frame = ts.frame
        ca_positions = ca.positions
        (twist, bending_angles, height, rnou, origins, local_helix_axes,
         local_screw_angles) = main_loop(ca_positions, ref_axis=ref_axis)

        origin_pdb(origins, origin_pdbfile)

        # calculate local bending matrix (it is looking at all i, j
        # combinations); only the upper triangle (i < j) is filled
        n_axes = len(local_helix_axes)
        if not global_bending_matrix:
            global_bending_matrix = [[[] for _ in local_helix_axes]
                                     for _ in local_helix_axes]

        for i in range(n_axes):
            for j in range(i + 1, n_axes):
                angle = np.rad2deg(
                    np.arccos(np.dot(local_helix_axes[i],
                                     local_helix_axes[j])))
                global_bending_matrix[i][j].append(angle)

        _, fit_tilt = vector_of_best_fit(origins)
        global_height += height
        global_twist += twist
        global_rnou += rnou
        global_fitted_tilts.append(np.rad2deg(fit_tilt))

        # print out rotations across the helix to a file
        append_row(twist_filename, frame, twist)
        append_row(bend_filename, frame, bending_angles)
        append_row(screw_filename, frame, local_screw_angles)
        append_row(tilt_filename, frame,
                   [np.rad2deg(mdamath.angle(axis, ref_axis))
                    for axis in local_helix_axes])

        with open(fitted_tilt_filename, "a") as tilt_output:
            print(frame, np.rad2deg(fit_tilt), file=tilt_output)

        if not global_bending:
            global_bending = [[] for _ in bending_angles]
        for store, tmp in zip(global_bending, bending_angles):
            store.append(tmp)

    twist_mean, twist_sd, twist_abdev = stats(global_twist)
    height_mean, height_sd, height_abdev = stats(global_height)
    rnou_mean, rnou_sd, rnou_abdev = stats(global_rnou)
    ftilt_mean, ftilt_sd, ftilt_abdev = stats(global_fitted_tilts)

    bending_statistics = [stats(item) for item in global_bending]
    bending_statistics_matrix = [[stats(col) for col in row]
                                 for row in global_bending_matrix]

    # One section per statistic; the index selects mean/SD/ABDEV from the
    # triple returned by stats() for each matrix element.
    with open(matrix_filename, 'w') as mat_output:
        for header, stat_index in (("Mean", 0), ("\nSD", 1), ("\nABDEV", 2)):
            print(header, file=mat_output)
            for row in bending_statistics_matrix:
                for col in row:
                    formatted_angle = "{0:6.1f}".format(col[stat_index])
                    print(formatted_angle, end='', file=mat_output)
                print('', file=mat_output)

    logger.info("Height: %g SD: %g ABDEV: %g (Angstroem)",
                height_mean, height_sd, height_abdev)
    logger.info("Twist: %g SD: %g ABDEV: %g",
                twist_mean, twist_sd, twist_abdev)
    logger.info("Residues/turn: %g SD: %g ABDEV: %g",
                rnou_mean, rnou_sd, rnou_abdev)
    logger.info("Fitted tilt: %g SD: %g ABDEV: %g",
                ftilt_mean, ftilt_sd, ftilt_abdev)
    logger.info("Local bending angles:")

    residue_statistics = list(zip(*bending_statistics))
    measure_names = ["Mean ", "SD ", "ABDEV"]
    # Labels for the bending-angle columns: numbered from 4 when no first
    # resid is known, otherwise from (first resid + 3).
    first_label = 4 if start is None else start + 3
    resid_labels = range(first_label,
                         len(residue_statistics[0]) + first_label)

    output = " ".join(["{0:8d}".format(item) for item in resid_labels])
    logger.info("ResID %s", output)
    for measure, name in zip(residue_statistics, measure_names):
        output = str(name) + " "
        output += " ".join(["{0:8.1f}".format(residue)
                            for residue in measure])
        logger.info(output)

    with open(summary_filename, 'w') as summary_output:
        # NOTE(review): the height is labelled '(nm)' here but '(Angstroem)'
        # in the log message above -- confirm which unit is intended.
        print("Height:", height_mean, "SD", height_sd, "ABDEV", height_abdev,
              '(nm)', file=summary_output)
        print("Twist:", twist_mean, "SD", twist_sd, "ABDEV", twist_abdev,
              file=summary_output)
        print("Residues/turn:", rnou_mean, "SD", rnou_sd, "ABDEV", rnou_abdev,
              file=summary_output)
        print("Local bending angles:", file=summary_output)

        print("ResID", end='', file=summary_output)
        for item in resid_labels:
            print("{0:8d}".format(item), end='', file=summary_output)
        print('', file=summary_output)

        for measure, name in zip(residue_statistics, measure_names):
            print(name, end='', file=summary_output)
            for residue in measure:
                print("{0:8.1f}".format(residue), end='',
                      file=summary_output)
            print('', file=summary_output)
class AnalysisBase(object):
    """Template for analyses that walk over the frames of a trajectory.

    The base class stores the trajectory reader together with the requested
    ``start``/``stop``/``step`` slice, creates a progress meter and provides
    the frame loop in :meth:`run`. A concrete analysis subclasses
    `AnalysisBase` and must implement `_single_frame`; `_prepare` and
    `_conclude` may be overridden for pre and post processing. See the
    example below.

    .. code-block:: python

       class NewAnalysis(AnalysisBase):
           def __init__(self, atomgroup, parameter, **kwargs):
               super(NewAnalysis, self).__init__(atomgroup.universe.trajectory,
                                                 **kwargs)
               self._parameter = parameter
               self._ag = atomgroup

           def _prepare(self):
               # OPTIONAL
               # Called before iteration on the trajectory has begun.
               # Data structures can be set up at this time
               self.result = []

           def _single_frame(self):
               # REQUIRED
               # Called after the trajectory is moved onto each new frame.
               # store result of `some_function` for a single frame
               self.result.append(some_function(self._ag, self._parameter))

           def _conclude(self):
               # OPTIONAL
               # Called once iteration on the trajectory is finished.
               # Apply normalisation and averaging to results here.
               self.result = np.asarray(self.result) / np.sum(self.result)

    Afterwards the new analysis can be run like this.

    .. code-block:: python

       na = NewAnalysis(u.select_atoms('name CA'), 35).run()
       print(na.result)

    """

    def __init__(self, trajectory, start=None, stop=None, step=None,
                 verbose=None, quiet=None):
        """
        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        verbose : bool, optional
            Turn on verbosity
        quiet : bool, optional
            Handed to ``_set_verbose`` together with `verbose` to determine
            the effective verbosity
        """
        verbosity = _set_verbose(verbose, quiet, default=False)
        self._verbose = verbosity
        self._quiet = not verbosity
        self._setup_frames(trajectory, start, stop, step)

    def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """Store the reader and the frame slice, and build the progress meter.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        """
        self._trajectory = trajectory
        # the slice is stored exactly as requested (``None`` included) ...
        self.start, self.stop, self.step = start, stop, step

        # ... and only resolved to concrete indices for counting the frames
        first, last, stride = trajectory.check_slice_indices(start, stop, step)
        self.n_frames = len(range(first, last, stride))
        # report roughly every percent, but at least at every frame
        interval = max(int(self.n_frames // 100), 1)

        # ensure _verbose is set when __init__ wasn't called, this is to not
        # break pre 0.16.0 API usage of AnalysisBase
        if not hasattr(self, '_verbose'):
            if not hasattr(self, '_quiet'):
                self._verbose = True
                self._quiet = False
            else:
                # Here, we are in the odd case where a children class defined
                # self._quiet without going through AnalysisBase.__init__.
                warnings.warn("The *_quiet* attribute of analyses is "
                              "deprecated (from 0.16)use *_verbose* instead.",
                              DeprecationWarning)
                self._verbose = not self._quiet

        # the meter is always created with at least one step
        self._pm = ProgressMeter(self.n_frames or 1, interval=interval,
                                 verbose=self._verbose)

    def _single_frame(self):
        """Process the current frame; must be provided by subclasses.

        No normalisation is needed here, only the work for one frame.
        """
        raise NotImplementedError("Only implemented in child classes")

    def _prepare(self):
        """Hook that runs once before the frame loop starts."""
        pass

    def _conclude(self):
        """Hook that runs once after the frame loop has finished.

        Called at the end of the run() method to finish everything up.
        """
        pass

    def run(self):
        """Run the analysis over the selected frames and return ``self``."""
        logger.info("Starting preparation")
        self._prepare()

        frames = self._trajectory[self.start:self.stop:self.step]
        for frame_index, timestep in enumerate(frames):
            # expose the loop state to _single_frame() through attributes
            self._frame_index = frame_index
            self._ts = timestep
            self._single_frame()
            self._pm.echo(self._frame_index)

        logger.info("Finishing up")
        self._conclude()
        return self
def _rms_fit_frames(traj, reference, select, writer, mass_weighted, tol_mass,
                    strict, quiet):
    """Fit every frame of *traj* onto *reference*, write it and return the RMSDs.

    Private helper of :func:`rms_fit_trj`; the arguments have the same meaning
    as there. *writer* is an already opened trajectory writer; it is used but
    not closed here.
    """
    frames = traj.trajectory

    select = rms._process_selection(select)
    ref_atoms = reference.select_atoms(*select['reference'])
    traj_atoms = traj.select_atoms(*select['mobile'])
    ref_atoms, traj_atoms = get_matching_atoms(ref_atoms, traj_atoms,
                                               tol_mass=tol_mass,
                                               strict=strict)
    # Count the atoms only AFTER matching: get_matching_atoms() may return
    # smaller groups, and the count must agree with the coordinate arrays
    # that are handed to the QCP routine below.
    natoms = traj_atoms.n_atoms

    logger.info("RMS-fitting on %d atoms.", len(ref_atoms))
    if mass_weighted:
        # weights for a mass-weighted alignment/rmsd calculation
        weight = ref_atoms.masses / ref_atoms.masses.mean()
    else:
        weight = None

    # reference centre of mass system
    ref_com = ref_atoms.center_of_mass()
    ref_coordinates = ref_atoms.coordinates() - ref_com

    # allocate the array for selection atom coords
    traj_coordinates = traj_atoms.coordinates().copy()

    # RMSD timeseries
    nframes = len(frames)
    rmsd = np.zeros((nframes,))

    # rot: flattened rotation matrix R that aligns r-r_com, x~-x~com
    #      (x~: selected coordinates, x: all coordinates)
    # Final transformed traj coordinates: x' = (x-x~_com)*R + ref_com
    rot = np.zeros(9, dtype=np.float64)  # allocate space for calculation

    percentage = ProgressMeter(
        nframes, interval=10, quiet=quiet,
        format="Fitted frame %(step)5d/%(numsteps)d [%(percentage)5.1f%%]\r")

    for k, ts in enumerate(frames):
        # shift coordinates for rotation fitting
        # selection is updated with the time frame
        x_com = traj_atoms.center_of_mass().astype(np.float32)
        traj_coordinates[:] = traj_atoms.coordinates() - x_com

        # Need to transpose coordinates such that the coordinate array is
        # 3xN instead of Nx3. Also qcp requires that the dtype be float64
        # (I think we swapped the position of ref and traj in
        # CalcRMSDRotationalMatrix so that R acts **to the left** and can be
        # broadcasted; we're saving one transpose. [orbeckst])
        rmsd[k] = qcp.CalcRMSDRotationalMatrix(
            ref_coordinates.T.astype(np.float64),
            traj_coordinates.T.astype(np.float64),
            natoms, rot, weight)

        # Transform each atom in the trajectory; R acts to the left. A plain
        # np.dot gives the same product as the former np.matrix multiplication.
        ts.positions -= x_com
        ts.positions[:] = np.dot(ts.positions, rot.reshape(3, 3))
        ts.positions += ref_com
        writer.write(traj.atoms)  # write whole input trajectory system
        percentage.echo(ts.frame)

    return rmsd


def rms_fit_trj(traj, reference, select='all', filename=None, rmsdfile=None,
                prefix='rmsfit_', mass_weighted=False, tol_mass=0.1,
                strict=False, force=True, quiet=False, **kwargs):
    """RMS-fit trajectory to a reference structure using a selection.

    Both *reference* and trajectory *traj* must be
    :class:`MDAnalysis.Universe` instances. If they contain a trajectory then
    it is used. The output file format is determined by the file extension of
    *filename*. One can also use the same universe if one wants to fit to the
    current frame.

    :Arguments:
      *traj*
         trajectory, :class:`MDAnalysis.Universe` object
      *reference*
         reference coordinates; :class:`MDAnalysis.Universe` object
         (uses the current time step of the object)
      *select*
         1. any valid selection string for
            :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` that
            produces identical selections in *mobile* and *reference*; or
         2. a dictionary ``{'mobile':sel1, 'reference':sel2}`` (the
            :func:`fasta2select` function returns such a dictionary based on
            a ClustalW_ or STAMP_ sequence alignment); or
         3. a tuple ``(sel1, sel2)``

         When using 2. or 3. with *sel1* and *sel2* then these selections can
         also each be a list of selection strings (to generate a AtomGroup
         with defined atom order as described under
         :ref:`ordered-selections-label`).
      *filename*
         file name for the RMS-fitted trajectory or pdb; defaults to the
         original trajectory filename (from *traj*) with *prefix* prepended
      *rmsdfile*
         file name for writing the RMSD timeseries [``None``]
      *prefix*
         prefix for autogenerating the new output filename
      *mass_weighted*
         do a mass-weighted RMSD fit
      *tol_mass*
         Reject match if the atomic masses for matched atoms differ by more
         than *tol_mass* [0.1]
      *strict*
         Default: ``False``

         - ``True``: Will raise :exc:`SelectionError` if a single atom does
           not match between the two selections.
         - ``False``: Will try to prepare a matching selection by dropping
           residues with non-matching atoms. See :func:`get_matching_atoms`
           for details.
      *force*
         - ``True``: Overwrite an existing output trajectory (default)
         - ``False``: simply return if the file already exists
      *quiet*
         - ``True``: suppress progress and logging for levels WARNING and
           below.
         - ``False``: show all status messages and do not change the
           logging level (default)

         .. Note:: If *quiet* is ``True`` then logging is switched off
            process-wide with :func:`logging.disable` while the function
            runs and re-enabled with ``logging.disable(logging.NOTSET)``
            when it returns or raises.
      *kwargs*
         All other keyword arguments are passed on the trajectory
         :class:`~MDAnalysis.coordinates.base.Writer`; this allows
         manipulating/fixing trajectories on the fly (e.g. change the output
         format by changing the extension of *filename* and setting different
         parameters as described for the corresponding writer). ``remarks``
         defaults to ``'RMS fitted trajectory to reference'``.

    :Returns: *filename* (either provided or auto-generated)

    .. _ClustalW: http://www.clustal.org/
    .. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/

    .. versionchanged:: 0.8
       Added *kwargs* to be passed to the trajectory
       :class:`~MDAnalysis.coordinates.base.Writer` and *filename* is
       returned.

    .. versionchanged:: 0.10.0
       Uses :func:`get_matching_atoms` to work with incomplete selections
       and new *strict* keyword. The new default is to be lenient whereas
       the old behavior was the equivalent of *strict* = ``True``.
    """
    frames = traj.trajectory
    if quiet:
        logging.disable(logging.WARN)
    try:
        kwargs.setdefault('remarks', 'RMS fitted trajectory to reference')
        if filename is None:
            path, fn = os.path.split(frames.filename)
            filename = os.path.join(path, prefix + fn)
            make_writer = frames.Writer
        else:
            make_writer = frames.OtherWriter
        if os.path.exists(filename) and not force:
            logger.warning(
                "{0} already exists and will NOT be overwritten; use force=True if you want this"
                .format(filename))
            return filename

        writer = make_writer(filename, **kwargs)
        try:
            rmsd = _rms_fit_frames(traj, reference, select, writer,
                                   mass_weighted, tol_mass, strict, quiet)
        finally:
            # release the output file even if fitting fails half-way
            writer.close()
        logger.info("Wrote %d RMS-fitted coordinate frames to file %r",
                    frames.n_frames, filename)

        if rmsdfile is not None:
            np.savetxt(rmsdfile, rmsd)
            logger.info("Wrote RMSD timeseries to file %r", rmsdfile)
    finally:
        # Runs on the early return above and on errors too, so logging is
        # never left disabled for the rest of the process.
        if quiet:
            logging.disable(logging.NOTSET)

    return filename
class AnalysisBase(object):
    """Template base class for analyses that loop over trajectory frames.

    The base class stores the trajectory reader together with the frame
    slice to iterate over and owns a progress meter; :meth:`run` drives the
    loop. A concrete analysis subclasses `AnalysisBase` and must provide
    `_single_frame`; `_prepare` and `_conclude` are optional hooks for pre
    and post processing, as in the example below.

    .. code-block:: python

       class NewAnalysis(AnalysisBase):
           def __init__(self, atomgroup, parameter, **kwargs):
               super(NewAnalysis, self).__init__(atomgroup.universe.trajectory,
                                                 **kwargs)
               self._parameter = parameter
               self._ag = atomgroup

           def _prepare(self):
               # OPTIONAL
               # Called before iteration on the trajectory has begun.
               # Data structures can be set up at this time
               self.result = []

           def _single_frame(self):
               # REQUIRED
               # Called after the trajectory is moved onto each new frame.
               # store result of `some_function` for a single frame
               self.result.append(some_function(self._ag, self._parameter))

           def _conclude(self):
               # OPTIONAL
               # Called once iteration on the trajectory is finished.
               # Apply normalisation and averaging to results here.
               self.result = np.asarray(self.result) / np.sum(self.result)

    Afterwards the new analysis can be run like this.

    .. code-block:: python

       na = NewAnalysis(u.select_atoms('name CA'), 35).run()
       print(na.result)
    """

    def __init__(self, trajectory, start=None, stop=None, step=None,
                 verbose=None, quiet=None):
        """
        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        verbose : bool, optional
            Turn on verbosity
        quiet : bool, optional
            Passed to ``_set_verbose`` together with *verbose* to determine
            the effective verbosity.
        """
        verbosity = _set_verbose(verbose, quiet, default=False)
        self._verbose = verbosity
        self._quiet = not verbosity
        self._setup_frames(trajectory, start, stop, step)

    def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """Store the Reader and the slice of frames that :meth:`run` visits.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        """
        self._trajectory = trajectory
        # keep the indices as normalised by the reader, not the raw arguments
        self.start, self.stop, self.step = trajectory.check_slice_indices(
            start, stop, step)
        self.n_frames = len(range(self.start, self.stop, self.step))

        # report roughly every percent of the run, but at least every frame
        meter_interval = max(1, self.n_frames // 100)

        # Subclasses written against the pre 0.16.0 API may never have called
        # AnalysisBase.__init__, in which case _verbose is still missing.
        if not hasattr(self, '_verbose'):
            if not hasattr(self, '_quiet'):
                self._verbose = True
                self._quiet = not self._verbose
            else:
                # odd case: the child class set self._quiet on its own
                warnings.warn(
                    "The *_quiet* attribute of analyses is "
                    "deprecated (from 0.16)use *_verbose* instead.",
                    DeprecationWarning)
                self._verbose = not self._quiet

        # the meter is always created with at least one step
        self._pm = ProgressMeter(self.n_frames or 1,
                                 interval=meter_interval,
                                 verbose=self._verbose)

    def _single_frame(self):
        """Process the frame the trajectory currently points at.

        Subclasses must override this; normalisation is left to
        :meth:`_conclude`.
        """
        raise NotImplementedError("Only implemented in child classes")

    def _prepare(self):
        """Hook called once before the frame loop starts; does nothing here."""
        return None

    def _conclude(self):
        """Hook called once after the frame loop; does nothing here.

        Subclasses finalise their gathered results in this method.
        """
        return None

    def run(self):
        """Run the analysis over the selected frames and return ``self``."""
        logger.info("Starting preparation")
        self._prepare()
        selected_frames = self._trajectory[self.start:self.stop:self.step]
        for index, timestep in enumerate(selected_frames):
            self._frame_index = index
            self._ts = timestep
            self._single_frame()
            self._pm.echo(self._frame_index)
        logger.info("Finishing up")
        self._conclude()
        return self