def gro_to_tracks(filename, destination, sub=slice(None), clear=True):
    """Convert a gro trajectory (as read by GroReader) into separate tracks.

    Arguments:
      filename  --  the trajectory file, passed to GroReader
      destination  --  the directory in which the tracks are written

    Optional arguments:
      sub  --  a slice passed to GroReader to select frames
      clear  --  passed on to MultiTracksWriter

    The tracks are: "time", "atom.pos.<index>.<x|y|z>",
    "atom.vel.<index>.<x|y|z>" and the nine "cell.<a|b|c>.<x|y|z>" tracks.
    Everything is stored as numpy.float32.
    """
    gro_reader = GroReader(filename, sub)
    num_atoms = gro_reader.num_atoms

    names = ["time"]
    fields = [("time", numpy.float32)]
    # Append the per-atom names one by one. The previous sum(lists, [])
    # construction copied the growing list for every atom, which is quadratic
    # in the number of atoms.
    for kind in "pos", "vel":
        for index in range(num_atoms):
            for cor in "xyz":
                names.append("atom.%s.%07i.%s" % (kind, index, cor))
        fields.append((kind, numpy.float32, (num_atoms, 3)))
    # The order of the cell names must match the order in which the (3,3)
    # "cell" field is flattened by the writer: the vector label (a, b, c)
    # varies fastest here.
    names.extend([
        "cell.a.x", "cell.b.x", "cell.c.x",
        "cell.a.y", "cell.b.y", "cell.c.y",
        "cell.a.z", "cell.b.z", "cell.c.z",
    ])
    fields.append(("cell", numpy.float32, (3, 3)))

    dtype = numpy.dtype(fields)
    filenames = [os.path.join(destination, name) for name in names]
    mtw = MultiTracksWriter(filenames, dtype, clear=clear)
    for time, pos, vel, cell in gro_reader:
        mtw.dump_row((time, pos, vel, cell))
    mtw.finish()
def atrj_to_tracks(filename, destination, sub=slice(None), atom_indexes=None, clear=True):
    """Convert a trajectory read by ATRJReader into separate tracks.

    Arguments:
      filename  --  the trajectory file, passed to ATRJReader
      destination  --  the directory in which the tracks are written

    Optional arguments:
      sub  --  a slice passed to ATRJReader to select frames
      atom_indexes  --  the atoms whose coordinates are stored; all atoms
                        reported by the reader when not given
      clear  --  passed on to MultiTracksWriter

    The tracks are "atom.pos.<index>.<x|y|z>" for every selected atom,
    followed by "time", "step" and "total_energy".
    """
    atrj_reader = ATRJReader(filename, sub)
    if atom_indexes is not None:
        atom_indexes = list(atom_indexes)
    else:
        atom_indexes = range(atrj_reader.num_atoms)

    # one track per Cartesian component of each selected atom
    filenames = [
        os.path.join(destination, "atom.pos.%07i.%s" % (index, cor))
        for index in atom_indexes
        for cor in ["x", "y", "z"]
    ]
    fields = [("cor", float, (len(atom_indexes), 3))]

    # the scalar tracks: (track name, field name, type)
    scalars = [
        ("time", "time", float),
        ("step", "step", int),
        ("total_energy", "tote", float),
    ]
    for track_name, field_name, field_type in scalars:
        filenames.append(os.path.join(destination, track_name))
        fields.append((field_name, field_type, 1))

    mtw = MultiTracksWriter(filenames, numpy.dtype(fields), clear=clear)
    for frame in atrj_reader:
        selected = frame.coordinates[atom_indexes]
        mtw.dump_row((selected, frame.time, frame.step, frame.total_energy))
    mtw.finish()
def cpmd_traj_to_tracks(filename, num_atoms, destination, sub=slice(None), atom_indexes=None, clear=True):
    """Convert a cpmd trajectory file into separate tracks.

    Arguments:
      filename  --  the trajectory file, passed to CPMDTrajectoryReader
      num_atoms  --  must be the number of atoms in the system
      destination  --  the directory in which the tracks are written

    Optional arguments:
      sub  --  a slice passed to CPMDTrajectoryReader to select frames
      atom_indexes  --  the atoms whose positions and velocities are stored;
                        all atoms when not given
      clear  --  passed on to MultiTracksWriter

    The tracks are "atom.pos.<index>.<x|y|z>" followed by
    "atom.vel.<index>.<x|y|z>" for every selected atom.
    """
    if atom_indexes is None:
        atom_indexes = range(num_atoms)
    else:
        atom_indexes = list(atom_indexes)

    names = []
    for kind in "pos", "vel":
        for index in atom_indexes:
            for cor in "xyz":
                names.append("atom.%s.%07i.%s" % (kind, index, cor))
    filenames = [os.path.join(destination, name) for name in names]

    shape = (len(atom_indexes), 3)
    dtype = numpy.dtype([("pos", float, shape), ("vel", float, shape)])
    mtw = MultiTracksWriter(filenames, dtype, clear=clear)
    ctr = CPMDTrajectoryReader(filename, sub)
    for pos, vel in ctr:
        # Only store the selected atoms, so that the rows match the shape
        # declared in the dtype. Dumping the full arrays was only correct
        # when all atoms were selected.
        # assumes pos and vel are numpy arrays with one row per atom -- TODO confirm
        mtw.dump_row((pos[atom_indexes], vel[atom_indexes]))
    mtw.finish()
def cpmd_ener_to_tracks(filename, destination, sub=slice(None), clear=True):
    """Convert a cpmd energy file into separate tracks.

    Arguments:
      filename  --  the energy file; the first seven whitespace-separated
                    words of each selected line are stored
      destination  --  the directory in which the tracks are written

    Optional arguments:
      sub  --  a slice that selects the lines to be processed
      clear  --  passed on to MultiTracksWriter
    """
    names = ["step", "fict_kinectic_energy", "temperature", "potential_energy", "classical_energy", "hamiltonian_energy", "ms_displacement"]
    filenames = [os.path.join(destination, name) for name in names]
    dtypes = [int, float, float, float, float, float, float]
    dtype = numpy.dtype([(name, t, 1) for name, t in zip(names, dtypes)])
    mtw = MultiTracksWriter(filenames, dtype, clear=clear)
    # the with block closes the file, also when a line fails to parse
    with open(filename) as f:
        for line in itertools.islice(f, sub.start, sub.stop, sub.step):
            row = tuple(float(word) for word in line.split()[:7])
            mtw.dump_row(row)
    mtw.finish()
def cp2k_ener_to_tracks(filename, destination, sub=slice(None), clear=True):
    """Convert a cp2k energy file into separate tracks.

    Arguments:
      filename  --  the energy file; the first six whitespace-separated
                    words of each selected line are stored
      destination  --  the directory in which the tracks are written

    Optional arguments:
      sub  --  a slice that selects which of the lines produced by
               iter_real_lines are processed
      clear  --  passed on to MultiTracksWriter

    The second column (time) is multiplied by femtosecond before it is
    stored; the other columns are stored as read.
    """
    names = ["step", "time", "kinetic_energy", "temperature", "potential_energy", "conserved_quantity"]
    filenames = [os.path.join(destination, name) for name in names]
    dtypes = [int, float, float, float, float, float]
    dtype = numpy.dtype([(name, t, 1) for name, t in zip(names, dtypes)])
    mtw = MultiTracksWriter(filenames, dtype, clear=clear)
    # the with block closes the file, also when a line fails to parse
    with open(filename) as f:
        # iter_real_lines presumably skips comment/empty lines -- verify
        for line in itertools.islice(iter_real_lines(f), sub.start, sub.stop, sub.step):
            row = [float(word) for word in line.split()[:6]]
            row[1] = row[1]*femtosecond
            mtw.dump_row(tuple(row))
    mtw.finish()
def cp2k_stress_to_tracks(filename, destination, sub=slice(None), clear=True):
    """Convert a cp2k stress file into separate tracks.

    Arguments:
      filename  --  the stress file; the first eleven whitespace-separated
                    words of each selected line are used: step, time and
                    nine stress tensor elements
      destination  --  the directory in which the tracks are written

    Optional arguments:
      sub  --  a slice that selects which of the lines produced by
               iter_real_lines are processed
      clear  --  passed on to MultiTracksWriter

    The time is multiplied by femtosecond and the stress tensor by bar. The
    "pressure" track is one third of the trace of the stress tensor.
    """
    names = ["step", "time", "stress.xx", "stress.xy", "stress.xz", "stress.yx", "stress.yy", "stress.yz", "stress.zx", "stress.zy", "stress.zz", "pressure"]
    filenames = [os.path.join(destination, name) for name in names]
    dtype = numpy.dtype([("step", int), ("time", float), ("stress", float, (3, 3)), ("pressure", float)])
    mtw = MultiTracksWriter(filenames, dtype, clear=clear)
    # the with block closes the file, also when a line fails to parse
    with open(filename) as f:
        # iter_real_lines presumably skips comment/empty lines -- verify
        for line in itertools.islice(iter_real_lines(f), sub.start, sub.stop, sub.step):
            values = [float(word) for word in line.split()[:11]]
            # the nine elements are reshaped to 3x3 and transposed before storage
            stress = numpy.array(values[2:11]).reshape(3, 3).transpose()*bar
            pressure = (stress[0, 0] + stress[1, 1] + stress[2, 2])/3
            mtw.dump_row((int(values[0]), values[1]*femtosecond, stress, pressure))
    mtw.finish()
def xyz_to_tracks(filename, middle_word, destination, sub=slice(None), file_unit=angstrom, atom_indexes=None, clear=True):
    """Convert an xyz file into separate tracks.

    Arguments:
      filename  --  the xyz file, passed to XYZReader
      middle_word  --  the second part of the track names, which have the
                       form "atom.<middle_word>.<index>.<x|y|z>"
      destination  --  the directory in which the tracks are written

    Optional arguments:
      sub  --  a slice passed to XYZReader to select frames
      file_unit  --  passed to XYZReader
      atom_indexes  --  the atoms that are stored; all atoms known to the
                        reader when not given
      clear  --  passed on to MultiTracksWriter
    """
    xyz_reader = XYZReader(filename, sub, file_unit=file_unit)
    if atom_indexes is not None:
        atom_indexes = list(atom_indexes)
    else:
        atom_indexes = range(len(xyz_reader.numbers))

    # one track per Cartesian component of each selected atom
    filenames = [
        os.path.join(destination, "atom.%s.%07i.%s" % (middle_word, index, cor))
        for index in atom_indexes
        for cor in ["x", "y", "z"]
    ]

    num_selected = len(atom_indexes)
    dtype = numpy.dtype([("cor", float, (num_selected, 3))])
    mtw = MultiTracksWriter(filenames, dtype, clear=clear)
    for title, coordinates in xyz_reader:
        selected = coordinates[atom_indexes]
        mtw.dump_row((selected,))
    mtw.finish()
def cp2k_cell_to_tracks(filename, destination, sub=slice(None), clear=True):
    """Convert a cp2k cell file into separate tracks.

    Arguments:
      filename  --  the cell file; the first twelve whitespace-separated
                    words of each selected line are used: step, time, nine
                    cell matrix elements and the volume
      destination  --  the directory in which the tracks are written

    Optional arguments:
      sub  --  a slice that selects which of the lines produced by
               iter_real_lines are processed
      clear  --  passed on to MultiTracksWriter

    The time is multiplied by femtosecond, the cell matrix by angstrom and
    the volume by angstrom**3. The lengths of the cell vectors ("cell.a",
    "cell.b", "cell.c") and the angles between them ("cell.alpha",
    "cell.beta", "cell.gamma", from arccos, so in radians) are derived from
    the columns of the stored cell matrix.
    """
    names = ["step", "time", "cell.a.x", "cell.a.y", "cell.a.z", "cell.b.x", "cell.b.y", "cell.b.z", "cell.c.x", "cell.c.y", "cell.c.z", "volume", "cell.a", "cell.b", "cell.c", "cell.alpha", "cell.beta", "cell.gamma"]
    filenames = [os.path.join(destination, name) for name in names]
    dtype = numpy.dtype([("step", int), ("time", float), ("cell", float, (3, 3)), ("volume", float), ("norms", float, 3), ("angles", float, 3)])
    mtw = MultiTracksWriter(filenames, dtype, clear=clear)
    # the with block closes the file, also when a line fails to parse
    with open(filename) as f:
        # iter_real_lines presumably skips comment/empty lines -- verify
        for line in itertools.islice(iter_real_lines(f), sub.start, sub.stop, sub.step):
            values = [float(word) for word in line.split()[:12]]
            row = [int(values[0]), values[1]*femtosecond]
            # after the transpose, the cell vectors are the columns
            cell = numpy.array(values[2:11]).reshape(3, 3).transpose()*angstrom
            row.append(cell)
            row.append(values[11] * angstrom**3)
            norms = numpy.sqrt((cell**2).sum(axis=0))
            row.append(norms)
            # the clip guards arccos against rounding errors just outside [-1,1]
            alpha = numpy.arccos(numpy.clip(numpy.dot(cell[:, 1], cell[:, 2])/norms[1]/norms[2], -1, 1))
            beta = numpy.arccos(numpy.clip(numpy.dot(cell[:, 2], cell[:, 0])/norms[2]/norms[0], -1, 1))
            gamma = numpy.arccos(numpy.clip(numpy.dot(cell[:, 0], cell[:, 1])/norms[0]/norms[1], -1, 1))
            row.append(numpy.array([alpha, beta, gamma]))
            mtw.dump_row(tuple(row))
    mtw.finish()
def lammps_dump_to_tracks(filename, destination, meta, sub=slice(None), clear=True):
    """Convert a dump file read by LAMMPSDumpReader into separate tracks.

    Arguments:
      filename  --  the dump file, passed to LAMMPSDumpReader
      destination  --  the directory in which the tracks are written
      meta  --  a sequence of (unit, name, isvector) tuples, one per
                per-atom quantity in the dump file. It is iterated twice,
                so it must not be a one-shot iterator.

    Optional arguments:
      sub  --  a slice passed to LAMMPSDumpReader to select frames
      clear  --  passed on to MultiTracksWriter

    A vector quantity is stored as "atom.<name>.<index>.<x|y|z>", a scalar
    quantity as "atom.<name>.<index>". The first track is "step".
    """
    # the reader gets one unit per column: three for a vector, one for a scalar
    units = []
    for unit, name, isvector in meta:
        if isvector:
            units.extend([unit]*3)
        else:
            units.append(unit)

    dump_reader = LAMMPSDumpReader(filename, units, sub)
    num_atoms = dump_reader.num_atoms

    # (track name template, field name template) per stored column
    vector_templates = [
        ("atom.%s.%07i.x", "atom.%s.x"),
        ("atom.%s.%07i.y", "atom.%s.y"),
        ("atom.%s.%07i.z", "atom.%s.z"),
    ]
    scalar_templates = [("atom.%s.%07i", "atom.%s")]

    filenames = [os.path.join(destination, "step")]
    fields = [("step", int)]
    for unit, name, isvector in meta:
        if isvector:
            templates = vector_templates
        else:
            templates = scalar_templates
        for track_template, field_template in templates:
            for i in xrange(num_atoms):
                filenames.append(os.path.join(destination, track_template % (name, i)))
            fields.append((field_template % name, float, num_atoms))

    mtw = MultiTracksWriter(filenames, numpy.dtype(fields), clear=clear)
    for frame in dump_reader:
        mtw.dump_row(tuple(frame))
    mtw.finish()
def dlpoly_output_to_tracks(
    filename, destination, sub=slice(None), clear=True, skip_equi_period=True,
    pos_unit=angstrom, time_unit=picosecond, angle_unit=deg,
    e_unit=amu/(angstrom/picosecond)**2
):
    """Convert the rows produced by DLPolyOutputReader into separate tracks.

    Arguments:
      filename  --  the output file, passed to DLPolyOutputReader
      destination  --  the directory in which the tracks are written

    Optional arguments:
      sub  --  a slice passed to DLPolyOutputReader to select rows
      clear  --  passed on to MultiTracksWriter
      skip_equi_period, pos_unit, time_unit, angle_unit, e_unit  --  passed
          unchanged to DLPolyOutputReader

    Each row from the reader is stored as one integer ("step") followed by
    29 floats, in the order of the track names listed below.
    """
    # NOTE(review): the default e_unit is a mass divided by a squared
    # velocity; an energy unit would be amu*(angstrom/picosecond)**2. Confirm
    # whether this default is intended.
    output_reader = DLPolyOutputReader(filename, sub, skip_equi_period, pos_unit, time_unit, angle_unit, e_unit)

    # NOTE(review): "bond_viral" looks like a typo for "bond_virial". It is
    # kept because it is the name of the track on disk.
    track_names = [
        "step", "conserved_quantity", "temperature", "potential_energy",
        "vanderwaals_energy", "coulomb_energy", "bond_energy",
        "bending_energy", "torsion_energy", "tethering_energy", "time",
        "enthalpy", "rotational_temperature", "virial",
        "vanderwaals_virial", "coulomb_virial", "bond_viral",
        "bending_virial", "constraint_virial", "tethering_virial",
        "cputime", "volume", "shell_temperature", "shell_energy",
        "shell_virial", "cell.alpha", "cell.beta", "cell.gamma",
        "pmf_virial", "pressure",
    ]
    filenames = []
    for track_name in track_names:
        filenames.append(os.path.join(destination, track_name))

    # the field names are placeholders; only their order and type matter
    fields = [("step", int)]
    for i in xrange(29):
        fields.append(("foo%i" % i, float))

    mtw = MultiTracksWriter(filenames, numpy.dtype(fields), clear=clear)
    for row in output_reader:
        mtw.dump_row(tuple(row))
    mtw.finish()
def init_proj(self, output_prefix=None):
    """Prepare the projection of the trajectory on the pca eigenmodes.

    When output_prefix is given, a MultiTracksWriter is created that stores
    the principal components in tracks with the following filenames:
    ${output_prefix}.pc.${index}, where the index is zero-padded to seven
    digits.

    After init_proj, call data_proj one or more times with the relevant data
    segments from the trajectory. Finally, call finish_proj.
    """
    num_modes = len(self.evals)

    writer = None
    if output_prefix is not None:
        track_paths = []
        for mode in xrange(num_modes):
            track_paths.append("%s.pc.%07i" % (output_prefix, mode))
        row_dtype = numpy.dtype([("data", float, num_modes)])
        writer = MultiTracksWriter(track_paths, row_dtype)
    self.proj_mtw = writer

    # accumulators, one element per eigenmode
    self.sqnorms_data = numpy.zeros(num_modes, float)
    self.sqnorms_cos = numpy.zeros(num_modes, float)
    self.dot_data_cos = numpy.zeros(num_modes, float)
    # the number of trajectory rows processed so far
    self.proj_counter = 0
class CovarianceMatrix(object):
    """A container for all information related to a covariance matrix."""

    def __init__(self, length, matrix, sum, weights=None, correlation=False, reference=None):
        """Initialize a covariance matrix object

           The arguments to initialize a CovarianceMatrix instance are built up
           by processing a trajectory. Look at CovarianceBlocks for an example.
           They are all stored as attributes. Also some derived properties are
           computed during the initialization.

           In a second stage, one can reprocess that data with init_proj,
           data_proj and finish_proj to compute the principal components and
           the cosine content.

           Arguments:
             length  --  the length of (a part of) the trajectory used to
                         construct the matrix
             matrix  --  the matrix built up by adding
                         numpy.dot(data.transpose(),data) for multiple data
                         arrays belonging to one block
             sum  --  the sum of the data over time

           Optional arguments:
             weights  --  When given, the principal components are computed in
                          weighted coordinates, i.e. each coordinate is scaled
                          with the square root of its weight.
             correlation  --  When True, the analysis is performed on the
                              matrix with correlation coefficients. This might
                              be useful to compare the eigenvalue spectrum with
                              the Wishart distribution. It takes precedence
                              over weights.
             reference  --  When given, the reference is assumed to be the
                            vector with the averages of the inputs. Otherwise,
                            the averages are derived from the input.

           Derived attributes:
             cov  --  The actual covariance/correlation matrix
             mean  --  The average of the inputs over time
             data_scale  --  The factor applied to each mean-subtracted
                             coordinate before projection (None when neither
                             correlation nor weights is used)
             evals  --  The eigenvalues of the covariance matrix
             evecs  --  The corresponding eigenvectors
             sigmas  --  The square roots of the eigenvalues.

           Attributes available after projection:
             sqnorms_data  --  squared norms of the principal components
             sqnorms_cos  --  squared norms of the cosines
             dot_data_cos  --  inner product between principal component and
                               cosine
             ccs  --  the cosine contents of each principal component
        """
        # the raw data
        self.length = length
        self.matrix = matrix
        self.sum = sum
        self.weights = weights
        self.correlation = correlation
        self.reference = reference
        # the derived properties
        self.cov = self.matrix / self.length  # the actual covariance matrix
        if self.reference is None:
            self.mean = self.sum / self.length
        else:
            self.mean = self.reference
        self.cov -= numpy.outer(self.mean, self.mean)
        # The same per-coordinate scale factor must be applied to the
        # covariance matrix here and to the data in data_proj. It is stored
        # because the standard deviations can no longer be recovered from
        # self.cov once it has been turned into a correlation matrix.
        if self.correlation:
            diag_sqrt = numpy.sqrt(numpy.diag(self.cov))
            self.cov /= numpy.outer(diag_sqrt, diag_sqrt)
            self.data_scale = 1.0/diag_sqrt
        elif self.weights is not None:
            scale = numpy.sqrt(self.weights)
            self.cov *= numpy.outer(scale, scale)
            self.data_scale = scale
        else:
            self.data_scale = None
        # the eigen decomposition
        self.evals, self.evecs = numpy.linalg.eigh(self.cov)
        # largest eigenvalues first
        self.evals = self.evals[::-1]
        self.evecs = self.evecs[:, ::-1]
        self.sigmas = numpy.sqrt(abs(self.evals))

    def init_proj(self, output_prefix=None):
        """Setup the projection of the trajectory on the pca eigenmodes.

           When output prefix is given, the principal components are written
           to tracks with the following filenames:
           ${output_prefix}.pc.${index}, where the index is zero-padded to
           seven digits.

           After init_proj, call data_proj one or more times with the relevant
           data segments from the trajectory. Finally, call finish_proj.
        """
        N = len(self.evals)
        if output_prefix is not None:
            paths_out = [
                "%s.pc.%07i" % (output_prefix, index)
                for index in range(N)
            ]
            dtype = numpy.dtype([("data", float, N)])
            self.proj_mtw = MultiTracksWriter(paths_out, dtype)
        else:
            self.proj_mtw = None
        self.sqnorms_data = numpy.zeros(N, float)
        self.sqnorms_cos = numpy.zeros(N, float)
        self.dot_data_cos = numpy.zeros(N, float)
        self.proj_counter = 0

    def data_proj(self, data):
        """Process data to compute the principal components and the cosine content

           First call init_proj, then call this routine multiple times. Finally
           call finish_proj.

           Argument:
             data  --  a two-dimensional array with a segment of the
                       trajectory: one row per time step, one column per
                       coordinate. It is not modified.
        """
        # the subtraction makes a copy, so the in-place scaling below does not
        # touch the array of the caller
        data = data - self.mean
        if self.data_scale is not None:
            # same scaling as applied to the covariance matrix in __init__
            data *= self.data_scale
        pcs = numpy.dot(data, self.evecs)
        if self.proj_mtw is not None:
            self.proj_mtw.dump_buffer({"data": pcs})
        t = numpy.arange(self.proj_counter, self.proj_counter+len(data))*(numpy.pi/self.length)
        for i in range(data.shape[1]):
            # iterate over the columns
            c = numpy.cos((1+i)*t)
            self.sqnorms_data[i] += numpy.dot(pcs[:, i], pcs[:, i])
            self.sqnorms_cos[i] += numpy.dot(c, c)
            self.dot_data_cos[i] += numpy.dot(pcs[:, i], c)
        self.proj_counter += len(data)

    def finish_proj(self):
        """Compute the actual cosine contents.

           Call finish_proj after the last call to data_proj.
        """
        # the small constant avoids a division by zero for empty projections
        self.ccs = self.dot_data_cos**2/(self.sqnorms_data*self.sqnorms_cos+1e-10)
        if self.proj_mtw is not None:
            self.proj_mtw.finish()
        del self.proj_mtw
        del self.proj_counter
def dlpoly_history_to_tracks(
    filename, destination, sub=slice(None), atom_indexes=None, clear=True,
    pos_unit=angstrom, vel_unit=angstrom/picosecond,
    frc_unit=amu*angstrom/picosecond**2, time_unit=picosecond,
    mass_unit=amu
):
    """Convert the frames produced by DLPolyHistoryReader into separate tracks.

    Arguments:
      filename  --  the history file, passed to DLPolyHistoryReader
      destination  --  the directory in which the tracks are written

    Optional arguments:
      sub  --  a slice passed to DLPolyHistoryReader to select frames
      atom_indexes  --  the atoms that are stored; all atoms reported by the
                        reader when not given
      clear  --  passed on to MultiTracksWriter
      pos_unit, vel_unit, frc_unit, time_unit, mass_unit  --  passed
          unchanged to DLPolyHistoryReader

    The tracks are "step", "time", the nine "cell.<a|b|c>.<x|y|z>" tracks,
    the cell vector lengths "cell.<a|b|c>", the cell angles
    "cell.<alpha|beta|gamma>" (from arccos, so in radians) and
    "atom.pos.<index>.<x|y|z>". Velocities ("atom.vel...") are added when
    the keytrj attribute of the reader is larger than zero, forces
    ("atom.frc...") when it is larger than one.
    """
    hist_reader = DLPolyHistoryReader(filename, sub, pos_unit, vel_unit, frc_unit, time_unit, mass_unit)

    if atom_indexes is None:
        atom_indexes = range(hist_reader.num_atoms)
    else:
        atom_indexes = list(atom_indexes)

    has_vel = hist_reader.keytrj > 0
    has_frc = hist_reader.keytrj > 1

    filenames = []
    fields = []

    filenames.append(os.path.join(destination, "step"))
    fields.append(("step", int, 1))
    filenames.append(os.path.join(destination, "time"))
    fields.append(("time", float, 1))
    for vec in "abc":
        for cor in "xyz":
            filenames.append(os.path.join(destination, "cell.%s.%s" % (vec, cor)))
    fields.append(("cell", float, (3, 3)))
    for vec in "abc":
        filenames.append(os.path.join(destination, "cell.%s" % (vec)))
    fields.append(("norms", float, 3))
    for angle in "alpha", "beta", "gamma":
        filenames.append(os.path.join(destination, "cell.%s" % (angle)))
    fields.append(("angles", float, 3))

    # the per-atom vector quantities that are present in this file
    kinds = ["pos"]
    if has_vel:
        kinds.append("vel")
    if has_frc:
        kinds.append("frc")
    for kind in kinds:
        for index in atom_indexes:
            for cor in "xyz":
                filenames.append(os.path.join(destination, "atom.%s.%07i.%s" % (kind, index, cor)))
        fields.append((kind, float, (len(atom_indexes), 3)))

    dtype = numpy.dtype(fields)
    mtw = MultiTracksWriter(filenames, dtype, clear=clear)
    field_names = [field[0] for field in fields]
    for frame in hist_reader:
        cell = frame["cell"]
        # the lengths and angles are derived from the columns of the cell matrix
        norms = numpy.sqrt((cell**2).sum(axis=0))
        frame["norms"] = norms
        # the clip guards arccos against rounding errors just outside [-1,1]
        alpha = numpy.arccos(numpy.clip(numpy.dot(cell[:, 1], cell[:, 2])/norms[1]/norms[2], -1, 1))
        beta = numpy.arccos(numpy.clip(numpy.dot(cell[:, 2], cell[:, 0])/norms[2]/norms[0], -1, 1))
        gamma = numpy.arccos(numpy.clip(numpy.dot(cell[:, 0], cell[:, 1])/norms[0]/norms[1], -1, 1))
        frame["angles"] = [alpha, beta, gamma]
        # Select the requested atoms. The forces use the same condition as the
        # "frc" field above (keytrj > 1); testing for keytrj > 0 here accessed
        # frame["frc"] for files that only contain velocities.
        for kind in kinds:
            frame[kind] = frame[kind][atom_indexes]
        mtw.dump_row(tuple(frame[name] for name in field_names))
    mtw.finish()