def __get_records_per_type__(self, lines, number):
    """
    Parses the number of molecules of a given type and skips the
    'atomic sites' lines that follow it.\n

    :Parameters:
        #. lines (list): the remaining file lines. Every consumed line is
           popped from the front of this list in place.
        #. number (int): the type number expected at the end of the
           'molecules of type' line.

    :Returns:
        #. nrecords (int): the number of molecules of this type.
    """
    nrecords = False
    while lines:
        line = lines.pop(0)
        if "molecules of type" in line:
            split = line.split()
            nrecords = int(split[0])
            # the '%' operator was missing between the message and its
            # arguments, so a failed check raised
            # "TypeError: 'str' object is not callable" instead of this message
            assert int(split[-1]) == int(number), (
                "molecule order %r in file doesn't match with number %r"
                % (str(split[-1]), str(number)))
            break
    # NOTE: a parsed count of 0 is treated the same way as a missing line
    if not nrecords:
        message = "number of records for type number %r not found." % number
        Logger.error(message)
        # a bare 'raise' has no active exception to re-raise here
        raise RuntimeError(message)
    # skip atomic sites lines
    line = lines.pop(0)
    assert "atomic sites" in line, "'atomic site' line must proceed 'molecules of type %i'" % number
    nsites = int(line.split()[0])
    assert nsites > 0, "number of 'atomic sites' must be bigger than 0"
    for _ in range(nsites):
        lines.pop(0)
    return nrecords
def export_atoms(self, filePath, indexesOffset=1, format="NAMD_PSF", closeFile=True):
    """
    Exports atoms to ascii file.\n

    :Parameters:
        #. filePath (path): the file path.
        #. indexesOffset (int): atoms indexing starts from zero. this adds an offset
        #. format (str): The format of exportation. Exisiting formats are: NAMD_PSF,
        #. closeFile (bool): whether to close the file once the export is done.
    """
    try:
        fd = open(filePath, 'w')
    except Exception:
        raise Logger.error("cannot open file %r for writing" % filePath)
    # compare by value: 'is' tests object identity and only works by accident
    # when both strings happen to be the same interned object
    if format == "NAMD_PSF":
        try:
            self.__NAMD_PSF_export_atoms__(fd, indexesOffset=indexesOffset)
        except Exception:
            # do not leak the file handle when the exporter fails
            fd.close()
            raise
    else:
        fd.close()
        raise Logger.error("format %r is not defined" % format)
    # close file
    if closeFile:
        fd.close()
def export_dihedrals(self, filePath, indexesOffset=1, key="atom_name", format="NAMD_PSF", closeFile=True):
    """
    Exports dihedrals to ascii file.\n

    :Parameters:
        #. filePath (path): the file path.
        #. indexesOffset (int): atoms indexing starts from zero. this adds an offset. applies only to NAMD_PSF
        #. key (str): any pdbparser.records attribute. applies only to NAMD_TOP
        #. format (str): The format of exportation. Exisiting formats are: NAMD_PSF, NAMD_TOP
        #. closeFile (bool): whether to close the file once the export is done.
    """
    try:
        fd = open(filePath, 'w')
    except Exception:
        raise Logger.error("cannot open file %r for writing" % filePath)
    # compare by value: 'is' tests object identity and only works by accident
    # when both strings happen to be the same interned object
    if format not in ("NAMD_PSF", "NAMD_TOP"):
        fd.close()
        raise Logger.error("format %r is not defined" % format)
    try:
        if format == "NAMD_PSF":
            self.__NAMD_PSF_export_dihedrals__(fd, indexesOffset=indexesOffset)
        else:
            self.__NAMD_TOP_export_dihedrals__(fd, key=key)
    except Exception:
        # do not leak the file handle when the exporter fails
        fd.close()
        raise
    # close file
    if closeFile:
        fd.close()
def __init__(self, trajectory, configurationsIndexes, cylinderAtomsIndexes, targetAtomsIndexes, axis=None, weighting="equal", histBin=1, *args, **kwargs):
    """
    The constructor.\n

    :Parameters:
        #. trajectory: forwarded, together with args and kwargs, to the parent constructor.
        #. configurationsIndexes: resolved through self.get_trajectory_indexes.
        #. cylinderAtomsIndexes: resolved through self.get_atoms_indexes.
        #. targetAtomsIndexes: resolved through self.get_atoms_indexes.
        #. axis: forwarded to self.__initialize_variables__.
        #. weighting (str): must be accepted by is_element_property.
        #. histBin (number): residency time histogram bin. It must be
           convertible to float, integer valued, positive and smaller than
           the number of configurations.
    """
    # set trajectory
    super(MeanSquareDisplacementInCylinder,self).__init__(trajectory, *args, **kwargs)
    # set configurations indexes
    self.configurationsIndexes = self.get_trajectory_indexes(configurationsIndexes)
    # set atoms indexes
    self.targetAtomsIndexes = self.get_atoms_indexes(targetAtomsIndexes)
    self.cylinderAtomsIndexes = self.get_atoms_indexes(cylinderAtomsIndexes)
    # set steps indexes
    self.numberOfSteps = len(self.targetAtomsIndexes)
    # set weighting
    assert is_element_property(weighting), Logger.error("weighting '%s' don't exist in database"%weighting)
    self.weighting = weighting
    # set residency time histogram bin
    try:
        self.histBin = float(histBin)
    except Exception:
        raise Logger.error("histBin must be number convertible. %s is given."%histBin)
    # these three checks used a lowercase 'logger' name while every other
    # call in this file goes through 'Logger'
    assert self.histBin%1 == 0, Logger.error("histBin must be integer. %s is given."%histBin)
    assert self.histBin>0, Logger.error("histBin must be positive. %s is given."%histBin)
    assert self.histBin<len(self.configurationsIndexes), Logger.error("histBin must smaller than numberOfConfigurations")
    # initialize variables
    self.__initialize_variables__(axis)
    # initialize results
    self.__initialize_results__()
    # get cylinder centers, matrices, radii, length
    Logger.info("%s --> initializing cylinder parameters along all configurations"%self.__class__.__name__)
    self.cylCenters, self.cylMatrices, self.cylRadii, self.cylLengths = self.__get_cylinder_properties__()
def __init__(self, filename):
    """
    The constructor.

    :Parameters:
        #. filename (string): the binary input file
    """
    # time unit in charmm
    self.charmmTimeToPs = 0.0488882129084
    # Identify the byte order of the file by trial-and-error
    self.__byteOrder = None
    # open() replaces the Python 2 only file() builtin, and the context
    # manager closes the handle that was previously left open
    with open(filename, 'rb') as fd:
        data = fd.read(4)
    # a file shorter than four bytes cannot hold the record length marker
    if len(data) == 4:
        for byte_order in ['<', '>']:
            reclen = struct.unpack(byte_order + 'i', data)[0]
            if reclen == 84:
                self.__byteOrder = byte_order
                break
    if self.__byteOrder is None:
        raise Logger.error("%s is not a DCD file" % filename)
    # Open the file
    self.__binary = FortranBinaryFile(filename, self.__byteOrder)
    # Read the header information
    header_data = self.__binary.next()
    # bytes literal so the comparison also holds when the record is a bytes
    # object. NOTE(review): assumes FortranBinaryFile yields raw bytes
    if header_data[:4] != b'CORD':
        raise Logger.error("%s is not a DCD file" % filename)
    self.header = struct.unpack(self.__byteOrder + '9id9i', header_data[4:])
    self.numberOfConfigurations = self.header[0]
    self.istart = self.header[1]
    self.nsavc = self.header[2]
    self.namnf = self.header[8]
    self.charmmVersion = self.header[-1]
    self.has_pbc_data = False
    self.has_4d = False
    if self.charmmVersion != 0:
        # non zero version: the header is re-read with a float time step
        self.header = struct.unpack(self.__byteOrder + '9if10i', header_data[4:])
        if self.header[10] != 0:
            self.has_pbc_data = True
        if self.header[11] != 0:
            self.has_4d = True
    self.delta = self.header[9] * self.charmmTimeToPs
    # Read the title
    title_data = self.__binary.next()
    nlines = struct.unpack(self.__byteOrder + 'i', title_data[:4])[0]
    assert len(title_data) == 80 * nlines + 4, Logger.error(
        "%s is not a DCD file" % filename)
    title_data = title_data[4:]
    # the title is stored as consecutive 80 characters wide lines
    title = [title_data[80 * i:80 * (i + 1)].rstrip() for i in range(nlines)]
    # joined with a bytes separator to match the type of the title lines
    self.title = b'\n'.join(title)
    # Read the number of atoms.
    self.natoms = self.__binary.get_record('i')[0]
    # Stop if there are fixed atoms.
    if self.namnf > 0:
        raise Logger.error("NAMD converter can not handle fixed atoms yet")
def convert(self, types=None):
    """
    Converts to pdbparser

    :Parameters:
        #. types (None, list): one entry per type found in the file. Every
           entry is either an element name or a dictionary holding 'name'
           and 'element' keys. If None, carbon is used for all types.
           Non dictionary entries of the given list are replaced in place.

    :Returns:
        #. self: this converter instance.
    """
    # read lines
    lines = self.get_lines()
    # get number of atoms
    self.info["number_of_records"] = self.__get_number_of_records__(lines)
    # get number of types
    self.info["number_of_types"] = self.__get_number_of_types__(lines)
    # get simulation box vectors
    self.info["vectors"] = self.__get_box_vectors__(lines)
    # set types names
    if types is None:
        Logger.info(
            "types are not given. carbon element is considered for all types")
        self.info["types"] = [{"name": "c%s" % idx, "element": "c"}
                              for idx in range(self.info["number_of_types"])]
    else:
        assert len(types) == self.info["number_of_types"], "types must be a list of length equal to the number of types"
        for idx in range(self.info["number_of_types"]):
            if not isinstance(types[idx], dict):
                assert is_element(types[idx]), "%s not found database elements" % types[idx]
                types[idx] = {"name": types[idx], "element": types[idx]}
            assert "name" in types[idx], "every type dictionary must have 'name' and 'element' keys"
            assert "element" in types[idx], "every type dictionary must have 'name' and 'element' keys"
            element = types[idx]["element"].lower()
            if element not in __atoms_database__:
                # 'Logger.warr' was a typo for 'Logger.warn'
                Logger.warn("type %r is not defined in database" % types[idx]["element"])
            else:
                types[idx]["element"] = element
        self.info["types"] = types
    # get types records number
    self.info["records_per_type"] = [
        self.__get_records_per_type__(lines, idx + 1)
        for idx in range(self.info["number_of_types"])
    ]
    assert sum(self.info["records_per_type"]) == self.info["number_of_records"], "the sum of number of molecules in all types must be equal to number of ' molecules of all types'"
    # get coordinates
    fracCoord = self.__get_coordinates__(lines)
    assert fracCoord.shape == (self.info["number_of_records"], 3), "stored fractional coordinates must be equal to number of ' molecules of all types'"
    # calculate real coordinates
    realCoord = self.__calculate_real_coordinates__(fracCoord)
    # create pdb
    self.__create_pdb__(realCoord)
    return self
def __get_number_of_types__(self, lines):
    """
    Parses the number of types from the next line.\n

    :Parameters:
        #. lines (list): the remaining file lines. The first one is popped
           from this list in place.

    :Returns:
        #. ntypes (int): the first field of the 'types of molecules' line.
    """
    # an exhausted list is reported the same way as a non matching line
    line = lines.pop(0) if lines else ""
    if "types of molecules" not in line:
        message = "number of 'types of molecules' not found."
        Logger.error(message)
        # a bare 'raise' has no active exception to re-raise here
        raise RuntimeError(message)
    return int(line.split()[0])
def __get_number_of_records__(self, lines):
    """
    Searches for the 'molecules of all types' line and parses its count.\n

    :Parameters:
        #. lines (list): the remaining file lines. Lines are popped from the
           front of this list in place, up to and including the matching one.

    :Returns:
        #. nrecords (int): the first field of the matching line.
    """
    nrecords = False
    while lines:
        line = lines.pop(0)
        if "molecules of all types" in line:
            nrecords = int(line.split()[0])
            break
    # NOTE: a parsed count of 0 is treated the same way as a missing line
    if not nrecords:
        message = "number of 'molecules of all types' not found."
        Logger.error(message)
        # a bare 'raise' has no active exception to re-raise here
        raise RuntimeError(message)
    return nrecords
def __initialize_variables__(self, clusterToBoxCenter, fold):
    """
    Stores the validated options and the indexes of the non cluster atoms.\n

    :Parameters:
        #. clusterToBoxCenter (bool): stored as self.clusterToBoxCenter.
        #. fold (bool): stored as self.fold.
    """
    # every trajectory atom index that is not a cluster index
    trajectoryAtoms = set(self._trajectory.atomsIndexes)
    clusterAtoms = set(self.clusterIndexes)
    self.restOfAtomsIndexes = list(trajectoryAtoms - clusterAtoms)
    # translate to center flag
    isBoolean = isinstance(clusterToBoxCenter, bool)
    assert isBoolean, Logger.error("clusterToBoxCenter must be boolean")
    self.clusterToBoxCenter = clusterToBoxCenter
    # fold flag
    isBoolean = isinstance(fold, bool)
    assert isBoolean, Logger.error("fold must be boolean")
    self.fold = fold
def __get_box_vectors__(self, lines):
    """
    Searches for the 'Defining vectors are:' line and parses the three
    vector lines that follow it.\n

    :Parameters:
        #. lines (list): the remaining file lines. Consumed lines are popped
           from the front of this list in place.

    :Returns:
        #. vectors (numpy.ndarray): the (3,3) array of the OX, OY and OZ vectors.
    """
    vectors = None
    while lines:
        line = lines.pop(0)
        if "Defining vectors are:" not in line:
            continue
        vectors = []
        for name in ("OX", "OY", "OZ"):
            try:
                vector = [float(it) for it in lines.pop(0).split()]
            except Exception:
                Logger.error("couldn't parse defining '%s' vectors" % name)
                # re-raise: going on would only fail later on an unbound name
                raise
            assert len(vector) == 3, "%s vector must have three float entries" % name
            vectors.append(vector)
        break
    # an explicit marker is needed: testing 'not lines' also triggered when
    # the vectors were simply the last lines of the file
    if vectors is None:
        message = "simulation box 'Defining vectors' not found."
        Logger.error(message)
        # a bare 'raise' has no active exception to re-raise here
        raise RuntimeError(message)
    return np.array(vectors)
def set_simulation_box(self, simulationBox):
    """
    Set the simulation box used by the current analysis.\n

    :Parameters:
        #. simulationBox (InfiniteBoundaries, PeriodicBoundaries): the
           simulation box instance. Its length must be 1 when the trajectory
           is a pdbparser instance, otherwise it must be equal to the
           length of the trajectory.
    """
    acceptedTypes = (InfiniteBoundaries, PeriodicBoundaries)
    assert isinstance(simulationBox, acceptedTypes), Logger.error("simulationBox must be a InfiniteBoundaries or PeriodicBoundaries instance")
    # the expected length depends on the kind of trajectory
    if not isinstance(self._trajectory, pdbparser):
        assert len(simulationBox) == len(self._trajectory), Logger.error("simulationBox length must be equal to length of trajectory")
    else:
        assert len(simulationBox) == 1, Logger.error("trajectory is a simngle pdb, simulationBox length must be 1")
    self._boundaryConditions = simulationBox
def __write_fnc_file__(self, path, name, bondsMap, bondsMapElementsKey, bonds):
    """
    writes .fnc

    :Parameters:
        #. path (str): The RMC++ output configuration file path.
        #. name (str): The tile name to be put in the beginning of the file
        #. bondsMap (dict): Dictionary of bonds elements keys mapping to a bonds indexes. Double dash '--' must seperate keys. e.g. {'H2--O': 1, 'H1--O': 2}
        #. bondsMapElementsKey (list): The list of all atoms keys in pdb used to map atoms to bondsMap. e.g. ["H1","H2", ... , "H1","H2","O","O", ...]
        #. bonds (dict): The dictionary of bonds indexes. e.g. {1:[100,101], 2:[102,103,104], ..., 999:[], 1000:[20,21], ...}
    """
    try:
        fd = open(path, 'w')
    except Exception:
        # the message referenced an undefined 'outputPath' name
        Logger.error("cannot open file %r for writing" % path)
        raise
    # the context manager closes the file even when writing fails
    with fd:
        # write pdb name
        fd.write(" " + str(name) + "\n\n")
        # write limits
        bondsMapLUT = dict((v, b) for b, v in bondsMap.items())
        fd.write(" No. of possible rmin-rmax pairs:\n")
        fd.write(" " + str(len(bondsMap)) + "\n")
        # write minimum
        fd.write("0.90".rjust(10) * len(bondsMap) + "\n")
        # write maximum
        fd.write("2.10".rjust(10) * len(bondsMap) + "\n")
        constraints = "".join(
            [bondsMapLUT[idx].rjust(10) for idx in sorted(bondsMapLUT)])
        fd.write("! %s \n" % constraints[1:])
        # write number of records
        fd.write(" " + str(len(bonds)) + "\n\n")
        # write records and bonds
        for cr in sorted(bonds):
            ctList = bonds[cr]
            fd.write(str(cr + 1).rjust(12) + str(len(ctList)).rjust(5) + "\n")
            types = " "
            for ct in ctList:
                fd.write(str(ct + 1).rjust(12))
                # a bond is not oriented, try both key orders. Going through
                # a set gave an arbitrary order and a single entry whenever
                # both atoms shared the same key
                first = str(bondsMapElementsKey[cr])
                second = str(bondsMapElementsKey[ct])
                bondKey = first + "--" + second
                if bondKey not in bondsMap:
                    bondKey = second + "--" + first
                types += str(bondsMap[bondKey]).ljust(1) + " "
            fd.write("\n")
            fd.write(types)
            fd.write("\n")
def correlation(data1, data2=None):
    """
    Calculates the numerical correlation between two numpy.ndarray data.

    :Parameters:
        #. data1 (numpy.ndarray): the first numpy.ndarray. If multidimensional the correlation calculation is performed on the first dimension.
        #. data2 (None, numpy.ndarray): the second numpy.ndarray. If None the data1 autocorrelation is calculated.

    :Returns:
        #. correlation (numpy.ndarray): the result of the numerical correlation.
    """
    # the signal must be a non empty array
    assert isinstance(data1, np.ndarray), Logger.error("data1 must be a non zero numpy.ndarray")
    nSamples = len(data1)
    assert nSamples > 0, Logger.error("data1 must be a non zero numpy.ndarray")
    # transforms are computed along the 0-axis, zero-padded to twice the
    # length of data1
    paddedLength = 2 * nSamples
    spectrum1 = FFT(data1, paddedLength, 0)
    if data2 is not None:
        assert isinstance(data2, np.ndarray), Logger.error("if not None, data2 must be a numpy.ndarray")
        spectrum2 = FFT(data2, paddedLength, 0)
    else:
        # autocorrelation case
        spectrum2 = spectrum1
    # product between FFT(data1)* and FFT(data2), then inverse transform
    product = np.conjugate(spectrum1) * spectrum2
    product = iFFT(product, len(product), 0)
    # keep the positive indexes only and normalize every index m by (N-m)
    positive = product.real[:nSamples]
    normalization = nSamples - np.arange(nSamples)
    if positive.ndim != 1:
        positive = np.add.reduce(positive, 1)
    return positive / normalization
def get_random_perpendicular_vector(vector):
    """
    Get random perpendicular vector to a given vector.

    :Parameters:
        #. vector (numpy.ndarray, list, tuple): the vector to compute a random perpendicular vector to it

    :Returns:
        #. perpVector (numpy.ndarray): the normalized perpendicular vector, of float32 data type
    """
    assert np.linalg.norm(vector), Logger.error("vector returned 0 norm")
    # easy cases: a vanishing component makes the matching axis perpendicular
    for axis in (0, 1, 2):
        if np.abs(vector[axis]) < 1e-6:
            unit = [0, 0, 0]
            unit[axis] = 1
            return np.array(unit, dtype=np.float32)
    # scale every component by a random factor drawn from (-1, 1]
    factors = 1 - 2 * np.random.random(3)
    scaled = np.array([vector[axis] * factors[axis] for axis in (0, 1, 2)])
    # the cross product is perpendicular to both of its operands
    perpVector = np.cross(scaled, vector)
    norm = np.linalg.norm(perpVector)
    return np.array(perpVector / norm, dtype=np.float32)
def get_atomic_form_factor(q, element, charge=0):
    """
    Calculates the Q dependant atomic form factor.\n

    :Parameters:
        #. q (list, tuple, numpy.ndarray): the q vector.
        #. element (str): the atomic element.
        #. charge (int): the expected charge of the element.

    :Returns:
        #. formFactor (numpy.ndarray): the calculated form factor.
    """
    assert is_element(element), "%s is not an element in database" % element
    element = str(element).lower()
    formFactors = __atoms_database__[element]['atomicFormFactor']
    assert charge in formFactors, Logger.error(
        "atomic form factor for element %s at with %s charge is not defined in database"
        % (element, charge))
    ff = __atoms_database__[element]['atomicFormFactor'][charge]
    # read the four (a, b) coefficients pairs and the constant term
    coefficients = [(ff['a%i' % idx], ff['b%i' % idx]) for idx in (1, 2, 3, 4)]
    c = ff['c']
    qOver4piSquare = (np.array(q) / (4. * np.pi))**2
    # sum of the four exponential terms, plus the constant
    terms = [a * np.exp(-b * qOver4piSquare) for a, b in coefficients]
    return terms[0] + terms[1] + terms[2] + terms[3] + c
def __save_datasheet__(self, path):
    """
    Saves all the results of self.results as the columns of a text file.\n

    :Parameters:
        #. path (str): the file path given to numpy.savetxt.
    """
    keys = list(self.results.keys())
    # every result must be one dimensional and of the same size
    resultsLength = None
    for key in keys:
        shape = self.results[key].shape
        resSize = np.sum(shape)
        if len(shape)>1:
            raise Logger.error("result %r is of dimension %s, only one dimensional results can be saved to datasheet. Try using other formats." %(key,self.results[key].shape))
        if resultsLength is None:
            resultsLength = resSize
            continue
        if resSize != resultsLength:
            raise Logger.error("All results must have the same size.")
    # one column per result, in keys order
    resultsArray = np.empty((resultsLength,len(keys)))
    for column, key in enumerate(keys):
        resultsArray[:,column] = self.results[key]
    np.savetxt(path, resultsArray, header = ' ; '.join(keys), delimiter=' ; ')
def save(self, path, formats=None):
    """
    Used to export the analysis results stored in self.results dictionary.\n

    :Parameters:
        #. path (str): The saving path. An extension is appended to it for every format.
        #. formats (None, str, list, tuple): The export formats. Accepted
           formats are ascii, datasheet and bin. If None, ascii is used.

    :Returns:
        #. self: this instance.
    """
    if formats is None:
        formats = ["ascii"]
    elif isinstance(formats, str):
        formats = [formats]
    else:
        assert isinstance(formats, (list, tuple))
        formats = list(formats)
    # format name, saving method name and file extension
    writers = (("ascii", "__save_ascii__", ".zip"),
               ("datasheet", "__save_datasheet__", ".xls"),
               ("bin", "__save_binary__", ".pkl"))
    for f in formats:
        for label, methodName, extension in writers:
            if f == label:
                getattr(self, methodName)(str(path)+extension)
                break
        else:
            raise Logger.error("Unknown saving format %r. only %s formats are acceptable" %(f,["ascii","bin",'datasheet']))
    return self
def convert(self):
    """
    Converts the dcd file and returns the resulting trajectory.\n

    :Returns:
        #. trajectory: the object returned by self.__convert_charmm__, with
           its _filePath attribute set to self.dcd.
    """
    supportedFormats = ('charmm', 'namd')
    if self.format not in supportedFormats:
        raise Logger.error("unsupported dcd format")
    # both supported formats go through the same converter
    self.trajectory = self.__convert_charmm__()
    self.trajectory._filePath = self.dcd
    return self.trajectory
def numberOfAtoms(self):
    """
    The number of atoms: the length of a pdbparser trajectory, or the
    numberOfAtoms attribute of a pdbTrajectory one.
    """
    trajectory = self._trajectory
    if isinstance(trajectory, pdbparser):
        return len(trajectory)
    if isinstance(trajectory, pdbTrajectory):
        return trajectory.numberOfAtoms
    raise Logger.error("trajectory must be a pdbparser or pdbTrajectory instance")
def numberOfConfigurations(self):
    """
    The number of configurations: 1 for a pdbparser trajectory, or the
    length of a pdbTrajectory one.
    """
    trajectory = self._trajectory
    if isinstance(trajectory, pdbparser):
        return 1
    if isinstance(trajectory, pdbTrajectory):
        return len(trajectory)
    raise Logger.error("trajectory must be a pdbparser or pdbTrajectory instance")
def time(self):
    """
    The time values: [0] for a pdbparser trajectory, or the _time attribute
    of a pdbTrajectory one.
    """
    trajectory = self._trajectory
    if isinstance(trajectory, pdbparser):
        return [0]
    if isinstance(trajectory, pdbTrajectory):
        return trajectory._time
    raise Logger.error("trajectory must be a pdbparser or pdbTrajectory instance")
def structure(self):
    """
    The structure: the trajectory itself when it is a pdbparser instance,
    or the _structure attribute of a pdbTrajectory one.
    """
    trajectory = self._trajectory
    if isinstance(trajectory, pdbparser):
        return trajectory
    if isinstance(trajectory, pdbTrajectory):
        return trajectory._structure
    raise Logger.error("trajectory must be a pdbparser or pdbTrajectory instance")
def step(self, index):
    """
    analysis step of calculation method.\n

    Rebuilds the cluster of one configuration in box coordinates, then
    writes the resulting real coordinates back to the trajectory.

    :Parameters:
        #. index (int): the step index

    :Returns:
        #. index (int): the given step index
        #. stepData (None): always None
    """
    if not isinstance(self._trajectory._boundaryConditions, PeriodicBoundaries):
        raise Logger.error("rebuild cluster is not possible with infinite boundaries trajectory")
    # get configuration index
    confIdx = self.configurationsIndexes[index]
    # get the configuration coordinates, converted to box coordinates
    realCoords = self._trajectory.get_configuration_coordinates(confIdx)
    boxCoords = self._trajectory._boundaryConditions.real_to_box_array(
        realArray=realCoords, index=confIdx)
    # get cluster atoms box coordinates
    clusterBoxCoords = boxCoords[self.clusterIndexes, :]
    # running sum and count of the atoms already placed
    placedSum = np.array([0., 0., 0.])
    placedCount = 0.0
    # incrementally construct cluster
    nClusterAtoms = clusterBoxCoords.shape[0]
    for atomIdx in range(nClusterAtoms):
        if atomIdx > 0:
            # offset of this atom from the center of the atoms placed so far
            offset = clusterBoxCoords[atomIdx, :] - (placedSum / placedCount)
            # remove multiple box distances
            wholeBoxes = offset.astype(int)
            clusterBoxCoords[atomIdx, :] -= wholeBoxes
            offset -= wholeBoxes
            # remove half box distances
            isClose = np.abs(offset) < 0.5
            shifted = clusterBoxCoords[atomIdx, :] - np.sign(offset)
            clusterBoxCoords[atomIdx, :] = np.where(
                isClose, clusterBoxCoords[atomIdx, :], shifted)
        placedSum += clusterBoxCoords[atomIdx, :]
        placedCount += 1.0
    # set cluster atoms new box positions
    boxCoords[self.clusterIndexes, :] = clusterBoxCoords
    # translate cluster in box center
    if self.clusterToBoxCenter:
        # mean position of the cluster atoms
        center = np.sum(clusterBoxCoords, 0) / len(self.clusterIndexes)
        # translate all atoms so that this center lands on the box center
        boxCoords += np.array([0.5, 0.5, 0.5]) - center
    # fold all but cluster atoms
    if self.fold:
        boxCoords[self.restOfAtomsIndexes, :] %= 1
    # convert to real coordinates
    coords = self._trajectory._boundaryConditions.box_to_real_array(
        boxArray=boxCoords, index=confIdx)
    # set new coordinates
    self._trajectory.set_configuration_coordinates(confIdx, coords)
    return index, None
def step(self, index):
    """
    analysis step of calculation method.\n

    This implementation only reports that the method is not implemented.

    :Parameters:
        #. index (int): the step index
    """
    message = "step method is not implemented"
    raise Logger.error(message)
def next(self):
    """
    Reads the next record of the file.\n

    A record is a 4 bytes integer length, the data, then the same length
    repeated.

    :Returns:
        #. data: the record data, without its two length markers.
    """
    lengthBytes = self.__file.read(4)
    # nothing left to read
    if not lengthBytes:
        raise StopIteration
    intFormat = self.__byteOrder + 'i'
    (reclen,) = struct.unpack(intFormat, lengthBytes)
    data = self.__file.read(reclen)
    # the trailing marker must repeat the leading one
    (reclen2,) = struct.unpack(intFormat, self.__file.read(4))
    assert reclen == reclen2, Logger.error("data format not respected")
    return data
def __load_binary__(self, path):
    """
    Loads a pickled dictionary of results and merges it into self.results.\n

    :Parameters:
        #. path (str): the path of the binary file. Every entry of the
           loaded dictionary replaces the entry of the same name in
           self.results, after a warning is logged.
    """
    # open file. pickle data is binary, text mode 'r' corrupts or rejects it
    try:
        fd = open(path, 'rb')
    except Exception:
        raise Logger.error("Couldn't open analysis binary file %r." %path)
    # read file
    # NOTE(review): unpickling can execute arbitrary code, only load files
    # coming from a trusted source
    try:
        resDict = pickle.load(fd)
    except Exception:
        raise Logger.error("Couldn't read analysis binary file data %r." %path)
    finally:
        fd.close()
    for key, values in resDict.items():
        if key in self.results:
            Logger.warn("analysis name %r already exists. Previous values will be erased and updated with the new ones" %key)
        self.results[key] = values
def set_trajectory(self, trajectory):
    """
    Set the trajectory used by the analysis.\n

    The boundary conditions are taken from the simulationBox attribute of
    the given trajectory.

    :Parameters:
        #. trajectory (pdbparser, pdbTrajectory): The trajectory instance.
    """
    acceptedTypes = (pdbparser, pdbTrajectory)
    assert isinstance(trajectory, acceptedTypes), Logger.error("trajectory must be a pdbparser or pdbTrajectory instance")
    self._trajectory = trajectory
    self._boundaryConditions = self._trajectory.simulationBox
def get_record(self, format, repeat=False):
    """
    Reads a record of the binary file.

    :Parameters:
        #. format (string): the format corresponding to the binray structure to read.
        #. repeat (boolean): if True, the format is repeated as many times
           as needed to cover the whole record.

    :Returns:
        #. values (tuple): the unpacked values of the record.
    """
    try:
        data = self.next()
    except StopIteration:
        raise Logger.error("Unexpected end of file")
    if repeat:
        unit = struct.calcsize(self.__byteOrder + format)
        assert len(data) % unit == 0, Logger.error("wrong data length")
        # floor division: true division gives a float under Python 3 and a
        # string cannot be multiplied by a float
        format = (len(data) // unit) * format
    try:
        return struct.unpack(self.__byteOrder + format, data)
    except Exception:
        raise Logger.error("not able to unpack data")
def run(self):
    """
    Runs all the analysis steps, then finalizes.\n

    For every step the status is logged, then the values returned by
    self.step are given to self.combine.
    """
    assert self.numberOfSteps>0 and self.numberOfSteps%1==0, Logger.error("numberOfSteps must be a positive integer, '%s' is given"%self.numberOfSteps)
    # run steps
    for stepIndex in range(self.numberOfSteps):
        # log status
        self.status(step=stepIndex, logFrequency = 10)
        # run step and combine
        stepResult = self.step(stepIndex)
        self.combine(*stepResult)
    # log the final status, then finalize
    self.status(step=self.numberOfSteps, logFrequency = 10)
    self.finalize()
def initialize_default_attributes(self):
    """
    Creates the missing default attributes and checks the existing ones.\n

    A missing pdb attribute is set to a new pdbparser instance and a
    missing filePath one to its default value. A filePath that is not None
    must be openable for reading. The info dictionary is always reset.
    """
    # self.pdb
    if hasattr(self, "pdb"):
        assert isinstance(self.pdb, pdbparser), Logger.error("pdb must a pdbparser instance")
    else:
        object.__setattr__(self, "pdb", pdbparser())
    # self.filePath
    if not hasattr(self, "filePath"):
        defaultPath = self.__defaults__["filePath"]
        object.__setattr__(self, "filePath", defaultPath)
    elif self.filePath is not None:
        # the file is only opened to check that it is readable
        try:
            fd = open(self.filePath, 'r')
        except:
            Logger.error("Cannot open %r for reading." % self.filePath)
            raise
        else:
            fd.close()
    # info
    self.info = {}