def add_chain_id_pdb(pdb_file, chain='A'):
    """Adds a chain ID to a PDB file

    This is a patch because some MD programs remove the chain id
    from pdb files and this confuses some pdb parsers
    (works on PDB files only). The file is edited in place.

    Parameters
    ---------------
    pdb_file : str
        the pdb file to edit (name or path)
    chain : str
        default 'A', the chain id to add to the pdb_file,
        it is upper-cased and stripped before being written

    Raises
    ---------
    ValueError
        if `chain` is longer than 2 characters

    Notes
    ---------
    Only lines starting with ATOM, HETATM or TER are modified, the chain
    id is written right aligned in the string positions 20-21 of the line.
    Lines shorter than that (for example a bare ``TER``) are padded with
    spaces first, so the chain id never ends up after the newline.
    """
    if len(chain) > 2:
        raise ValueError('A chain id can be max 2 letters long!')

    chain_field = '{0:>2}'.format(chain.upper().strip())

    coordinate_records = ('ATOM', 'HETATM', 'TER')

    lines = read_file.read_file(file_name=pdb_file)

    for i, line in enumerate(lines):
        # work on the bare content, the newline is added back at the end
        content = line.strip('\n')

        if content.startswith(coordinate_records):
            # pad short records so that slicing cannot misplace the chain id
            content = content[:20].ljust(20) + chain_field + content[22:]

        lines[i] = content + '\n'

    write_file.write_file(lines=lines, file_name=pdb_file)
def add_include_after_atomtypes(include_line, input_top_file, output_top_file):
    """adds an include statement after atomtypes

    It looks for the first line that is either the beginning of a
    [ ... ] section different from [ atomtypes ], [ defaults ] and
    [ cmaptypes ] or the beginning of a #ifdef, and puts the include
    right before it. If no such line exists the include is appended
    at the end of the file.

    Parameters
    -----------
    include_line : str
        the thing to include, DON'T write #include but only the
        itp file name (or path)
    input_top_file : str
    output_top_file : str
        can be the same as the input one

    Notes
    ----------
    if you have to include multiple itp files all the [ atomtypes ]
    must be after the force field include and only then you can add the
    remaining parts of the itp files
    """
    skipped_sections = ('[atomtypes]', '[defaults]', '[cmaptypes]')

    def is_right_line(line):
        """True for the first line that must come after the include

        comments (everything after ';') are ignored
        """
        _line = line.split(';')[0].strip()

        if not _line:
            return False

        if _line[:6] == '#ifdef':
            return True

        # remove any kind of whitespace (spaces and tabs) inside the brackets
        return _line[0] == '[' and ''.join(_line.split()) not in skipped_sections

    include_statement = f'#include "{include_line}"\n'

    input_top_lines = read_file.read_file(input_top_file)

    for i, line in enumerate(input_top_lines):
        if is_right_line(line):
            input_top_lines[i] = f'\n{include_statement}{line.strip()}\n'
            break

    # If end of file was reached try to put it in the end
    # as last resort
    else:
        if input_top_lines:
            input_top_lines[-1] += '\n' + include_statement
        else:
            # empty topology: there is no last line to append to
            input_top_lines.append(include_statement)

    write_file.write_file(input_top_lines, output_top_file)
def test_works(self, mocker, test_type, lines):
    """Checks that write_file opens a file when given `lines`

    `open` is patched inside the write_file module so nothing is
    written on disk; `test_type` is only printed to make the test
    output easier to read.
    """
    message = 'Logging test type for visibility: ' + test_type
    print(message)

    patch_target = 'PythonAuxiliaryFunctions.files_IO.write_file.open'
    open_mock = mocker.patch(patch_target)

    write_file.write_file(lines, 'file.txt')

    open_mock.assert_called()
def _write_gaussians(self, gaussians, log_likelyhood):
    """private

    writes one csv line (mean,sigma,coefficient) per gaussian in the
    file `<str(self)>_gaussians.csv`, the first line is a comment with
    the log likelihood. Each gaussian is indexed with the keys
    "mu", "sigma" and "lambda".
    """
    header = [
        f'#each line is a gaussian, log likelyhood = {log_likelyhood}\n',
        'mean,sigma,coefficient\n',
    ]

    rows = [
        f'{item["mu"]:.18e},{item["sigma"]:.18e},{item["lambda"]:.18e}\n'
        for item in gaussians
    ]

    _write.write_file(header + rows, f'{str(self)}_gaussians.csv')
def add_include_after_FF(include_line, input_top_file, output_top_file):
    """adds an include statement after the FF one

    The new include is put right after the first #include of the file.
    In case a [ ... ] section different from [ defaults ] is found
    before any #include (for example in a parmed generated topology)
    the new include is put right before that section.
    If neither is found the file is written unchanged.

    Parameters
    -----------
    include_line : str
        the thing to include, DON'T write #include but only the
        itp file name (or path)
    input_top_file : str
    output_top_file : str
        can be the same as the input one

    Notes
    ----------
    if you have to include multiple itp files all the [ atomtypes ]
    must be after the force field include and only then you can add the
    remaining parts of the itp files
    """
    top_lines = read_file.read_file(input_top_file)

    new_include = f'\n#include "{include_line}"\n'

    for index, line in enumerate(top_lines):
        stripped = line.strip()

        # blank lines and comment lines are never a valid position
        if not stripped or stripped[0] == ';':
            continue

        if stripped.startswith('#include'):
            top_lines[index] = line + new_include
            break

        # In case there is no FF include (e.g. a parmed generated topology)
        section_name = line.split(';')[0].strip().replace(' ', '')
        if stripped[0] == '[' and section_name != '[defaults]':
            top_lines[index] = new_include + line
            break

    write_file.write_file(top_lines, output_top_file)
def remove_velocities(input_gro_file, output_gro_file, keep_velocities=None):
    """removes the velocities from a gro file

    Some times gromacs has some problems of instability if the velocities
    are given in the gro file, in that case it is better to remove them.
    Pay attention if you have a heavy dummy atom, that should always have
    zero velocity, for that use `keep_velocities`

    Parameters
    ------------
    input_gro_file : str
        the name (or path) of the input gro file
    output_gro_file : str
        the name (or path) of the output gro file,
        can be the same as the input one
    keep_velocities : list of strings
        the list of residue names (case sensitive) that should keep
        the velocity value (useful for dummy atoms), default None:
        no residue keeps its velocity

    Notes
    ----------
    The first 2 lines and the last line of the file are copied as they
    are, every other line is cut after its first 44 characters unless
    its residue name (characters 5-10) is in `keep_velocities`
    """
    input_lines = read_file.read_file(input_gro_file)

    residues_to_keep = [] if keep_velocities is None else keep_velocities

    # title and number of atoms
    output_lines = [input_lines[0], input_lines[1]]

    for atom_line in input_lines[2:-1]:
        residue_name = atom_line[5:10].strip()

        if residue_name in residues_to_keep:
            output_lines.append(atom_line)
        else:
            output_lines.append(atom_line[:44] + '\n')

    # last line of the input file
    output_lines.append(input_lines[-1])

    write_file.write_file(output_lines, output_gro_file)
def execute(self):
    """calculate free energy

    The purged work values are saved in `work_values.dat` and the
    result is written in `<str(self)>_free_energy.dat`

    Returns
    ---------
    free_energy, STD : float, float
        the free energy and the standard deviation
    """
    z_score = 3.0

    work_values = self.get_purged_work_values(self.dhdl_files,
                                              md_program=self.md_program,
                                              creation=self.creation,
                                              z_score=z_score)

    backup_header = ('work_values work values Kcal/mol '
                     f'(outliers with z score > {z_score} were purged)')
    np.savetxt('work_values.dat', work_values, header=backup_header)

    self._free_energy_value += self.calculate_free_energy(
        work_values, temperature=self.temperature)

    # volume correction, one term for each distance file
    distance_files = self.vol_correction_distances
    if distance_files is not None:
        for distance_file in distance_files:
            self._free_energy_value += self.volume_com_com_correction(
                distance_file=distance_file,
                temperature=self.temperature,
                md_program=self.md_program)

    std = self.calculate_standard_deviation(work_values,
                                            temperature=self.temperature)

    # print the values of delta G and the confidence interval (95%)
    label = str(self)
    summary = [
        f'# {label}\n',
        '# Delta_G  STD  confidence_intervall_95%(1.96STD)  unit=Kcal/mol\n',
        f'{self._free_energy_value:.18e} {std:.18e} {1.96*std:.18e}\n',
    ]
    _write.write_file(summary, f'{label}_free_energy.dat')

    return self._free_energy_value, std
def add_molecules(name, number, input_top_file, output_top_file):
    """adds a molecule entry at the end of a topology file

    The line `<name> <number>` is appended after the last line of the
    input file, it is up to the caller to make sure the file ends with
    the [ molecules ] section

    Parameters
    -----------
    name : str
        the name of the molecule
    number : int
        the number of molecules
    input_top_file : str
    output_top_file : str
        can be the same as the input one
    """
    top_lines = read_file.read_file(input_top_file)

    molecule_entry = f'\n{name} {number}\n'
    top_lines.append(molecule_entry)

    write_file.write_file(top_lines, output_top_file)
def merge_gro_files(input_gro_file_1,
                    input_gro_file_2,
                    output_gro_file,
                    choose_box=1,
                    box_lenghts=None):
    """merge 2 gro files

    can come in handy to mix a particle with a box of water
    for alchemical transformations. Atoms and residues are renumbered
    starting from 1, the atoms of file 1 come first.

    Parameters
    -------------
    input_gro_file_1 : str
        the name (or path) of the input gro file nr 1
    input_gro_file_2 : str
        the name (or path) of the input gro file nr 2
    output_gro_file : str
        the name (or path) of the output gro file
        can be the same as one of the input ones
    choose_box : int, optional
        the last line of a gro file contains the box lengths,
        if `choose_box` = 1 (default) the output gro file will
        have the box values of `input_gro_file_1`
        if `choose_box` = 2 the box values of `input_gro_file_2`
        if `choose_box` = None the function will read `box_lenghts`
    box_lenghts : iterable, optional
        if `choose_box` = None the box lengths will be read from
        `box_lenghts`, it must be an iterable of floating point
        values. Gromacs measures in nm

    Raises
    ----------
    ValueError
        if `choose_box` is not 1, 2 or None

    Notes
    ----------
    A new residue starts every time the residue number or the residue
    name changes and always at the beginning of the second file, so the
    last residue of file 1 and the first of file 2 are never merged.
    Residue and atom numbers wrap around at 100000 in order to fit the
    5 character columns of the gro format.
    """
    input_lines_1 = read_file.read_file(input_gro_file_1)
    input_lines_2 = read_file.read_file(input_gro_file_2)

    if choose_box == 1:
        box = input_lines_1[-1].strip().split()

    elif choose_box == 2:
        box = input_lines_2[-1].strip().split()

    elif choose_box is None:
        box = ['{:.5f}'.format(length) for length in box_lenghts]

    else:
        raise ValueError(f'{choose_box} is not a valid value for choose_box')

    #adds number of atoms
    number_of_atoms = int(input_lines_1[1].strip()) + \
        int(input_lines_2[1].strip())

    output_lines = ['Merged gro files\n', ' {:>5}\n'.format(number_of_atoms)]

    atom = 0
    residue = 0
    for lines in (input_lines_1, input_lines_2):

        # reset for each file, the first residue of a file is always new
        previous_residue = None

        # skip title and atom count (first 2 lines) and the box (last line)
        for line in lines[2:-1]:

            if line.strip() == '':
                continue

            #check for residue change (number and name)
            current_residue = (line[:5].strip(), line[5:10].strip())
            if current_residue != previous_residue:
                previous_residue = current_residue
                residue += 1

            atom += 1

            output_lines.append('{:>5}{:<5}{:>5}{:>5}{}\n'.format(
                residue % 100000,  #residue number
                line[5:10],  #residue name
                line[10:15],  #atom name
                atom % 100000,  #atom number
                line[20:].strip('\n')))

    del input_lines_1
    del input_lines_2

    output_lines.append(2 * ' ' + ' '.join(box) + '\n')

    write_file.write_file(output_lines, output_gro_file)
def add_atom_to_gro_file(input_gro_file,
                         output_gro_file,
                         coordinates,
                         velocities=None,
                         atom_name='DU',
                         atom_residue_name='DUM'):
    """adds a given atom to a gro file

    comes in handy to add a dummy atom
    remember that unlike PDB files GRO files are in nanometers nm!!
    The new atom is put after the last atom of the file, in a new
    residue.

    Parameters
    -------------
    input_gro_file : str
        the name (or path) of the input gro file
    output_gro_file : str
        the name (or path) of the output gro file
        can be the same as the input one
    coordinates : iterable of float
        (x,y,z) iterable of any type (list, tuple, ...)
        remember that unlike PDB files GRO files are in nanometers nm!!
    velocities : iterable of float, optional
        (vx,vy,vz) iterable of any type (list, tuple, ...)
        for default velocities will be left blank
    atom_name : str
        2 characters atom name
    atom_residue_name : str
        2 or 3 characters residue name

    Notes
    ----------
    The residue and atom numbers of the new atom are the ones of the
    last atom plus 1, wrapped around at 100000 in order to fit the
    5 character columns of the gro format. If the file contains no
    atoms the new atom gets residue number 1 and atom number 1.
    """
    input_lines = read_file.read_file(input_gro_file)

    #adds an atom to the atom count
    input_lines[1] = '{:>5}\n'.format(int(input_lines[1].strip()) + 1)

    if velocities is not None:
        velocity_string = '{:8.4f}{:8.4f}{:8.4f}'.format(
            velocities[0], velocities[1], velocities[2])
    else:
        velocity_string = 24 * ' '

    # the last non empty line is the box, the one before is the last atom
    for i in range(len(input_lines) - 1, 0, -1):

        if input_lines[i].strip() == '':
            continue

        last_atom_line = input_lines[i - 1]

        if i - 1 == 1:
            # the line before the box is the atom count: no atoms at all
            residue_number = 1
            atom_number = 1
        else:
            residue_number = (int(last_atom_line[0:5].strip()) + 1) % 100000
            atom_number = (int(last_atom_line[15:20].strip()) + 1) % 100000

        input_lines[i - 1] = last_atom_line + \
            '{:>5}{:<5}{:>5}{:>5}{:8.3f}{:8.3f}{:8.3f}{}\n'.format(
                residue_number,  #residue number
                atom_residue_name,  #residue name
                atom_name,  #atom name
                atom_number,  #atom number
                coordinates[0],
                coordinates[1],
                coordinates[2],
                velocity_string)

        break

    write_file.write_file(input_lines, output_gro_file)
def execute(self):
    """Calculates the free energy

    Backups of the bound, unbound and combined work values are saved
    in `bound_work_values.dat`, `unbound_work_values.dat` and
    `combined_work_values.dat`, the result is written in
    `<str(self)>_free_energy.dat`

    Returns
    -----------
    float, float
        free energy, STD
    """
    #outliers (zscore > z_score) are purged both for bound and unbound work
    z_score = 3.0
    purge_note = f'(outliers with z score > {z_score} were purged)'

    #numpy array
    bound_work_values = self.get_purged_work_values(
        self.bound_state_dhdl,
        md_program=self.md_program,
        creation=False,
        z_score=z_score)

    #numpy array
    unbound_work_values = self.get_purged_work_values(
        self.unbound_state_dhdl,
        md_program=self.md_program,
        creation=True,
        z_score=z_score)

    # print a backup to file
    np.savetxt(
        'bound_work_values.dat',
        bound_work_values,
        header='bound work values after z score purging Kcal/mol ' +
        purge_note)

    np.savetxt(
        'unbound_work_values.dat',
        unbound_work_values,
        header='unbound work values after z score purging Kcal/mol ' +
        purge_note)

    #get STD
    std = self.vdssb_calculate_standard_deviation(
        bound_work_values, unbound_work_values, temperature=self.temperature)

    #get free energy
    self._free_energy_value += self.vdssb_calculate_free_energy(
        bound_work_values, unbound_work_values, temperature=self.temperature)

    #make a backup of the combined work values
    combined_work_values = combine_works.combine_non_correlated_works(
        bound_work_values, unbound_work_values)

    del bound_work_values
    del unbound_work_values

    # print a backup to file
    np.savetxt('combined_work_values.dat',
               combined_work_values,
               header='combined_work_values work values Kcal/mol')

    del combined_work_values

    #volume correction, first the bound state files then the unbound ones
    for attribute in ('vol_correction_distances_bound_state',
                      'vol_correction_distances_unbound_state'):

        distance_files = getattr(self, attribute)

        if distance_files is None:
            continue

        for distance_file in distance_files:
            self._free_energy_value += self.volume_com_com_correction(
                distance_file=distance_file,
                temperature=self.temperature,
                md_program=self.md_program)

    # print the values of delta G and the confidence interval (1.96 STD)
    label = str(self)
    summary = [
        f'# {label}\n',
        '# Delta_G  STD  confidence_intervall_95%(1.96STD)  unit=Kcal/mol\n',
        f'{self._free_energy_value:.18e} {std:.18e} {1.96*std:.18e}\n',
    ]
    _write.write_file(summary, f'{label}_free_energy.dat')

    return self._free_energy_value, std
def COM_COM_restraint(atom_groups,
                      restraint_parameters,
                      plumed_file='plumed.dat',
                      distances_file='distances.out',
                      stride=100,
                      geometric=False):
    """create a center of mass (COM) - center of mass restraint with plumed

    creates a plumed input file https://www.plumed.org in order to
    create a restraint between 2 centers of mass
    all distances (input and output) will be in nanometers nm!

    Parameters
    ------------
    atom_groups : dict
        {"group_name" : [<list of atoms>], ...}
        the keys of the dictionary must be the group names (NO SPACES!!!)
        and the value must be a list containing all atom numbers
        of the group, there can be as many groups as you like
    restraint_parameters : list
        a nested list:
        [ ["group_name_1", "group_name_2", equilibrium_distance_nm,
        harmonic_kappa, linear_slope], ... ]
        the names must be str the other parameters float, remember that
        you can always set a certain parameter to zero 0.
    plumed_file : str
        the name of the plumed input file to be created
        (default 'plumed.dat')
    distances_file : str
        the name of the file that plumed will create and in which
        all the distances will be written out (default 'distances.out')
    stride : int
        `distances_file` will be updated each `stride` MD steps
        (default 100)
    geometric : bool, optional
        if True instead of the center of mass the geometrical center
        will be calculated (default False). Can come in handy with
        strange atoms/residues

    Raises
    ---------
    ValueError
        if `atom_groups` contains less than 2 groups
        but there is no check to see if `restraint_parameters`
        contains a value for each possible couple, and couples that
        name an unknown group produce no RESTRAINT line
    """

    def make_atoms_string(atoms):
        """private, comma separated atoms"""
        return ','.join(f'{atom}' for atom in atoms)

    if len(atom_groups) < 2:
        raise ValueError(
            f'need at least 2 atom groups, not {len(atom_groups)}')

    center_keyword = 'CENTER' if geometric else 'COM'

    group_names = list(atom_groups.keys())

    #put the units explicitly in order to be always sure what you
    #get in output and what you are expected to put in input
    output = ['UNITS LENGTH=nm TIME=ps \n\n']

    #WHOLEMOLECULES string, one ENTITY for each group
    entities = ''.join(
        f'ENTITY{index}={make_atoms_string(atom_groups[name])} '
        for index, name in enumerate(group_names))
    output.append('WHOLEMOLECULES ' + entities + '\n')

    #define centers of mass (COM or CENTER):
    for name in group_names:
        output.append(f'{name}: {center_keyword} '
                      f'ATOMS={make_atoms_string(atom_groups[name])}\n')

    #define COM-COM distances DISTANCE NOPBC, one for each couple of groups
    distances_list = []
    for first_index, first in enumerate(group_names):
        for second in group_names[first_index + 1:]:
            label = f'{first}_{second}_dist'
            distances_list.append([label, first, second])
            output.append(f'{label}: DISTANCE ATOMS={first},{second} NOPBC\n')

    #create restraints RESTRAINT
    restraint_template = \
        'RESTRAINT ARG={} AT={:.4f} KAPPA={:.4f} SLOPE={:.4f}\n'
    for couple in restraint_parameters:
        couple_names = (couple[0], couple[1])

        for label, first, second in distances_list:
            if first in couple_names and second in couple_names:
                output.append(
                    restraint_template.format(label, couple[2], couple[3],
                                              couple[4]))

    #PRINT statement
    all_labels = ','.join(distance[0] for distance in distances_list)
    output.append('PRINT ARG=' + all_labels + f' STRIDE={stride} ' +
                  f'FILE={distances_file}\n')

    #write file
    write_file.write_file(output, plumed_file)
def merge_pdb(input_pdb_1, input_pdb_2, output_pdb):
    """Merge 2 PDB files

    this function is brutal and memory consuming
    I should do it better in the future
    it adds file 2 to file 1 (the output file will be in order 1-2)
    The header of file 2 will be omitted, the one of file 1 is kept
    (its lines are only padded with spaces up to 78 characters)

    The coordinate lines of file 2 (from its first ATOM/HETATM line to
    its last ATOM/HETATM/TER line) are renumbered, atom and residue
    numbers continue from the last ones of file 1, and are inserted
    right after the last ATOM/HETATM/TER line of file 1

    Parameters
    ------------
    input_pdb_1 : str
    input_pdb_2 : str
    output_pdb : str
        can also be one of the input ones

    Raises
    ----------
    ValueError
        if one of the 2 files contains no coordinate line

    Notes
    ----------
    The renumbered lines of file 2 are cut at 78 characters.
    A new residue starts every time the residue number field
    (string positions 22-26) of file 2 changes. TER lines never start
    a new residue because non standard ones don't contain the residue
    number.
    """
    coordinate_records = ('ATOM', 'HETATM')

    def normalize(line):
        """private

        to be sure each line is long enough (there are plenty of
        non standard PDB files), the newline is removed first so that
        the padding spaces can never end up after it
        """
        return line.rstrip('\n').ljust(78)

    lines_1 = [
        normalize(line) + '\n' for line in read_file.read_file(input_pdb_1)
    ]

    #get the index of the line with the last ATOM HETATM or TER line
    #and get the resnum of this last residue
    for i in range(len(lines_1) - 1, -1, -1):

        if lines_1[i].startswith(coordinate_records):
            reference_line = lines_1[i]

        elif lines_1[i].startswith('TER'):
            #some TER lines are non standard and don't contain the
            #residue number, so the line before is used
            reference_line = lines_1[i - 1]

        else:
            continue

        residue_number = int(reference_line[22:26].strip())
        atom_number = int(reference_line[6:11].strip())
        index_protein_file = i + 1
        break

    else:
        raise ValueError("This PDB doesn't contain coordinates")

    lines_2 = [normalize(line) for line in read_file.read_file(input_pdb_2)]

    #find first coordinate line in input_pdb_2
    for i, line in enumerate(lines_2):
        if line.startswith(coordinate_records):
            beginning_pdb_2 = i
            break

    else:
        raise ValueError("This PDB doesn't contain coordinates")

    #find last coordinate (or TER) line in input_pdb_2
    for i in range(len(lines_2) - 1, -1, -1):
        if lines_2[i].startswith(coordinate_records + ('TER', )):
            end_pdb_2 = i
            break

    else:
        raise ValueError("This PDB doesn't contain coordinates")

    lines_2 = lines_2[beginning_pdb_2:end_pdb_2 + 1]

    residue_number += 1
    old_residue_number = lines_2[0][22:26]
    renumbered_lines = []
    for line in lines_2:

        atom_number += 1

        current_residue_number = line[22:26]
        if not line.startswith('TER') and \
            current_residue_number != old_residue_number:
            residue_number += 1
            #remember the new residue, otherwise every following atom
            #would be counted as a new residue
            old_residue_number = current_residue_number

        renumbered_lines.append(line[:6] + '{:>5}'.format(atom_number) +
                                line[11:22] +
                                '{:>4}'.format(residue_number) +
                                line[26:78] + '\n')

    #insert the ligands in the right place of the protein_file list
    lines_1[index_protein_file:index_protein_file] = renumbered_lines

    write_file.write_file(lines=lines_1, file_name=output_pdb)