Beispiel #1
0
def add_chain_id_pdb(pdb_file, chain='A'):
    """Adds a chain ID to a PDB file

    This is a patch because some MD programs remove the chain id from
    pdb files and this confuses some pdb parsers (works on PDB files only)

    The chain id is upper-cased, stripped and written right-aligned in
    columns 21-22 of every ATOM, HETATM and TER line; all the other
    lines are left untouched. `pdb_file` is overwritten.

    Parameters
    ---------------
    pdb_file : str
        the pdb file to edit (name or path)
    chain : str
        default 'A', the chain id to add to the pdb_file

    Raises
    ---------
    ValueError
        if `chain` is longer than 2 characters
    """

    if len(chain) > 2:
        raise ValueError('A chain id can be max 2 letters long!')

    chain_field = '{0:>2}'.format(chain.upper().strip())

    lines = read_file.read_file(file_name=pdb_file)

    for i, line in enumerate(lines):

        if line.startswith(('ATOM', 'HETATM', 'TER')):

            # separate the line ending from the record BEFORE slicing and pad
            # the record up to the chain columns, otherwise on short records
            # (a bare "TER" for example) the chain id would be written after
            # the newline and spill on the following line
            record = line.rstrip('\r\n')

            line_ending = line[len(record):] or '\n'

            record = record.ljust(22)

            lines[i] = record[:20] + chain_field + record[22:] + line_ending

    write_file.write_file(lines=lines, file_name=pdb_file)
def add_include_after_atomtypes(include_line, input_top_file, output_top_file):
    """adds an include statement after atomtypes

    The include is put right before the first line that is the beginning of a
    [ ... ] section different from [ atomtypes ], [ defaults ] and
    [ cmaptypes ], or the beginning of a #ifdef.
    If no such line exists the include is added at the end of the file
    (also when the input file is empty).

    Parameters
    -----------
    include_line : str
        the thing to include, DON'T write #include
        but only the itp file name (or path)
    input_top_file : str
    output_top_file : str
        can be the same as the input one

    Notes
    ----------
    if you have to include multiple itp files all the [ atomtypes ]
    must be after the force field include and only then you can add the remaining parts of the
    itp files
    """
    def is_right_line(line):
        """True if `line` (comments after ';' are ignored) starts a section
        that is not [ atomtypes ], [ defaults ] or [ cmaptypes ],
        or starts a #ifdef
        """
        _line = line.split(';')[0].strip()

        if not _line:
            return False

        if _line.startswith('['):
            return _line.replace(' ', '') not in ('[atomtypes]', '[defaults]',
                                                  '[cmaptypes]')

        return _line.startswith('#ifdef')

    include_statement = f'\n#include "{include_line}"\n'

    input_top_lines = read_file.read_file(input_top_file)

    for i, line in enumerate(input_top_lines):

        if is_right_line(line):

            input_top_lines[i] = f'{include_statement}{line.strip()}\n'

            break

    # If end of file was reached try to put it in the end
    # as last resort
    else:

        if input_top_lines:
            input_top_lines[-1] += include_statement

        # an empty file has no last line to append to
        else:
            input_top_lines.append(include_statement)

    write_file.write_file(input_top_lines, output_top_file)
Beispiel #3
0
    def test_works(self, mocker, test_type, lines):
        """write_file.write_file must call `open` when it writes `lines`

        `open` is patched (through the `mocker` fixture) inside the
        write_file module and the test only checks that it got called
        """

        # the test type is printed only to make the log more readable
        print('Logging test type for visibility: ' + test_type)

        patch_target = 'PythonAuxiliaryFunctions.files_IO.write_file.open'

        open_mock = mocker.patch(patch_target)

        write_file.write_file(lines, 'file.txt')

        open_mock.assert_called()
    def _write_gaussians(self, gaussians, log_likelyhood):
        """private

        writes the file `<str(self)>_gaussians.csv`: a comment line with
        the log likelihood, a header line and then one line per gaussian
        with the values of its "mu", "sigma" and "lambda" keys
        """

        header = [
            f'#each line is a gaussian, log likelyhood = {log_likelyhood}\n',
            'mean,sigma,coefficient\n',
        ]

        rows = [
            f'{entry["mu"]:.18e},{entry["sigma"]:.18e},{entry["lambda"]:.18e}\n'
            for entry in gaussians
        ]

        _write.write_file(header + rows, f'{str(self)}_gaussians.csv')
def add_include_after_FF(include_line, input_top_file, output_top_file):
    """adds an include statement after the FF one

    The new include is put right after the first #include line of the
    topology (which is expected to be the force field one).
    If a [ ... ] section different from [ defaults ] is found before any
    #include (for example in a parmed generated topology) the new include
    is put right before that section.
    If neither is found the topology is written out unchanged.

    Parameters
    -----------
    include_line : str
        the thing to include, DON'T write #include
        but only the itp file name (or path)
    input_top_file : str
    output_top_file : str
        can be the same as the input one

    Notes
    ----------
    if you have to include multiple itp files all the [ atomtypes ]
    must be after the force field include and only then you can add the remaining parts of the
    itp files
    """

    topology = read_file.read_file(input_top_file)

    new_include = f'\n#include "{include_line}"\n'

    for index, raw_line in enumerate(topology):

        content = raw_line.strip()

        # empty lines and comment lines are never the right place
        if not content or content.startswith(';'):
            continue

        if content.startswith('#include'):

            topology[index] = raw_line + new_include

            break

        # no FF include before the first real section
        if content.startswith('['):

            section = raw_line.split(';')[0].strip().replace(' ', '')

            if section != '[defaults]':

                topology[index] = new_include + raw_line

                break

    write_file.write_file(topology, output_top_file)
Beispiel #6
0
def remove_velocities(input_gro_file, output_gro_file, keep_velocities=None):
    """removes the velocities from a gro file

    Some times gromacs has some problems of instability
    if the velocities are given in the gro file,
    in that case it is better to remove them

    Pay attention if you have a heavy dummy atom, that should
    always have zero velocity, for that use `keep_velocities`

    The first two lines and the last line of the file are copied as they
    are, every line in between is cut after its first 44 characters unless
    its residue name (characters 6-10) is in `keep_velocities`

    Parameters
    ------------
    input_gro_file : str
        the name (or path) of the input gro file
    output_gro_file : str
        the name (or path) of the output gro file,
        can be the same as the input one
    keep_velocities : list of strings
        the list of residue names (case sensitive) that should
        keep the velocity value (useful for dummy atoms),
        default None, no residue keeps its velocity
    """

    gro_lines = read_file.read_file(input_gro_file)

    if keep_velocities is None:
        keep_velocities = []

    # the first two lines are copied untouched
    stripped_lines = [gro_lines[0], gro_lines[1]]

    for atom_line in gro_lines[2:-1]:

        if atom_line[5:10].strip() in keep_velocities:
            stripped_lines.append(atom_line)

        else:
            # everything after the 44th character is dropped
            stripped_lines.append(atom_line[:44] + '\n')

    # the last line is copied untouched
    stripped_lines.append(gro_lines[-1])

    write_file.write_file(stripped_lines, output_gro_file)
    def execute(self):
        """calculate free energy

        The purged work values are saved in `work_values.dat`, the free
        energy (plus the volume corrections, if any distance file is given)
        is added to `self._free_energy_value` and the result is written
        in `<str(self)>_free_energy.dat`

        Returns
        ---------
        free_energy, STD : float, float
            the free energy and the standard deviation
        """

        outlier_z_score = 3.0

        works = self.get_purged_work_values(self.dhdl_files,
                                            md_program=self.md_program,
                                            creation=self.creation,
                                            z_score=outlier_z_score)

        # backup of the work values actually used
        backup_header = (
            'work_values work values Kcal/mol '
            f'(outliers with z score > {outlier_z_score} were purged)')

        np.savetxt('work_values.dat', works, header=backup_header)

        self._free_energy_value += self.calculate_free_energy(
            works, temperature=self.temperature)

        # volume correction, one term for each distance file
        if self.vol_correction_distances is not None:

            for distance_file in self.vol_correction_distances:

                self._free_energy_value += self.volume_com_com_correction(
                    distance_file=distance_file,
                    temperature=self.temperature,
                    md_program=self.md_program)

        std_dev = self.calculate_standard_deviation(
            works, temperature=self.temperature)

        # delta G, STD and the 95% confidence interval (1.96 STD)
        report = [
            f'# {str(self)}\n',
            '# Delta_G  STD  confidence_intervall_95%(1.96STD)  unit=Kcal/mol\n',
            f'{self._free_energy_value:.18e} {std_dev:.18e} {1.96*std_dev:.18e}\n'
        ]

        _write.write_file(report, f'{str(self)}_free_energy.dat')

        return self._free_energy_value, std_dev
def add_molecules(name, number, input_top_file, output_top_file):
    """adds a molecule to the end of a topology file

    An empty line followed by a `name     number` line is appended to the
    content of `input_top_file` (no [ molecules ] header is written by
    this function)

    Parameters
    -----------
    name : str
        the name of the molecule
    number : int
        the number of molecules
    input_top_file : str
    output_top_file : str
        can be the same as the input one
    """

    topology = read_file.read_file(input_top_file)

    molecule_entry = f'\n{name}     {number}\n'

    topology.append(molecule_entry)

    write_file.write_file(topology, output_top_file)
Beispiel #9
0
def merge_gro_files(input_gro_file_1,
                    input_gro_file_2,
                    output_gro_file,
                    choose_box=1,
                    box_lenghts=None):
    """merge 2 gro files

    can come in handy to mix a particle with a box of water
    for alchemical transformations

    The atoms of file 1 come first, then the ones of file 2. Atoms and
    residues are renumbered starting from 1; the first residue of
    file 2 is always a new residue, even if it has the same residue
    number as the last one of file 1.

    Parameters
    -------------
    input_gro_file_1 : str
        the name (or path) of the input gro file nr 1
    input_gro_file_2 : str
        the name (or path) of the input gro file nr 2
    output_gro_file : str
        the name (or path) of the output gro file
        can be the same as one of the input ones
    choose_box : int, optional
        the last line of a gro file contains the box
        lenghts, if `choose_box` = 1 (default) the output
        gro file will have the box values of `input_gro_file_1`
        if `choose_box` = 2 the box values of `input_gro_file_2`
        if `choose_box` = None the function will read `box_lenghts`
    box_lenghts : iterable, optional
        if `choose_box` = None the box lenghts will be read from
        `box_lenghts`, it must be an iterable with 3 floating point
        values. Gromacs measures in nm

    Raises
    ---------
    ValueError
        if `choose_box` is not 1, 2 or None
    """

    input_lines_1 = read_file.read_file(input_gro_file_1)

    input_lines_2 = read_file.read_file(input_gro_file_2)

    if choose_box == 1:

        box = input_lines_1[-1].strip().split()

    elif choose_box == 2:

        box = input_lines_2[-1].strip().split()

    elif choose_box is None:

        box = ['{:.5f}'.format(length) for length in box_lenghts]

    else:
        raise ValueError(f'{choose_box} is not a valid value for choose_box')

    output_lines = ['Merged gro files\n']

    #adds number of atoms
    output_lines.append(' {:>5}\n'.format(
        int(input_lines_1[1].strip()) + int(input_lines_2[1].strip())))

    atom = 0
    residue = 0
    for lines in (input_lines_1, input_lines_2):

        #reset for each file, so the first residue of a file always
        #starts a new residue whatever its original number is
        previous_residue = None

        #the first 2 lines (title, atom count) and the last one (box)
        #are not atoms
        for line in lines[2:-1]:

            if line.strip() == '':
                continue

            #check for residue change
            residue_id = line[:5].strip()

            if residue_id != previous_residue:

                previous_residue = residue_id

                residue += 1

            atom += 1

            #the gro number fields are 5 characters wide, bigger numbers
            #wrap around instead of shifting the columns
            output_lines.append('{:>5}{:<5}{:>5}{:>5}{}\n'.format(
                residue % 100000,  #residue number
                line[5:10],  #residue name
                line[10:15],  #atom name
                atom % 100000,  #atom number
                line[20:].strip('\n')))

    del input_lines_1
    del input_lines_2

    output_lines.append(2 * ' ' + '  '.join(box) + '\n')

    write_file.write_file(output_lines, output_gro_file)
Beispiel #10
0
def add_atom_to_gro_file(input_gro_file,
                         output_gro_file,
                         coordinates,
                         velocities=None,
                         atom_name='DU',
                         atom_residue_name='DUM'):
    """adds a given atom to a gro file

    comes in handy to add a dummy atom
    remember that unlike PDB files GRO files
    are in nanometers nm!!

    The atom count on the second line is increased by one and the new atom
    is put right before the last non-empty line of the file; its residue and
    atom numbers are the ones of the line preceding that one, plus one

    Parameters
    -------------
    input_gro_file : str
        the name (or path) of the input gro file
    output_gro_file : str
        the name (or path) of the output gro file
        can be the same as the input one
    coordinates : iterable of float
        (x,y,z) iterable of any type (list, tuple, ...)
        remember that unlike PDB files GRO files
        are in nanometers nm!!
    velocities : iterable of float, optional
        (vx,vy,vz) iterable of any type (list, tuple, ...)
        for default velocities will be left blank
    atom_name : str
        2 characters atom name
    atom_residue_name :
        2 or 3 characters residue name
    """

    gro_lines = read_file.read_file(input_gro_file)

    #adds an atom to the atom count
    gro_lines[1] = '{:>5}\n'.format(int(gro_lines[1].strip()) + 1)

    #index of the last non-empty line, searched backwards
    #(the first line of the file is never checked)
    last_full_index = next((index
                            for index in range(len(gro_lines) - 1, 0, -1)
                            if gro_lines[index].strip() != ''), None)

    if last_full_index is not None:

        if velocities is None:

            velocity_string = 24 * ' '

        else:

            velocity_string = '{:8.4f}{:8.4f}{:8.4f}'.format(
                velocities[0], velocities[1], velocities[2])

        #the new atom is appended to the line before the last non-empty one
        previous_line = gro_lines[last_full_index - 1]

        new_atom_line = '{:>5}{:<5}{:>5}{:>5}{:8.3f}{:8.3f}{:8.3f}{}\n'.format(
            int(previous_line[0:5].strip()) + 1,  #residue number
            atom_residue_name,  #residue name
            atom_name,  #atom name
            int(previous_line[15:20].strip()) + 1,  #atom number
            coordinates[0],
            coordinates[1],
            coordinates[2],
            velocity_string)

        gro_lines[last_full_index - 1] = previous_line + new_atom_line

    write_file.write_file(gro_lines, output_gro_file)
    def execute(self):
        """Calculates the free energy

        Backups of the bound, unbound and combined work values are saved in
        `bound_work_values.dat`, `unbound_work_values.dat` and
        `combined_work_values.dat`; the free energy (plus the volume
        corrections, if any distance file is given) is added to
        `self._free_energy_value` and the result is written in
        `<str(self)>_free_energy.dat`

        Returns
        -----------
        float, float
            free energy, STD
        """

        #outliers (zscore > outlier_z_score) are purged both from the
        #bound and from the unbound work
        outlier_z_score = 3.0

        purge_note = f'(outliers with z score > {outlier_z_score} were purged)'

        #numpy array
        bound_works = self.get_purged_work_values(self.bound_state_dhdl,
                                                  md_program=self.md_program,
                                                  creation=False,
                                                  z_score=outlier_z_score)

        #numpy array
        unbound_works = self.get_purged_work_values(
            self.unbound_state_dhdl,
            md_program=self.md_program,
            creation=True,
            z_score=outlier_z_score)

        # print a backup to file
        np.savetxt('bound_work_values.dat',
                   bound_works,
                   header='bound work values after z score purging Kcal/mol ' +
                   purge_note)

        np.savetxt(
            'unbound_work_values.dat',
            unbound_works,
            header='unbound work values after z score purging Kcal/mol ' +
            purge_note)

        #get STD
        std_dev = self.vdssb_calculate_standard_deviation(
            bound_works, unbound_works, temperature=self.temperature)

        #get free energy
        self._free_energy_value += self.vdssb_calculate_free_energy(
            bound_works, unbound_works, temperature=self.temperature)

        #make a backup of the combined work values
        combined_works = combine_works.combine_non_correlated_works(
            bound_works, unbound_works)

        del bound_works
        del unbound_works

        # print a backup to file
        np.savetxt('combined_work_values.dat',
                   combined_works,
                   header='combined_work_values work values Kcal/mol')

        del combined_works

        #volume correction, first the bound state files then the unbound ones
        for attribute_name in ('vol_correction_distances_bound_state',
                               'vol_correction_distances_unbound_state'):

            distance_files = getattr(self, attribute_name)

            if distance_files is None:
                continue

            for distance_file in distance_files:

                self._free_energy_value += self.volume_com_com_correction(
                    distance_file=distance_file,
                    temperature=self.temperature,
                    md_program=self.md_program)

        # delta G, STD and the 95% confidence interval (1.96 STD)
        report = [
            f'# {str(self)}\n',
            '# Delta_G  STD  confidence_intervall_95%(1.96STD)  unit=Kcal/mol\n',
            f'{self._free_energy_value:.18e} {std_dev:.18e} {1.96*std_dev:.18e}\n'
        ]

        _write.write_file(report, f'{str(self)}_free_energy.dat')

        return self._free_energy_value, std_dev
Beispiel #12
0
def COM_COM_restraint(atom_groups,
                      restraint_parameters,
                      plumed_file='plumed.dat',
                      distances_file='distances.out',
                      stride=100,
                      geometric=False):
    """create a center of mass (COM) - center of mass restraint with plumed

    creates a plumed input file https://www.plumed.org in order to create
    a restraint between 2 centers of mass

    all distances (input and output) will be in nanometers nm!

    The file contains, in this order: the UNITS line, a WHOLEMOLECULES
    line with one ENTITY per group, one COM (or CENTER) definition per
    group, one DISTANCE (NOPBC) per couple of groups, one RESTRAINT for
    each entry of `restraint_parameters` matching a couple of groups and a
    PRINT statement for all the distances

    Parameters
    ------------
    atom_groups : dict
        {"group_name" : [<list of atoms>], ...} the keys of the dictionary must
        be the group names (NO SPACES!!!)
        and the value must be a list containing all atom numbers of the group,
        there can be as many groups as
        you like
    restraint_parameters : list
        a nested list: [ ["group_name_1", "group_name_2", equilibrium_distance_nm,
        harmonic_kappa, linear_slope], ... ]
        the names must be str the other parameters float, remember that you
        can always set a certain parameter to zero 0.
    plumed_file : str
        the name of the plumed input file to be created
        (default 'plumed.dat')
    distances_file : str
        the name of the file that plumed will create and in which all the distances will be
        written out (default 'distances.out')
    stride : int
        `distances_file` will be updated each `stride` MD steps
        (default 100)
    geometric : bool, optional
        if True instead of the center of mass the geometrical center
        will be calculated (default False). Can come in handy with strange
        atoms/residues

    Raises
    ---------
    ValueError
        if `atom_groups` contains less than 2 groups
        but there is no check to see if
        `restraint_parameters` doesn't contain a value for
        each possible couple
    """
    def make_atoms_string(atoms):
        """private, comma separated list of the atoms
        """

        return ','.join(f'{atom}' for atom in atoms)

    if len(atom_groups) < 2:
        raise ValueError(
            f'need at least 2 atom groups, not {len(atom_groups)}')

    group_names = list(atom_groups.keys())

    center_keyword = 'CENTER' if geometric else 'COM'

    #put the units explicitly in order to be always sure what you
    #get in output and what you are expected to put in input
    plumed_lines = ['UNITS LENGTH=nm TIME=ps \n\n']

    #WHOLEMOLECULES line, one ENTITY for each group
    entities = ''.join(
        f'ENTITY{index}={make_atoms_string(atom_groups[group])} '
        for index, group in enumerate(group_names))

    plumed_lines.append('WHOLEMOLECULES ' + entities + '\n')

    #centers of mass (COM or CENTER), one for each group
    for group in group_names:

        plumed_lines.append(
            f'{group}: {center_keyword} ATOMS={make_atoms_string(atom_groups[group])}\n'
        )

    #COM-COM distances (DISTANCE NOPBC), one for each couple of groups
    #each item is (distance label, first group, second group)
    distances = [(f'{group_names[i]}_{group_names[j]}_dist', group_names[i],
                  group_names[j]) for i in range(len(group_names))
                 for j in range(i + 1, len(group_names))]

    for label, first_group, second_group in distances:

        plumed_lines.append(
            f'{label}: DISTANCE ATOMS={first_group},{second_group} NOPBC\n')

    #RESTRAINT lines, a couple matches a distance whatever the order
    #of its two group names
    for couple in restraint_parameters:

        couple_names = (couple[0], couple[1])

        for label, first_group, second_group in distances:

            if first_group in couple_names and second_group in couple_names:

                plumed_lines.append(
                    'RESTRAINT ARG={} AT={:.4f} KAPPA={:.4f} SLOPE={:.4f}\n'.
                    format(label, couple[2], couple[3], couple[4]))

    #PRINT statement with all the distances
    all_labels = ','.join(label for label, _, _ in distances)

    plumed_lines.append(
        f'PRINT ARG={all_labels} STRIDE={stride} FILE={distances_file}\n')

    #write file
    write_file.write_file(plumed_lines, plumed_file)
def merge_pdb(input_pdb_1, input_pdb_2, output_pdb):
    """Merge 2 PDB files

    this function is brutal and memory consuming I should do it better in the future
    it adds file 2 to file 1 (the output file will be in order 1-2)

    The header of file 2 will be omitted and that of file 1 untouched

    The lines of file 2, from its first ATOM/HETATM line to its last
    ATOM/HETATM/TER line, are inserted right after the last ATOM/HETATM/TER
    line of file 1. Their atom numbers (columns 7-11) and residue numbers
    (columns 23-26) continue the numbering of file 1; each of these lines is
    padded or cut to 78 columns.

    Parameters
    ------------
    input_pdb_1 : str
    input_pdb_2 : str
    output_pdb : str
        can also be one of the input ones

    Raises
    ---------
    ValueError
        if one of the two files contains no ATOM or HETATM line
    """

    coordinate_records = ('ATOM', 'HETATM')
    coordinate_or_ter_records = ('ATOM', 'HETATM', 'TER')

    lines_1 = read_file.read_file(input_pdb_1)

    #get the index of the last ATOM HETATM or TER line of file 1,
    #file 2 will be inserted right after it
    for last_record_1 in range(len(lines_1) - 1, -1, -1):

        if lines_1[last_record_1].startswith(coordinate_or_ter_records):
            break

    else:
        raise ValueError("This PDB doesn't contain coordinates")

    #the numbering continues from the last real atom of file 1
    #some TER lines are non standard and don't contain the residue number
    #so they are skipped
    for i in range(last_record_1, -1, -1):

        if lines_1[i].startswith(coordinate_records):

            residue_number = int(lines_1[i][22:26].strip())

            atom_number = int(lines_1[i][6:11].strip())

            break

    else:
        raise ValueError("This PDB doesn't contain coordinates")

    index_protein_file = last_record_1 + 1

    #the inserted lines must start on a new line
    if not lines_1[last_record_1].endswith('\n'):
        lines_1[last_record_1] += '\n'

    lines_2 = read_file.read_file(input_pdb_2)

    #find first coordinate line in input_pdb_2
    for beginning_pdb_2, line in enumerate(lines_2):

        if line.startswith(coordinate_records):
            break

    else:
        raise ValueError("This PDB doesn't contain coordinates")

    #find last coordinate (or TER) line in input_pdb_2, it exists
    #for sure because there is at least one coordinate line
    for end_pdb_2 in range(len(lines_2) - 1, -1, -1):

        if lines_2[end_pdb_2].startswith(coordinate_or_ter_records):
            break

    residue_number += 1

    old_residue_number = None

    renumbered_lines = []

    #any line between the first and the last one is renumbered
    #whatever record it is
    for line in lines_2[beginning_pdb_2:end_pdb_2 + 1]:

        #remove the line ending BEFORE padding, to be sure each record
        #is long enough (there are plenty of non standard PDB files)
        #without putting the padding after the newline
        record = line.rstrip('\r\n').ljust(78)

        atom_number += 1

        if old_residue_number is None:

            old_residue_number = record[22:26]

        #a TER line belongs to the residue it closes even when it is
        #non standard and has no residue number
        elif record[22:26] != old_residue_number and \
            not record.startswith('TER'):

            residue_number += 1

            #remember the new residue, otherwise any following atom
            #would be counted as a new residue
            old_residue_number = record[22:26]

        renumbered_lines.append(record[:6] + '{:>5}'.format(atom_number) +
                                record[11:22] +
                                '{:>4}'.format(residue_number) +
                                record[26:78] + '\n')

    #insert the ligands in the right place of the protein_file list
    lines_1[index_protein_file:index_protein_file] = renumbered_lines

    write_file.write_file(lines=lines_1, file_name=output_pdb)