Beispiel #1
0
    def test_works(self, tmp_path):
        """Runs extract_frames on the ch2cl2 trajectory and checks its output

        The call must return 8, write exactly output0.pdb ... output7.pdb
        in the output directory and the first two files must differ once
        their first line is ignored
        """
        input_dir = Path('tests/integration_tests/input_files')

        output_path = tmp_path / 'ch2cl2_extract_frames'
        output_path.mkdir()

        # NOTE(review): 10, 3 and 89 are presumably step, first and last
        # frame - confirm against extract_frames
        n_frames = extract_frames.extract_frames(
            10, input_dir / 'ch2cl2.trr', input_dir / 'ch2cl2.tpr',
            output_path / 'output', 'pdb', 3, 89)

        assert n_frames == 8

        expected_files = {output_path / f'output{i}.pdb' for i in range(8)}

        assert set(output_path.iterdir()) == expected_files

        first_frame = read_file.read_file(output_path / 'output0.pdb')
        second_frame = read_file.read_file(output_path / 'output1.pdb')

        # the first line is skipped in the comparison
        assert first_frame[1:] != second_frame[1:]
    def test_works(self, mocker):
        """Checks that read_file calls `open` (patched, no real file needed)"""
        open_target = 'PythonAuxiliaryFunctions.files_IO.read_file.open'

        patched_open = mocker.patch(open_target)

        read_file.read_file(file_name='DUM')

        patched_open.assert_called()
Beispiel #3
0
def add_chain_id_pdb(pdb_file, chain='A'):
    """Adds a chain ID to a PDB file

    This is a patch because some MD programs remove the chain id from
    pdb files and this confuses some pdb parsers (works on PDB files only)

    The file is edited in place: every ATOM, HETATM and TER record gets
    the chain id written right-justified in columns 21-22

    Parameters
    ---------------
    pdb_file : str
        the pdb file to edit (name or path)
    chain : str
        default 'A', the chain id to add to the pdb_file, it is
        upper-cased and stripped of surrounding white space

    Raises
    ---------------
    ValueError
        if the (stripped) chain id is longer than 2 characters
    """

    # normalize before validating, so that surrounding white space does
    # not make an otherwise valid id fail the length check
    chain = chain.upper().strip()

    if len(chain) > 2:
        raise ValueError('A chain id can be max 2 letters long!')

    chain_field = f'{chain:>2}'

    lines = read_file.read_file(file_name=pdb_file)

    for i, line in enumerate(lines):

        if line.startswith(('ATOM', 'HETATM', 'TER')):

            # drop the newline and pad short records (e.g. a bare "TER")
            # first, otherwise the chain id would be written after the
            # newline and end up on a line of its own
            record = line.rstrip('\n').ljust(22)

            lines[i] = record[:20] + chain_field + record[22:] + '\n'

    write_file.write_file(lines=lines, file_name=pdb_file)
def add_include_after_atomtypes(include_line, input_top_file, output_top_file):
    """adds an include statement after atomtypes

    It checks for the beginning of a [ ... ] section that is not
    [ atomtypes ], [ defaults ] or [ cmaptypes ] and for the beginning of
    a #ifdef, the include is written right before the first such line.
    If no such line exists the include is appended at the end of the file

    Parameters
    -----------
    include_line : str
        the thing to include, DON'T write #include
        but only the itp file name (or path)
    input_top_file : str
    output_top_file : str
        can be the same as the input one

    Notes
    ----------
    if you have to include multiple itp files all the [ atomtypes ]
    must be after the force field include and only then you can add the remaining parts of the
    itp files
    """
    skipped_sections = ('[atomtypes]', '[defaults]', '[cmaptypes]')

    def is_right_line(line):
        """True if `line` (comments removed) starts a #ifdef or a section
        that is not one of `skipped_sections`
        """
        statement = line.split(';')[0].strip()

        if statement.startswith('#ifdef'):
            return True

        return statement.startswith('[') and \
            statement.replace(' ', '') not in skipped_sections

    input_top_lines = read_file.read_file(input_top_file)

    for i, line in enumerate(input_top_lines):

        if is_right_line(line):

            input_top_lines[i] = \
                f'\n#include "{include_line}"\n{line.strip()}\n'

            break

    # If end of file was reached try to put it in the end
    # as last resort
    else:
        last_resort = f'\n#include "{include_line}"\n'

        if input_top_lines:
            input_top_lines[-1] += last_resort

        # an empty topology has no last line to extend
        else:
            input_top_lines.append(last_resort)

    write_file.write_file(input_top_lines, output_top_file)
Beispiel #5
0
    def test_works(self, tmp_path):
        """Runs extract_all_frames on the ch2cl2 trajectory and checks its output

        The call must return 4001, leave 4001 entries in the output
        directory and the first two files must differ once their first
        line is ignored
        """
        expected_frames = 4001

        input_dir = Path('tests/integration_tests/input_files')

        output_path = tmp_path / 'ch2cl2_extract_frames'
        output_path.mkdir()

        n_frames = extract_frames.extract_all_frames(
            input_dir / 'ch2cl2.trr', input_dir / 'ch2cl2.tpr',
            output_path / 'output', 'pdb')

        assert n_frames == expected_frames

        written_entries = sum(1 for _ in output_path.iterdir())

        assert written_entries == expected_frames

        first_frame = read_file.read_file(output_path / 'output0.pdb')
        second_frame = read_file.read_file(output_path / 'output1.pdb')

        # the first line is skipped in the comparison
        assert first_frame[1:] != second_frame[1:]
def add_include_after_FF(include_line, input_top_file, output_top_file):
    """adds an include statement after the FF one

    The first #include found in the file is taken as the force field one
    and the new include is written right after it.
    In case a section different from [ defaults ] is found before any
    #include (e.g. a parmed generated topology) the new include is
    written right before that section.
    If neither is found the include is appended at the end of the file
    as a last resort

    Parameters
    -----------
    include_line : str
        the thing to include, DON'T write #include
        but only the itp file name (or path)
    input_top_file : str
    output_top_file : str
        can be the same as the input one

    Notes
    ----------
    if you have to include multiple itp files all the [ atomtypes ]
    must be after the force field include and only then you can add the remaining parts of the
    itp files
    """

    new_include = f'\n#include "{include_line}"\n'

    input_top_lines = read_file.read_file(input_top_file)

    for i, line in enumerate(input_top_lines):

        stripped = line.strip()

        # blank lines and comment lines are ignored
        if not stripped or stripped.startswith(';'):
            continue

        if stripped.startswith('#include'):

            input_top_lines[i] = line + new_include

            break

        # In case there is no FF include (e.g. a parmed generated topology)
        if stripped.startswith('[') and \
                line.split(';')[0].strip().replace(' ', '') != '[defaults]':

            input_top_lines[i] = new_include + line

            break

    # nothing suitable was found: do not silently drop the include,
    # put it at the end of the file
    else:
        if input_top_lines:
            input_top_lines[-1] += new_include

        else:
            input_top_lines.append(new_include)

    write_file.write_file(input_top_lines, output_top_file)
Beispiel #7
0
def remove_velocities(input_gro_file, output_gro_file, keep_velocities=None):
    """removes the velocities from a gro file

    Sometimes gromacs has some problems of instability
    if the velocities are given in the gro file,
    in that case it is better to remove them

    Pay attention if you have a heavy dummy atom, that should
    always have zero velocity, for that use `keep_velocities`

    The first two lines and the last line of the file are copied as they
    are, every line in between is cut after its first 44 characters
    unless its residue name (characters 6-10) is in `keep_velocities`

    Parameters
    ------------
    input_gro_file : str
        the name (or path) of the input gro file
    output_gro_file : str
        the name (or path) of the output gro file
        can be the same as the input one
    keep_velocities : list of strings
        the list of residue names (case sensitive) that should
        keep the velocity value (useful for dummy atoms),
        default None, no residue keeps its velocity
    """

    gro_lines = read_file.read_file(input_gro_file)

    kept_residues = [] if keep_velocities is None else keep_velocities

    def without_velocity(atom_line):
        """cuts the velocity columns unless the residue must keep them"""
        if atom_line[5:10].strip() in kept_residues:
            return atom_line

        return atom_line[:44] + '\n'

    # the two header lines are copied untouched
    stripped_lines = [gro_lines[0], gro_lines[1]]

    stripped_lines.extend(without_velocity(line) for line in gro_lines[2:-1])

    # so is the last line
    stripped_lines.append(gro_lines[-1])

    write_file.write_file(stripped_lines, output_gro_file)
def add_molecules(name, number, input_top_file, output_top_file):
    """appends a molecule entry at the end of a topology file

    The entry is written as "`name`     `number`" on a new line after the
    last line of the input file (no [ molecules ] header is written)

    Parameters
    -----------
    name : str
        the name of the molecule
    number : int
        the number of molecules
    input_top_file : str
    output_top_file : str
        can be the same as the input one
    """

    topology_lines = read_file.read_file(input_top_file)

    # the leading newline keeps the entry on its own line
    molecule_entry = f'\n{name}     {number}\n'

    topology_lines.append(molecule_entry)

    write_file.write_file(topology_lines, output_top_file)
Beispiel #9
0
def merge_gro_files(input_gro_file_1,
                    input_gro_file_2,
                    output_gro_file,
                    choose_box=1,
                    box_lenghts=None):
    """merge 2 gro files

    can come in handy to mix a particle with a box of water
    for alchemical transformations

    Atoms and residues are renumbered starting from 1, the atoms of
    `input_gro_file_1` come first. The first residue of the second file
    always starts a new residue. Numbers wrap around after 99999 in order
    to fit the 5 characters wide columns of the gro format

    Parameters
    -------------
    input_gro_file_1 : str
        the name (or path) of the input gro file nr 1
    input_gro_file_2 : str
        the name (or path) of the input gro file nr 2
    output_gro_file : str
        the name (or path) of the output gro file
        can be the same as one of the input ones
    choose_box : int, optional
        the last line of a gro file contains the box
        lenghts, if `choose_box` = 1 (default) the output
        gro file will have the box values of `input_gro_file_1`
        if `choose_box` = 2 the box values of `input_gro_file_2`
        if `choose_box` = None the function will read `box_lenghts`
    box_lenghts : iterable, optional
        if `choose_box` = None the box lenghts will be read from
        `box_lenghts`, it must be an iterable with 3 floating point
        values. Gromacs measures in nm

    Raises
    -------------
    ValueError
        if `choose_box` is not 1, 2 or None
    """

    def read_gro(gro_file):
        """reads a gro file dropping the blank lines at its end, so that
        the last line is always the box one
        """
        lines = read_file.read_file(gro_file)

        end = len(lines)

        while end > 0 and not lines[end - 1].strip():
            end -= 1

        return lines[:end]

    input_lines_1 = read_gro(input_gro_file_1)

    input_lines_2 = read_gro(input_gro_file_2)

    if choose_box == 1:

        box = input_lines_1[-1].split()

    elif choose_box == 2:

        box = input_lines_2[-1].split()

    elif choose_box is None:

        box = [f'{length:.5f}' for length in box_lenghts]

    else:
        raise ValueError(f'{choose_box} is not a valid value for choose_box')

    output_lines = ['Merged gro files\n']

    #adds number of atoms
    output_lines.append(' {:>5}\n'.format(
        int(input_lines_1[1].strip()) + int(input_lines_2[1].strip())))

    atom = 0
    residue = 0
    for lines in (input_lines_1, input_lines_2):

        # forget the last residue of the previous file: the first residue
        # of a file is always a new one, even if it has the same number
        previous_residue = None

        # first two lines are title and atom count, the last is the box
        for line in lines[2:-1]:

            if not line.strip():
                continue

            #check for residue change
            residue_id = line[:5].strip()

            if residue_id != previous_residue:

                previous_residue = residue_id

                residue += 1

            atom += 1

            output_lines.append('{:>5}{:<5}{:>5}{:>5}{}\n'.format(
                residue % 100000,  #residue number (wrapped to 5 digits)
                line[5:10],  #residue name
                line[10:15],  #atom name
                atom % 100000,  #atom number (wrapped to 5 digits)
                line[20:].strip('\n')))

    output_lines.append(2 * ' ' + '  '.join(box) + '\n')

    write_file.write_file(output_lines, output_gro_file)
Beispiel #10
0
def add_atom_to_gro_file(input_gro_file,
                         output_gro_file,
                         coordinates,
                         velocities=None,
                         atom_name='DU',
                         atom_residue_name='DUM'):
    """adds a given atom to a gro file

    comes in handy to add a dummy atom
    remember that unlike PDB files GRO files
    are in nanometers nm!!

    The atom is written after the last atom of the file (the line before
    the last non blank line, which is taken as the box line) with residue
    and atom number one bigger than those of that last atom. Numbers wrap
    around after 99999 in order to fit the 5 characters wide columns of
    the gro format. The atom count on the second line is increased by one

    Parameters
    -------------
    input_gro_file : str
        the name (or path) of the input gro file
    output_gro_file : str
        the name (or path) of the output gro file
        can be the same as the input one
    coordinates : iterable of float
        (x,y,z) indexable of any type (list, tuple, ...)
        remember that unlike PDB files GRO files
        are in nanometers nm!!
    velocities : iterable of float, optional
        (vx,vy,vz) indexable of any type (list, tuple, ...)
        for default velocities will be left blank
    atom_name : str
        2 characters atom name
    atom_residue_name :
        2 or 3 characters residue name
    """

    input_lines = read_file.read_file(input_gro_file)

    #adds an atom to the atom count
    input_lines[1] = '{:>5}\n'.format(int(input_lines[1].strip()) + 1)

    # does not depend on the line, so it is built once
    if velocities is not None:

        velocity_string = '{:8.4f}{:8.4f}{:8.4f}'.format(
            velocities[0], velocities[1], velocities[2])

    else:

        velocity_string = 24 * ' '

    # look for the last non blank line going backwards
    for i in range(len(input_lines) - 1, 0, -1):

        if input_lines[i].strip():

            last_atom = input_lines[i - 1]

            # the new atom is appended to the last atom line, so that it
            # ends up between that atom and the box line
            input_lines[i - 1] = last_atom \
                + '{:>5}{:<5}{:>5}{:>5}{:8.3f}{:8.3f}{:8.3f}{}\n'.format(
                    (int(last_atom[0:5]) + 1) % 100000,  #residue number
                    atom_residue_name,  #residue name
                    atom_name,  #atom name
                    (int(last_atom[15:20]) + 1) % 100000,  #atom number
                    coordinates[0],
                    coordinates[1],
                    coordinates[2],
                    velocity_string)

            break

    write_file.write_file(input_lines, output_gro_file)
def _pad_pdb_lines(lines, width=78):
    """Pads every line to at least `width` columns and ends it with one newline

    The trailing newline is removed before padding, so that the filling
    spaces never end up after it
    """
    return [line.rstrip('\n').ljust(width) + '\n' for line in lines]


def merge_pdb(input_pdb_1, input_pdb_2, output_pdb):
    """Merge 2 PDB files

    this function is brutal and memory consuming I should do it better in the future
    it adds file 2 to file 1 (the output file will be in order 1-2)

    The header of file 2 will be omitted and that of file 1 kept (its
    lines are only padded with spaces up to 78 columns).
    The part of file 2 that goes from its first ATOM/HETATM record to its
    last ATOM/HETATM/TER record is inserted right after the last
    ATOM/HETATM/TER record of file 1.
    Atom serial numbers and residue numbers of the inserted ATOM, HETATM
    and TER records continue the numbering of file 1, ANISOU records get
    the numbers of the atom they follow, any other record found in between
    is copied without renumbering. Renumbered records are cut at column 78

    Parameters
    ------------
    input_pdb_1 : str
    input_pdb_2 : str
    output_pdb : str
        can also be one of the input ones

    Raises
    ------------
    ValueError
        if one of the files has no ATOM or HETATM record
    """
    # pylint: disable=too-many-branches

    coordinate_records = ('ATOM', 'HETATM')

    #to be sure each line is long enough (there are plenty of
    # non standard PDB files)
    lines_1 = _pad_pdb_lines(read_file.read_file(input_pdb_1))

    #get the index of the line with the last ATOM HETATM or TER line
    #and get the resnum of the last residue
    index_protein_file = None

    for i in range(len(lines_1) - 1, -1, -1):

        if lines_1[i].startswith(coordinate_records):

            residue_number = int(lines_1[i][22:26])

            atom_number = int(lines_1[i][6:11])

            if index_protein_file is None:
                index_protein_file = i + 1

            break

        #some TER lines are non standard and don't contain the residue
        #number, so only the insertion point is taken from a closing TER
        #and the numbers from the last coordinate line before it
        if lines_1[i].startswith('TER') and index_protein_file is None:

            index_protein_file = i + 1

    else:
        raise ValueError("This PDB doesn't contain coordinates")

    lines_2 = _pad_pdb_lines(read_file.read_file(input_pdb_2))

    coordinate_indexes = [
        i for i, line in enumerate(lines_2)
        if line.startswith(coordinate_records)
    ]

    if not coordinate_indexes:
        raise ValueError("This PDB doesn't contain coordinates")

    beginnig_pdb_2 = coordinate_indexes[0]

    end_pdb_2 = coordinate_indexes[-1]

    #a TER found after the last coordinate line is kept too
    for i in range(len(lines_2) - 1, end_pdb_2, -1):

        if lines_2[i].startswith('TER'):

            end_pdb_2 = i

            break

    lines_2 = lines_2[beginnig_pdb_2:end_pdb_2 + 1]

    residue_number += 1

    old_residue_number = lines_2[0][22:26]

    for i, line in enumerate(lines_2):

        if line.startswith(coordinate_records):

            atom_number += 1

            if line[22:26] != old_residue_number:

                #remember the new residue, otherwise every following
                #atom would be counted as a residue change
                old_residue_number = line[22:26]

                residue_number += 1

        elif line.startswith('TER'):

            #a TER gets its own serial number but never starts a residue
            atom_number += 1

        elif not line.startswith('ANISOU'):

            #not a coordinate related record, its columns have another
            #meaning and must not be overwritten
            continue

        #the lines are padded, so [26:78] never contains the newline
        lines_2[i] = line[:6] + '{:>5}'.format(atom_number) + line[
            11:22] + '{:>4}'.format(residue_number) + line[26:78] + '\n'

    #insert the ligands in the right place of the protein_file list
    lines_1[index_protein_file:index_protein_file] = lines_2

    write_file.write_file(lines=lines_1, file_name=output_pdb)