コード例 #1
0
    def _make_TPR_files_script(self, q_lines, vdw_lines, fsdamdir):
        """
        private

        Writes the two bash scripts (MAKE_Q_TPR_FILES.sh and
        MAKE_VDW_TPR_FILES.sh) that create the .tpr files in loco on the
        HPC cluster

        q_lines :: iterable of strings, the commands of the Q script

        vdw_lines :: iterable of strings, the commands of the VDW script

        fsdamdir :: string, the directory in which the scripts are written,
        every "{fsdamdir}/" occurrence is removed from the script text

        returns nothing
        """

        order_note = "#if you are annihilating make first Q then VDW, if you are creating viceversa\n\n\n"

        #the Q script is written first, then the VDW one
        for label, commands in (("Q", q_lines), ("VDW", vdw_lines)):

            script = (
                f"#!/bin/bash\n \n##THIS SCRIPT CREATES THE {label} TPR FILES RUN IT BEFORE THE WORKLOADMANAGER ONE\n"
                + order_note + '\n'.join(commands))

            #the directory prefix is removed from the whole text, header included
            script = script.replace(f'{fsdamdir}/', '')

            write_on_files.write_file(
                lines=[script],
                file_name=f"{fsdamdir}/MAKE_{label}_TPR_FILES.sh")
コード例 #2
0
    def _change_absolute_ligand_itp_include_in_relative(self, top_file):
        """
        private

        Rewrites in place the #include lines of the given topology file that
        point to a ligand itp file (or that contain 'DUM'): only the file name
        after the last "/" is kept, so that the #include is still meaningful
        when the directory is copied on a different computer (the HPC cluster)

        top_file :: string, the topology file to edit (it is overwritten)

        returns nothing
        """

        lines = read_file.read_file(file_name=top_file)

        known_itp_files = [
            ligand.itp_file for ligand in self.Protein.get_ligand_list()
        ]

        for idx, line in enumerate(lines):

            if not line.strip().startswith("#include"):
                continue

            #second whitespace separated field, without the double quotes
            included = line.split()[1].replace('"', '').strip()

            if included in known_itp_files or 'DUM' in line:

                bare_name = included.split("/")[-1]

                lines[idx] = f'#include "{bare_name}"\n'

        write_on_files.write_file(lines=lines, file_name=top_file)
コード例 #3
0
    def _edit_top(ligand_top, only_solvent_top):
        """
        Takes the last non-blank line of only_solvent_top and inserts it
        right before the last non-blank line of ligand_top
        (ligand_top is overwritten)

        ligand_top :: string, the topology file to edit in place

        only_solvent_top :: string, the topology file the line is taken from

        raises ValueError if a line has to be inserted but only_solvent_top
        contains no non-blank line

        returns nothing
        """

        #last non-blank line of the solvent topology, None if there is none
        solvent_line = None

        for line in reversed(read_file.read_file(file_name=only_solvent_top)):

            if line.strip():

                solvent_line = line.rstrip()

                break

        ligand_lines = read_file.read_file(file_name=ligand_top)

        for i in range(len(ligand_lines) - 1, -1, -1):

            if ligand_lines[i].strip():

                #fail with a clear message instead of an unbound variable error
                if solvent_line is None:
                    raise ValueError(
                        f"{only_solvent_top} contains no non-blank line to insert in {ligand_top}"
                    )

                ligand_lines[i] = solvent_line + '\n' + ligand_lines[i]

                break

        write_on_files.write_file(lines=ligand_lines, file_name=ligand_top)
コード例 #4
0
def add_chain_id(pdb_file, chain="A"):
    """
    This is a patch because orac and primadorac remove the chain id from
    pdb files and this confuses some pdb parsers (works on PDB files only)

    The chain id is written right-aligned on the two characters at
    indexes 20-21 of every ATOM, HETATM and TER line, the file is overwritten

    pdb_file :: string, the pdb file to edit

    chain :: string, default A, the chain id to add to the pdb_file

    returns nothing
    """

    chain = chain.upper().strip()

    lines = read_file.read_file(file_name=pdb_file)

    for i, line in enumerate(lines):

        if line.startswith(('ATOM', 'HETATM', 'TER')):

            #the newline is removed BEFORE slicing and short records
            #(for example a bare "TER") are padded, otherwise the chain id
            #would end up after the newline on a line of its own
            record = line.rstrip('\n')

            lines[i] = record[:20].ljust(20) + "{0:>2}".format(
                chain) + record[22:] + '\n'

    write_on_files.write_file(lines=lines, file_name=pdb_file)
コード例 #5
0
def _create_input_file():
    """
    private

    Writes the string "aaa" on tests/files4tests/file_to_append.txt

    returns the file name and the written string
    """

    content = "aaa"
    target = 'tests/files4tests/file_to_append.txt'

    write_on_files.write_file(lines=content, file_name=target)

    return target, content
コード例 #6
0
    def _write_template_on_file(self):
        """
        private

        Joins the items of self.template with newlines and writes the
        resulting text on self.mdp_file
        """

        mdp_text = "\n".join(self.template)

        write_on_files.write_file(lines=[mdp_text], file_name=self.mdp_file)
コード例 #7
0
    def test_wrong_input(self):
        """write_file must raise TypeError for each of the wrong inputs"""

        wrong_inputs = (1, 1.5)
        output_file = "tests/files4tests/dummy.txt"

        try:

            for wrong_input in wrong_inputs:

                #one assertRaises per input: a single context manager around
                #the loop exits at the first exception and the following
                #inputs would never be tested
                with self.subTest(wrong_input=wrong_input):

                    with self.assertRaises(TypeError):

                        write_on_files.write_file(lines=wrong_input,
                                                  file_name=output_file)

        finally:

            #clean up even when an assertion fails
            _clean(output_file)
コード例 #8
0
    def _write_template_on_file(self, template=None):
        """
        private

        Joins the items of the template with newlines and writes the
        resulting text on self.orac_in_file

        template :: iterable of strings, default None, if None
        self.template is used
        """

        chosen_template = self.template if template is None else template

        text = "\n".join(chosen_template)

        write_on_files.write_file(lines=[text], file_name=self.orac_in_file)
コード例 #9
0
ファイル: hrem.py プロジェクト: MauriceKarrenbrock/HPC_Drug
    def execute(self):
        """
        Writes the Slurm (HREM_input.slr) and the PBS (HREM_input.pbs)
        input files in self.HREM_dir

        Each file is the header made by the corresponding workload manager
        class followed by the orac mpirun string, joined with newlines

        returns nothing
        """

        cpus_per_task = 8
        tasks = self.BATTERIES * self.replicas
        nodes = math.ceil(tasks / math.floor(self.cpus_per_node / cpus_per_task))

        #the options are the same for every workload manager
        header_options = dict(
            nodes=nodes,
            tasks=tasks,
            tasks_per_node=math.floor(self.cpus_per_node / cpus_per_task),
            cpus_per_task=cpus_per_task,
            GPUs=None,
            wall_time="24:00:00",
            output="HREM_stdout.out",
            error="HREM_stderr.err")

        mpirun_string = self.write_orac_mpirun_string()

        def _write_input(header_class, extension):
            """makes the header, appends the mpirun string and writes the file"""

            header = header_class(**header_options).execute()

            header.append(mpirun_string)

            write_on_files.write_file(lines=["\n".join(header)],
                                      file_name=self.HREM_dir + "/" +
                                      f"HREM_input.{extension}")

        #slurm first, then pbs
        _write_input(workload_managers.SlurmHeader, "slr")

        _write_input(workload_managers.PBSHeader, "pbs")
コード例 #10
0
    def test_with_string(self):
        """a two line string given as `lines` must be read back as two lines"""

        word = "string"
        output_file = "tests/files4tests/test_with_string.txt"

        try:
            write_on_files.write_file(lines=f"{word}\n{word}",
                                      file_name=output_file)

            with open(output_file, 'r') as f:
                written = f.readlines()

            self.assertEqual(written, [word + '\n', word])

        finally:

            _clean(output_file)
コード例 #11
0
    def _make_TPR_files_script(self, scaled_topologies):
        """
        private

        Writes in self.HREM_dir the bash script MAKE_TPR_FILES.sh that
        creates the .tpr files in loco on the HPC cluster: one `gmx grompp`
        command for every BATTERY{i}/scaled{j} directory

        scaled_topologies :: sequence of strings, scaled_topologies[j] is the
        topology file used for the j-th replica
        """

        filename = "MAKE_TPR_FILES.sh"

        #every header line after the shebang must be a comment: the second
        #one had no leading "#" and bash tried to execute it as a command
        header = (
            "#!/bin/bash\n \n"
            "##THIS SCRIPT CREATES THE TPR FILES RUN IT BEFORE THE WORKLOADMANAGER ONE\n"
            "##IT IS FOR A PLUMED PATCHED GROMACS INSTALLATION\n\n\n")

        #only the file name of the gro file is used
        gro_name = self.Protein.gro_file.rsplit('/', 1)[-1]

        commands = [
            f"gmx grompp -maxwarn 100 -o BATTERY{i}/scaled{j}/{self.output_tpr_file} -f {self.mdp_file} -p {scaled_topologies[j]} -c {gro_name} \n"
            for i in range(self.BATTERIES) for j in range(self.replicas)
        ]

        write_on_files.write_file(lines=[header + "".join(commands)],
                                  file_name=self.HREM_dir + "/" + filename)
コード例 #12
0
    def test_with_tuple(self):
        """tuples of strings must be written one item after the other"""

        #(input tuple, lines expected when the file is read back)
        cases = (
            (("string", "string"), ["stringstring"]),
            (("string\n", "string\n"), ["string\n", "string\n"]),
        )

        output_file = "tests/files4tests/test_with_tuple.txt"

        try:

            for input_tuple, expected_lines in cases:
                write_on_files.write_file(lines=input_tuple,
                                          file_name=output_file)

                with open(output_file, 'r') as f:
                    written = f.readlines()

                self.assertEqual(written, expected_lines)

        finally:

            _clean(output_file)
コード例 #13
0
    def _edit_itp(self, ligand_resname, itp_file):
        """
        private

        Edits in place the itp file made by primadorac: the first 9 lines
        are removed (they make gromacs fail) and every 'LIG' and 'name-p'
        occurrence is replaced with ligand_resname

        ligand_resname :: string, the residue name of the ligand

        itp_file :: string, the itp file to edit (it is overwritten)

        returns the absolute path of itp_file
        """

        original_lines = read_file.read_file(file_name=itp_file)

        #skip the first 9 lines and be sure that any line ends with one newline
        edited_lines = [
            line.replace('LIG', ligand_resname).replace(
                'name-p', ligand_resname).strip('\n') + '\n'
            for line in original_lines[9:]
        ]

        write_on_files.write_file(lines=edited_lines, file_name=itp_file)

        return path.absolute_filepath(path=itp_file)
コード例 #14
0
    def _create_top_file(self):
        """
        private

        Writes for any ligand of self.Protein a
        "{resname}_only_ligand.top" topology file in the current working
        directory and stores its path in the top_file attribute of the ligand

        returns nothing
        """

        for ligand in self.Protein.get_ligand_list():

            top_path = os.getcwd() + "/" + f"{ligand.resname}_only_ligand.top"

            top_lines = [
                '\n',
                ';protein ff (needed for the water itp file\n',
                f'#include  "{self.Protein.tpg_file}.ff/forcefield.itp" \n',
                '\n',
                #comment and #include of the ligand are a single item
                f';ligand itp file\n#include "{ligand.itp_file}"\n',
                '\n',
                ';water itp\n',
                f'#include  "{self.Protein.tpg_file}.ff/{self.solvent_model}.itp" \n',
                '\n',
                "[ system ]\n",
                "; Name\n",
                "Protein in water\n",
                '\n',
                '[ molecules ]\n',
                '; Compound        #mols\n',
                f'{ligand.resname}              1\n',
            ]

            write_on_files.write_file(lines=top_lines, file_name=top_path)

            ligand.top_file = top_path
コード例 #15
0
    def _edit_top_file(self):
        """
        Private

        Adds to the protein top file what is needed to include the ligands:
        a "{resname}  1" line per ligand at the end of the file and the
        #include lines of the ligand itp files, that are put after the
        first #include line or before the first [ moleculetype ] line,
        whichever comes first (blank lines and comments are skipped)

        The result is written on self.output_top_file, that becomes the
        new self.Protein.top_file

        returns self.Protein
        """

        ligands = self.Protein.get_ligand_list()

        top = read_file.read_file(file_name=self.Protein.top_file)

        include_block = ''.join(f'#include "{ligand.itp_file}"\n'
                                for ligand in ligands)

        top.extend(f'{ligand.resname}              1 \n' for ligand in ligands)

        for idx, line in enumerate(top):

            stripped = line.strip()

            #blank lines and comments are ignored
            if not stripped or stripped[0] == ";":
                continue

            if stripped[0:8] == "#include":
                top[idx] = line + '\n' + include_block + '\n'
                break

            if stripped.replace(" ", "").split(";")[0] == '[moleculetype]':
                top[idx] = include_block + '\n' + line
                break

        write_on_files.write_file(lines=top, file_name=self.output_top_file)

        self.Protein.top_file = self.output_top_file

        return self.Protein
コード例 #16
0
    def execute(self):
        """This method does not run a simulation, it creates,
        for any ligand of self.Protein, a "{resname}_only_ligand_HREM"
        directory with the input needed to make a REM simulation
        on a HPC cluster

        If self.Protein has no ligands nothing is done

        returns self.Protein"""

        #nothing to do without ligands
        if self.Protein.get_ligand_list() == []:
            return self.Protein

        #create an empty file for plumed (it contains only a newline)
        write_on_files.write_file(lines = ['\n'], file_name = "empty_plumed.dat")

        Ligand = self.Protein.get_ligand_list()

        for i in range(len(Ligand)):

            #the input directory that will be created
            self.HREM_dir = f"{Ligand[i].resname}_only_ligand_HREM"

            

            #the file names used for this ligand, they are instance attributes
            #because the helper methods called below read them
            self.elaborated_top_file = f"{Ligand[i].resname}_only_ligand_elaborated_topology.top"

            self.mdp_file = f"{Ligand[i].resname}_only_ligand_HREM.mdp"

            self.output_tpr_file = f"HREM.tpr"

            #creates the REM directory that will be copied to the HPC cluster
            os.makedirs(self.HREM_dir, exist_ok=True)

            #create the BATTERY dirs and the scaled dirs
            for k in range(self.BATTERIES):

                for j in range(self.replicas):

                    os.makedirs(self.HREM_dir + "/" + f"BATTERY{k}" + "/" + f"scaled{j}", exist_ok=True)

                    #copy the empty file for plumed
                    shutil.copy("empty_plumed.dat", self.HREM_dir + "/" + f"BATTERY{k}" + "/" + f"scaled{j}")

            #write the mdp file
            self._write_template_on_file()
            #and copy it in the HREM dir
            shutil.copy(self.mdp_file, self.HREM_dir)

            #create the elaborated top file for plumed
            #(grompp -pp writes the processed topology on self.elaborated_top_file)
            self._interact_with_gromacs(string = [f"{self.MD_program_path}", "grompp", "-f", f"{self.mdp_file}", "-c", f"{Ligand[i].gro_file}", "-p", f"{Ligand[i].top_file}", "-maxwarn", "100", "-pp", f"{self.elaborated_top_file}"])

            #finds the hot residues and edits the elaborated top file accordingly
            self._edit_top_file(top_file = self.elaborated_top_file)


            #use plumed to make the scaled topologies

            hamiltonian_scaling_values = self._get_hamiltonian_scaling_values(scale=0.1)
            #only the file names are stored, not the paths
            scaled_topologies = []
            for counter, value in enumerate(hamiltonian_scaling_values):
                #they are already saved in the HREM dir
                self._plumed_partial_tempering(scaling_value = value,
                                            input_file = self.elaborated_top_file,
                                            output_file = self.HREM_dir + "/" + f"{Ligand[i].resname}_scaled_{counter}.top")

                scaled_topologies.append(f"{Ligand[i].resname}_scaled_{counter}.top")


            #copy the needed files in the new directories
            for j in (Ligand[i].gro_file, Ligand[i].top_file, Ligand[i].itp_file, self.only_solvent_box_gro, self.only_solvent_box_top):

                shutil.copy(j, self.HREM_dir)

            
            #write workload manager input for different workload managers (slurm pbs ...)
            #already in the self.HREM_dir
            self._write_workloadmanager_inputs()
            

            #make and copy the script that will make the tpr files in loco
            self._make_TPR_files_script(scaled_topologies = scaled_topologies, Ligand = Ligand[i])


            #create a file with important info for post processing of the HREM output
            # key = value
            #(only the file names are written, not the paths)
            important_info = [
                f"ligand_resname = {Ligand[i].resname}\n",
                f"ligand_itp = {Ligand[i].itp_file.split('/')[-1]}\n",
                f"top_file = {Ligand[i].top_file.split('/')[-1]}\n",
                f"only_solvent_gro = {self.only_solvent_box_gro.split('/')[-1]}\n",
                f"only_solvent_top = {self.only_solvent_box_top.split('/')[-1]}\n"
            ]

            write_on_files.write_file(lines = important_info, file_name = self.HREM_dir + "/" + "important_info.dat")


            #as gmx2pdb makes some random but needed itp files some times
            #I lazily copy them all (any .itp file of the working directory)
            for file_name in os.listdir(os.getcwd()):

                if file_name[-4:] == ".itp":
                    shutil.copy(file_name, self.HREM_dir)

            #change the absolute #include in relative ones as they would make no sense when the directory will be copied
            #on a different computer (the HPC cluster)
            for file_name in os.listdir(self.HREM_dir):

                if file_name[-4:] == ".top":
                    self._change_absolute_ligand_itp_include_in_relative(top_file = self.HREM_dir + "/" + file_name)



        return self.Protein
コード例 #17
0
    def _edit_top_file(self, top_file = None):
        """
        PRIVATE

        "Heats" the atoms of the ligands and of the hot residues in the
        given topology file (it is overwritten): in any [ atoms ] section an
        underscore is appended to the second field of the lines whose fourth
        field is a ligand resname or whose third field is a hot residue id
        (presumably atom type, residue name and residue number, to be
        confirmed), lines whose fourth field is SOL are never touched

        The edited lines are rewritten with single spaces between the fields

        top_file :: string, default None, if None self.elaborated_top_file is used

        returns nothing
        """

        if top_file is None:
            top_file = self.elaborated_top_file

        #the ids of the hot residues, as stripped strings
        hot_residues = self.orient.get_hot_residues_for_rem(Protein = self.Protein, Ligand = self.Protein.get_ligand_list(), cutoff = 4.5, residue_dist = 10.0)
        hot_ids = [str(residue[1]).strip() for residue in hot_residues]

        ligands_resnames = [ligand.resname for ligand in self.Protein.get_ligand_list()]

        lines = read_file.read_file(file_name = top_file)

        #True while the loop is inside an [ atoms ] section
        inside_atoms = False

        for idx, line in enumerate(lines):

            stripped = line.strip()

            #empty lines and comments are skipped
            if not stripped or stripped[0] == ";":
                continue

            fields = stripped.split()

            #beginning of an atoms section
            if stripped.replace(" ", "").split(";")[0] == "[atoms]":
                inside_atoms = True
                continue

            #any other section header ends the atoms section
            if fields[0][0] == "[":
                inside_atoms = False
                continue

            if not inside_atoms or len(fields) < 4:
                continue

            #do not heat solvent
            if fields[3] == "SOL":
                continue

            #heat the ligand and the near residues
            if fields[3] in ligands_resnames or fields[2] in hot_ids:

                fields[1] = fields[1] + "_"

                lines[idx] = " ".join(fields) + "\n"

        write_on_files.write_file(lines = lines, file_name = top_file)
コード例 #18
0
def merge_pdb(Protein):
    """
    Will put all the given ligands after the protein and update the ligand resnums
    this function is brutal and memory consuming I should do it better in the future

    The lines of the ligand files are inserted right after the last ATOM,
    HETATM or TER line of the protein file, any ligand gets the residue
    number following the one of the previous residue. The result is written
    on "{Protein.protein_id}_joined.pdb"

    both the protein and the ligands should be in PDB files (no check will be done)

    Protein :: HPC_Drug.structures.protein.Protein instance with a valid _ligands value

    raises ValueError if the protein file has no ATOM, HETATM or TER line

    return Protein with updated Protein.pdb_file
    """

    #records used to find the end of the protein
    coordinate_records = ('ATOM', 'HETATM', 'TER')

    #records of the ligand files whose residue number is updated
    renumbered_records = coordinate_records + ('ANISOU', )

    protein_file = read_file.read_file(file_name=Protein.pdb_file)

    ligands = Protein.get_ligand_list()

    #get the index of the line with the last ATOM HETATM or TER line
    #and get the resnum of this last residue
    residue_number = None
    index_protein_file = None
    for i in range(len(protein_file) - 1, -1, -1):

        if protein_file[i].startswith(coordinate_records):

            #some TER lines are non standard and don't contain the residue number
            #in that case the number is taken from the previous line
            try:
                residue_number = int(protein_file[i][22:26].strip())
            except ValueError:
                residue_number = int(protein_file[i - 1][22:26].strip())

            index_protein_file = i + 1

            break

    #explicit error instead of an unbound variable one
    if index_protein_file is None:
        raise ValueError(
            f"{Protein.pdb_file} contains no ATOM, HETATM or TER line")

    #create the ligands list of strings
    ligand_file = []
    for ligand in ligands:

        residue_number = residue_number + 1

        #update resnum
        ligand.resnum = residue_number

        for line in read_file.read_file(file_name=ligand.pdb_file):

            #update the residue number only on the lines that have that
            #field, overwriting the same columns on any other line
            #(for example a short "END") would corrupt it
            if line.startswith(renumbered_records) and len(
                    line.rstrip('\n')) >= 26:

                line = line[:22] + "{0:>4}".format(residue_number) + line[26:]

            ligand_file.append(line)

    #insert the ligands in the right place of the protein_file list
    protein_file[index_protein_file:index_protein_file] = ligand_file

    #be sure to get the right formatting
    protein_file = [line.strip('\n') + '\n' for line in protein_file]

    #write the joined structure on a new file
    write_on_files.write_file(lines=protein_file,
                              file_name=f"{Protein.protein_id}_joined.pdb")

    Protein.pdb_file = path.absolute_filepath(
        path=f"{Protein.protein_id}_joined.pdb")

    return Protein
コード例 #19
0
def remove_trash_metal_ions(Protein, trash = important_lists.trash_ions):
    """This function removes unwanted metal ions
    that are still inside the structure after it went through prody
    selection (Protein.pdb_file is overwritten)

    This is a brutal function I will need to do a better job

    For pdb files only the ATOM, HETATM and TER lines whose residue name
    is not in trash are kept and an "end" line is appended, for cif files
    only the ATOM and HETATM lines whose residue name is in trash are
    removed. Any kept line is stripped of leading and trailing whitespaces

    Protein :: HPC_Drug.structures.protein.Protein instance

    trash :: container of upper case residue names to remove,
    default important_lists.trash_ions

    Protein.file_type must be pdb or cif otherwise TypeError will be raised

    return Protein
    """

    def determine_pdb(line, trash = trash):
        """True if the pdb line must be kept"""

        #if it is a line containing atom coordinates
        if line[0:4] == 'ATOM' or line[0:6] == 'HETATM' or line[0:3] == 'TER':

            return line[17:20].strip().upper() not in trash

        #if it is not will not be kept anyway
        return False

    def determine_cif(line, trash = trash):
        """True if the cif line must be kept"""

        fields = line.split()

        #if it is a line containing atom coordinates the sixth field is
        #used as residue name
        #blank lines and atom lines with less than six fields are kept
        #as they are instead of raising an IndexError
        if len(fields) > 5 and fields[0] in ('ATOM', 'HETATM'):

            return fields[5].upper() not in trash

        #if it is not will be kept anyway
        return True

    file_name = Protein.pdb_file

    lines = read_file.read_file(file_name = file_name)

    #keeping only the "good" lines
    if Protein.file_type == 'pdb':

        lines[:] = [x for x in lines if determine_pdb(x)]

        #NOTE(review): the PDB format END record is upper case, left as it
        #was to not change the output
        lines.append("end")

    elif Protein.file_type == 'cif':

        lines[:] = [x for x in lines if determine_cif(x)]

    else:
        raise TypeError(f"Protein.file_type must be pdb or cif not {Protein.file_type}")

    lines[:] = [x.strip() + '\n' for x in lines]

    write_on_files.write_file(lines = lines, file_name = file_name)

    Protein.pdb_file = file_name

    return Protein