Example #1
0
def process_output_fpocket_filter(search_list, tmp_folder, input_pockets_zip,
                                  output_filter_pockets_zip, remove_tmp,
                                  out_log):
    """Creates the output_filter_pockets_zip.

    The input archive is unpacked into ``tmp_folder``; every entry of that
    folder whose file name contains ``<item>_`` for at least one item of
    ``search_list`` is packed into ``output_filter_pockets_zip``.

    Args:
        search_list: Iterable of strings; each one is matched as
            ``item + '_'`` against the file names.
        tmp_folder: Directory the archive is extracted into and whose
            entries are scanned.
        input_pockets_zip: Path of the zip file to unpack.
        output_filter_pockets_zip: Path of the zip file to create.
        remove_tmp: When true, ``tmp_folder`` is removed once the output
            archive has been written.
        out_log: Logger handed to the ``fu`` helpers.
    """

    # decompress the input_pockets_zip file to tmp_folder
    fu.unzip_list(zip_file=input_pockets_zip,
                  dest_dir=tmp_folder,
                  out_log=out_log)

    # Build the markers once; this also lets search_list be any iterable.
    markers = [s + '_' for s in search_list]

    # Select every entry of tmp_folder matching at least one marker.
    # - any() keeps each file once even when several markers match it.
    # - Testing the file name (not the whole path) keeps the name of
    #   tmp_folder itself from producing false positives.
    sel_pockets_list = [
        str(entry) for entry in Path(tmp_folder).iterdir()
        if any(marker in entry.name for marker in markers)
    ]

    fu.log('Creating %s output file' % output_filter_pockets_zip, out_log)

    # compress output to output_filter_pockets_zip
    fu.zip_list(zip_file=output_filter_pockets_zip,
                file_list=sel_pockets_list,
                out_log=out_log)

    if remove_tmp:
        # remove temporary folder
        fu.rm(tmp_folder)
        fu.log('Removed temporary folder: %s' % tmp_folder, out_log)
Example #2
0
    def launch(self) -> int:
        """Execute the :class:`FPocketSelect <fpocket.fpocket_select.FPocketSelect>` fpocket.fpocket_select.FPocketSelect object.

        Unpacks ``input_pockets_zip`` into a fresh temporary folder and copies
        the files belonging to pocket number ``self.pocket`` to the configured
        outputs: the ``.pdb`` file goes to ``output_pocket_pdb``, any other
        matching file goes to ``output_pocket_pqr``.

        Returns:
            Always ``0`` (also when the step is skipped because of restart).
        """

        # Get local loggers from launchlogger decorator
        out_log = getattr(self, 'out_log', None)
        err_log = getattr(self, 'err_log', None)

        # check input/output paths and parameters
        self.check_data_params(out_log, err_log)

        # Check the properties
        fu.check_properties(self, self.properties)

        if self.restart:
            output_file_list = [
                self.io_dict["out"]["output_pocket_pdb"],
                self.io_dict["out"]["output_pocket_pqr"]
            ]
            if fu.check_complete_files(output_file_list):
                fu.log(
                    'Restart is enabled, this step: %s will the skipped' %
                    self.step, out_log, self.global_log)
                return 0

        # create tmp_folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, out_log)

        # decompress the input_pockets_zip file to tmp_folder
        all_pockets = fu.unzip_list(
            zip_file=self.io_dict["in"]["input_pockets_zip"],
            dest_dir=self.tmp_folder,
            out_log=out_log)

        # The trailing underscore anchors the pocket number, so pocket 1 does
        # not also pick up pocket10, pocket11, ...  Only the file name is
        # inspected, so the temporary folder name cannot cause a match.
        pocket_tag = 'pocket' + str(self.pocket) + '_'
        pockets_list = [
            i for i in all_pockets if pocket_tag in PurePath(i).name
        ]

        if not pockets_list:
            # Nothing will be copied below; make that visible in the log.
            fu.log(
                'WARNING: no files found for pocket %s in %s' %
                (self.pocket, self.io_dict["in"]["input_pockets_zip"]),
                out_log)

        for p in pockets_list:
            if PurePath(p).suffix == '.pdb':
                fu.log(
                    'Saving %s file' %
                    self.io_dict["out"]["output_pocket_pdb"], out_log)
                shutil.copy(p, self.io_dict["out"]["output_pocket_pdb"])
            else:
                fu.log(
                    'Saving %s file' %
                    self.io_dict["out"]["output_pocket_pqr"], out_log)
                shutil.copy(p, self.io_dict["out"]["output_pocket_pqr"])

        if self.remove_tmp:
            # remove temporary folder
            fu.rm(self.tmp_folder)
            fu.log('Removed temporary folder: %s' % self.tmp_folder, out_log)

        return 0
Example #3
0
    def launch(self) -> int:
        """Execute the :class:`Pmxanalyse <pmx.pmxanalyse.Pmxanalyse>` pmx.pmxanalyse.Pmxanalyse object.

        Unpacks both xvg archives into their own temporary folders, keeps the
        extracted files that exist and are larger than 10 bytes, assembles the
        ``pmx analyse`` command line from the configured properties and runs
        it through ``run_biobb``.

        Returns:
            ``0`` when ``check_restart()`` is true, otherwise
            ``self.return_code``.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Outside a container the executable has to be an existing file or be
        # resolvable through the PATH.
        if (not self.container_path
                and not Path(self.pmx_path).is_file()
                and not shutil.which(self.pmx_path)):
            raise FileNotFoundError(
                'Executable %s not found. Check if it is installed in your system and correctly defined in the properties'
                % self.pmx_path)

        def _is_usable(xvg_file):
            # An extracted file is kept when it exists and holds more than 10 bytes.
            candidate = Path(xvg_file)
            return candidate.exists() and candidate.stat().st_size > 10

        def _verbatim(value):
            # Option values that are passed on without conversion.
            return value

        list_a_dir = fu.create_unique_dir()
        list_b_dir = fu.create_unique_dir()
        extracted_a = fu.unzip_list(self.input_a_xvg_zip_path, list_a_dir,
                                    self.out_log)
        list_a = [xvg for xvg in extracted_a if _is_usable(xvg)]
        extracted_b = fu.unzip_list(self.input_b_xvg_zip_path, list_b_dir,
                                    self.out_log)
        list_b = [xvg for xvg in extracted_b if _is_usable(xvg)]

        string_a = " ".join(list_a)
        string_b = " ".join(list_b)

        # Copy extra files to container: two directories containing the xvg files
        if self.container_path:
            for xvg_dir in (list_a_dir, list_b_dir):
                staging_root = Path(self.stage_io_dict.get("unique_dir"))
                shutil.copytree(xvg_dir,
                                staging_root.joinpath(Path(xvg_dir).name))
            # Every file is addressed relative to the container volume path.
            volume_prefix = self.container_volume_path + "/"
            separator = " " + volume_prefix
            string_a = volume_prefix + separator.join(list_a)
            string_b = volume_prefix + separator.join(list_b)

        staged_out = self.stage_io_dict["out"]
        self.cmd = [
            self.pmx_path, 'analyse',
            '-fA', string_a,
            '-fB', string_b,
            '-o', staged_out["output_result_path"],
            '-w', staged_out["output_work_plot_path"],
        ]

        # (property, command line flag, value converter); a converter of None
        # marks a switch that takes no value.  Order matches the command line.
        option_table = (
            ('method', '-m', _verbatim),
            ('temperature', '-t', str),
            ('nboots', '-b', str),
            ('nblocks', '-n', str),
            ('integ_only', '--integ_only', None),
            ('reverseB', '--reverseB', None),
            ('skip', '--skip', str),
            ('slice', '--slice', _verbatim),
            ('rand', '--rand', None),
            ('index', '--index', _verbatim),
            ('prec', '--prec', str),
            ('units', '--units', _verbatim),
            ('no_ks', '--no_ks', None),
            ('nbins', '--nbins', str),
            ('dpi', '--dpi', str),
        )
        for prop_name, flag, convert in option_table:
            value = getattr(self, prop_name)
            if not value:
                continue
            self.cmd.append(flag)
            if convert is not None:
                self.cmd.append(convert(value))

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        self.tmp_files.extend(
            [self.stage_io_dict.get("unique_dir"), list_a_dir, list_b_dir])
        self.remove_tmp_files()

        return self.return_code
Example #4
0
    def launch(self):
        """Launches the execution of the LeapAddIons module.

        Writes a ``leap.in`` instruction file into a fresh temporary folder,
        runs ``tleap -f leap.in`` through ``run_biobb`` and copies the results
        back with ``copy_to_host``.  When ``box_type`` is not ``"cubic"``, the
        box written by tleap is replaced, in both the coordinates and the
        topology output files, by the one read from the ``OCTBOX`` first line
        of the input PDB file.  If that line is missing or cannot be parsed, a
        warning is logged and the tleap output is left untouched.

        Returns:
            ``0`` when ``check_restart()`` is true, otherwise
            ``self.return_code``.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        # Water Type
        # leaprc.water.tip4pew, tip4pd, tip3p, spceb, spce, opc, fb4, fb3
        # Values: POL3BOX, QSPCFWBOX, SPCBOX, SPCFWBOX, TIP3PBOX, TIP3PFBOX, TIP4PBOX, TIP4PEWBOX, OPCBOX, OPC3BOX, TIP5PBOX.
        # Every alternative must be a complete leap command, i.e. start with
        # "source"; a bare "leaprc.water.*" line is not a leap command.
        source_wat_command = "source leaprc.water.tip3p"
        if self.water_type == "TIP4PEWBOX":
            source_wat_command = "source leaprc.water.tip4pew"
        if self.water_type == "TIP4PBOX":
            source_wat_command = "source leaprc.water.tip4pd"
        if re.match(r"SPC", self.water_type):
            source_wat_command = "source leaprc.water.spce"
        if re.match(r"OPC", self.water_type):
            source_wat_command = "source leaprc.water.opc"

        def _add_ions(ion_type, count):
            # One "addionsRand" leap instruction for the given ion and count.
            return "addionsRand mol " + ion_type + " " + str(count) + " \n"

        def _as_file_list(path):
            # None -> no files, zip -> extracted members, anything else -> itself.
            if path is None:
                return []
            if path.endswith('.zip'):
                return fu.unzip_list(path, dest_dir=self.tmp_folder, out_log=self.out_log)
            return [path]

        # Counterions
        ions_command = ""
        if self.neutralise:
            ions_command += _add_ions(self.negative_ions_type, 0)
            ions_command += _add_ions(self.positive_ions_type, 0)

        if self.ionic_concentration and self.negative_ions_number == 0 and self.positive_ions_number == 0:
            # No explicit ion counts given: use the number stored in self.nio
            # by find_out_number_of_ions() for both ion types.
            self.find_out_number_of_ions()
            ions_command += _add_ions(self.negative_ions_type, self.nio)
            ions_command += _add_ions(self.positive_ions_type, self.nio)
        else:
            if self.negative_ions_number != 0:
                ions_command += _add_ions(self.negative_ions_type, self.negative_ions_number)
            if self.positive_ions_number != 0:
                ions_command += _add_ions(self.positive_ions_type, self.positive_ions_number)

        ligands_lib_list = _as_file_list(self.io_dict['in']['input_lib_path'])
        ligands_frcmod_list = _as_file_list(self.io_dict['in']['input_frcmod_path'])
        amber_params_list = _as_file_list(self.io_dict['in']['input_params_path'])
        leap_source_list = _as_file_list(self.io_dict['in']['input_source_path'])

        instructions_file = str(PurePath(self.tmp_folder).joinpath("leap.in"))
        with open(instructions_file, 'w') as leapin:
            # Forcefields loaded from input forcefield property
            for t in self.forcefield:
                leapin.write("source leaprc.{}\n".format(t))

            # Additional Leap commands
            for leap_commands in leap_source_list:
                leapin.write("source " + leap_commands + "\n")

            # Water Model loaded from input water_model property
            leapin.write(source_wat_command + " \n")

            # Ions Type
            if self.ions_type != "None":
                leapin.write("loadamberparams frcmod." + self.ions_type + "\n")

            # Additional Amber parameters
            for amber_params in amber_params_list:
                leapin.write("loadamberparams " + amber_params + "\n")

            # Ligand(s) libraries (if any)
            for amber_lib in ligands_lib_list:
                leapin.write("loadOff " + amber_lib + "\n")
            for amber_frcmod in ligands_frcmod_list:
                leapin.write("loadamberparams " + amber_frcmod + "\n")

            # Loading PDB file
            leapin.write("mol = loadpdb " + self.io_dict['in']['input_pdb_path'] + " \n")

            # Adding ions
            leapin.write(ions_command)

            # Generating box
            leapin.write("setBox mol vdw \n")

            # Saving output PDB file, coordinates and topology
            leapin.write("savepdb mol " + self.io_dict['out']['output_pdb_path'] + " \n")
            leapin.write("saveAmberParm mol " + self.io_dict['out']['output_top_path'] + " " + self.io_dict['out']['output_crd_path'] + "\n")
            leapin.write("quit \n")

        # Command line
        self.cmd = ['tleap', '-f', instructions_file]

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        if self.box_type != "cubic":
            fu.log('Fixing truncated octahedron Box in the topology and coordinates files', self.out_log, self.global_log)

            # Taking box info from input PDB file (first line).  readline()
            # yields '' for an empty file, which falls into the warning below.
            with open(self.io_dict['in']['input_pdb_path']) as file:
                pdb_line = file.readline()

            # PDB info: OCTBOX   86.1942924  86.1942924  86.1942924 109.4712190 109.4712190 109.4712190
            regex_box = r'OCTBOX\s*(\d+\.\d+)\s*(\d+\.\d+)\s*(\d+\.\d+)\s*(\d+\.\d+)\s*(\d+\.\d+)\s*(\d+\.\d+)'
            box_match = re.search(regex_box, pdb_line) if 'OCTBOX' in pdb_line else None

            if box_match is None:
                fu.log('WARNING: box info not found in input PDB file (OCTBOX). Needed to correctly assign the octahedron box. Assuming cubic box.',self.out_log, self.global_log)
            else:
                # Six captured numbers: three lengths followed by three angles.
                box = box_match.groups()
                box_line = "".join("{:12.7f}".format(float(coord)) for coord in box)

                # PRMTOP info: 1.09471219E+02  8.63157502E+01  8.63157502E+01  8.63157502E+01
                # (first angle, then the three lengths)
                top_box_line = "".join('  %.8E' % float(box[i]) for i in (3, 0, 1, 2))

                # Removing box generated by tleap from the crd file (last line)
                with open(self.io_dict['out']['output_crd_path']) as file:
                    crd_lines = file.readlines()[:-1]

                # Adding old box coordinates (taken from the input pdb)
                crd_lines.append(box_line)

                with open(self.io_dict['out']['output_crd_path'], 'w') as file:
                    file.writelines(crd_lines)
                    file.write("\n")

                # Now fixing the BOX_DIMENSIONS record and the IFBOX param in prmtop.
                # %FLAG BOX_DIMENSIONS
                # %FORMAT(5E16.8)
                # 1.09471219E+02  8.63157502E+01  8.63157502E+01  8.63157502E+01
                box_flag = False
                ifbox_flag = 0

                # The topology is rewritten in place from a temporary copy.
                tmp_parmtop = str(PurePath(self.tmp_folder).joinpath("top_temp.parmtop"))
                shutil.copyfile(self.io_dict['out']['output_top_path'], tmp_parmtop)

                with open(self.io_dict['out']['output_top_path'], 'w') as new_top:
                    with open(tmp_parmtop, 'r') as old_top:
                        for line in old_top:
                            if 'BOX_DIMENSIONS' in line:
                                box_flag = True
                                new_top.write(line)
                            elif box_flag and 'FORMAT' not in line:
                                # First data line after the BOX_DIMENSIONS header
                                new_top.write(top_box_line + "\n")
                                box_flag = False
                            elif 'FLAG POINTERS' in line or ifbox_flag in (1, 2, 3):
                                # Counts the POINTERS header and the three
                                # lines that follow it, copying them as is.
                                ifbox_flag += 1
                                new_top.write(line)
                            elif ifbox_flag == 4:
                                # Fifth line of the POINTERS section: overwrite
                                # columns 56-63 (the IFBOX value) with 2.
                                new_top.write(line[:56] + '       2' + line[64:])
                                ifbox_flag += 1
                            else:
                                new_top.write(line)

        # remove temporary folder(s)
        if self.remove_tmp:
            self.tmp_files.append(self.tmp_folder)
            self.tmp_files.append("leap.log")
            self.remove_tmp_files()

        return self.return_code
Example #5
0
    def launch(self):
        """Launches the execution of the LeapSolvate module.

        Writes a ``leap.in`` instruction file into a fresh temporary folder,
        runs ``tleap -f leap.in`` through ``run_biobb`` and copies the results
        back with ``copy_to_host``.  Afterwards the last line of the generated
        coordinates file is prepended, tagged ``OCTBOX``, to the generated PDB
        file.  If the coordinates file is empty, a warning is logged and the
        PDB file is left as written by tleap.

        Returns:
            ``0`` when ``check_restart()`` is true, otherwise
            ``self.return_code``.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        # Leap configuration (instructions) file
        instructions_file = str(PurePath(self.tmp_folder).joinpath("leap.in"))

        box_command = "solvateOct"
        if self.box_type == "cubic":
            box_command = "solvateBox"

        # Water Type
        # leaprc.water.tip4pew, tip4pd, tip3p, spceb, spce, opc, fb4, fb3
        # Values: POL3BOX, QSPCFWBOX, SPCBOX, SPCFWBOX, TIP3PBOX, TIP3PFBOX, TIP4PBOX, TIP4PEWBOX, OPCBOX, OPC3BOX, TIP5PBOX.
        # Every alternative must be a complete leap command, i.e. start with
        # "source"; a bare "leaprc.water.*" line is not a leap command.
        source_wat_command = "source leaprc.water.tip3p"
        if self.water_type == "TIP4PEWBOX":
            source_wat_command = "source leaprc.water.tip4pew"
        if self.water_type == "TIP4PBOX":
            source_wat_command = "source leaprc.water.tip4pd"
        if re.match(r"SPC", self.water_type):
            source_wat_command = "source leaprc.water.spce"
        if re.match(r"OPC", self.water_type):
            source_wat_command = "source leaprc.water.opc"

        def _add_ions(ion_type, count):
            # One "addions" leap instruction for the given ion and count.
            return "addions mol " + ion_type + " " + str(count) + " \n"

        def _as_file_list(path):
            # None -> no files, zip -> extracted members, anything else -> itself.
            if path is None:
                return []
            if path.endswith('.zip'):
                return fu.unzip_list(path,
                                     dest_dir=self.tmp_folder,
                                     out_log=self.out_log)
            return [path]

        # Counterions
        ions_command = ""
        if self.neutralise:
            ions_command += _add_ions(self.negative_ions_type, 0)
            ions_command += _add_ions(self.positive_ions_type, 0)

        if self.negative_ions_number != 0:
            ions_command += _add_ions(self.negative_ions_type,
                                      self.negative_ions_number)
        if self.positive_ions_number != 0:
            ions_command += _add_ions(self.positive_ions_type,
                                      self.positive_ions_number)

        ligands_lib_list = _as_file_list(self.io_dict['in']['input_lib_path'])
        ligands_frcmod_list = _as_file_list(
            self.io_dict['in']['input_frcmod_path'])
        amber_params_list = _as_file_list(
            self.io_dict['in']['input_params_path'])
        leap_source_list = _as_file_list(
            self.io_dict['in']['input_source_path'])

        with open(instructions_file, 'w') as leapin:
            # Forcefields loaded from input forcefield property
            for t in self.forcefield:
                leapin.write("source leaprc.{}\n".format(t))

            # Additional Leap commands
            for leap_commands in leap_source_list:
                leapin.write("source " + leap_commands + "\n")

            # Ions Type
            if self.ions_type != "None":
                leapin.write("loadamberparams frcmod." + self.ions_type + "\n")

            # Additional Amber parameters
            for amber_params in amber_params_list:
                leapin.write("loadamberparams " + amber_params + "\n")

            # Water Model loaded from input water_model property
            leapin.write(source_wat_command + " \n")

            # Ligand(s) libraries (if any)
            for amber_lib in ligands_lib_list:
                leapin.write("loadOff " + amber_lib + "\n")
            for amber_frcmod in ligands_frcmod_list:
                leapin.write("loadamberparams " + amber_frcmod + "\n")

            # Loading PDB file
            leapin.write("mol = loadpdb " +
                         self.io_dict['in']['input_pdb_path'] + " \n")

            # Generating box + adding water molecules
            leapin.write(box_command + " mol " + self.water_type + " " +
                         str(self.distance_to_molecule) + " " +
                         str(self.closeness))
            leapin.write(" iso \n" if self.iso else "\n")

            # Adding counterions
            leapin.write(ions_command)

            # Saving output PDB file, coordinates and topology
            leapin.write("savepdb mol " +
                         self.io_dict['out']['output_pdb_path'] + " \n")
            leapin.write("saveAmberParm mol " +
                         self.io_dict['out']['output_top_path'] + " " +
                         self.io_dict['out']['output_crd_path'] + "\n")
            leapin.write("quit \n")

        # Command line
        self.cmd = ['tleap', '-f', instructions_file]

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # Saving octahedron box with all decimals in PDB file. Needed for the add_ions BB.

        # Getting the box (last line) from the generated crd file.  The name
        # stays None when the file has no lines at all.
        box_record = None
        with open(self.io_dict['out']['output_crd_path'], "r") as file:
            for box_record in file:
                pass

        if box_record is None:
            fu.log(
                'WARNING: %s is empty, OCTBOX record not added to %s' %
                (self.io_dict['out']['output_crd_path'],
                 self.io_dict['out']['output_pdb_path']), self.out_log)
        else:
            # Keep the OCTBOX record on a line of its own even if the crd
            # file does not end with a newline.
            if not box_record.endswith("\n"):
                box_record += "\n"

            # Adding box as a first line in the generated pdb file with OCTBOX tag
            octbox = "OCTBOX " + box_record
            with open(self.io_dict['out']['output_pdb_path'], 'r+') as f:
                content = f.read()
                f.seek(0, 0)
                f.write(octbox + content)

        # remove temporary folder(s)
        if self.remove_tmp:
            self.tmp_files.append(self.tmp_folder)
            self.tmp_files.append("leap.log")
            self.remove_tmp_files()

        return self.return_code
Example #6
0
    def launch(self) -> int:
        """Execute the :class:`BindingSite <utils.bindingsite.BindingSite>` utils.bindingsite.BindingSite object."""

        # Get local loggers from launchlogger decorator
        out_log = getattr(self, 'out_log', None)
        err_log = getattr(self, 'err_log', None)

        # check input/output paths and parameters
        self.check_data_params(out_log, err_log)

        # Check the properties
        fu.check_properties(self, self.properties)

        if self.restart:
            output_file_list = [self.io_dict["out"]["output_pdb_path"]]
            if fu.check_complete_files(output_file_list):
                fu.log(
                    'Restart is enabled, this step: %s will the skipped' %
                    self.step, out_log, self.global_log)
                return 0

        # Parse structure
        fu.log(
            'Loading input PDB structure %s' %
            (self.io_dict["in"]["input_pdb_path"]), out_log, self.global_log)
        structure_name = PurePath(self.io_dict["in"]["input_pdb_path"]).name
        parser = Bio.PDB.PDBParser(QUIET=True)
        structPDB = parser.get_structure(structure_name,
                                         self.io_dict["in"]["input_pdb_path"])

        if len(structPDB):
            structPDB = structPDB[0]

        # Use only one chain
        n_chains = structPDB.get_list()
        if len(n_chains) != 1:
            fu.log(
                'More than one chain found in the input PDB structure. Using only the first chain to find the binding site',
                out_log, self.global_log)
            # get first chain in case there is more than one chain
            for struct_chain in structPDB.get_chains():
                structPDB = struct_chain

        # Get AA sequence
        structPDB_seq = get_pdb_sequence(structPDB)
        if len(structPDB_seq) == 0:
            fu.log(
                self.__class__.__name__ +
                ': Cannot extract AA sequence from the input PDB structure %s. Wrong format?'
                % self.io_dict["in"]["input_pdb_path"], out_log)
            raise SystemExit(
                self.__class__.__name__ +
                ': Cannot extract AA sequence from the input PDB structure %s. Wrong format?'
                % self.io_dict["in"]["input_pdb_path"])
        else:
            fu.log(
                'Found %s residues in %s' %
                (len(structPDB_seq), self.io_dict["in"]["input_pdb_path"]),
                out_log)

        # create temporary folder for decompressing the input_clusters_zip file
        unique_dir = PurePath(fu.create_unique_dir())
        fu.log('Creating %s temporary folder' % unique_dir, out_log,
               self.global_log)

        # decompress the input_clusters_zip file
        cluster_list = fu.unzip_list(
            zip_file=self.io_dict["in"]["input_clusters_zip"],
            dest_dir=unique_dir,
            out_log=out_log)

        clusterPDB_ligands_aligned = []
        clusterPDB_ligands_num = 0

        fu.log('Iterating on all clusters:', out_log)

        for idx, cluster_path in enumerate(cluster_list):

            cluster_name = PurePath(cluster_path).stem
            fu.log(' ', out_log)
            fu.log('------------ Iteration #%s --------------' % (idx + 1),
                   out_log)
            fu.log('Cluster member: %s' % cluster_name, out_log)

            # Load and Parse PDB
            clusterPDB = {}
            clusterPDB = parser.get_structure(cluster_name, cluster_path)[0]

            # Use only the first chain
            for cluster_chain in clusterPDB.get_chains():
                clusterPDB = cluster_chain

            # Looking for ligands
            clusterPDB_ligands = get_ligand_residues(clusterPDB)
            if (len(clusterPDB_ligands)) == 0:
                fu.log(
                    'No ligands found that could guide the binding site search. Ignoring this member: %s'
                    % cluster_name, out_log)
                continue

            # Selecting the largest ligand, if more than one
            lig_atoms_num = 0
            clusterPDB_ligand = {}
            if self.ligand:
                if self.ligand in [
                        x.get_resname() for x in clusterPDB_ligands
                ]:
                    for lig in clusterPDB_ligands:
                        if lig.get_resname() == self.ligand:
                            clusterPDB_ligand = lig
                            lig_atoms_num = len(lig.get_list())
                            fu.log(
                                'Ligand found: %s  (%s atoms)' %
                                (lig.get_resname(), lig_atoms_num), out_log)
                else:
                    fu.log(
                        'Ligand %s not found in %s cluster member, skipping this cluster'
                        % (self.ligand, cluster_name), out_log)
                    continue
            else:
                if len(clusterPDB_ligands) > 1:
                    for lig_res in clusterPDB_ligands:
                        lig_res_atoms_num = len(lig_res.get_list())
                        fu.log(
                            'Ligand found: %s  (%s atoms)' %
                            (lig_res.get_resname(), lig_res_atoms_num),
                            out_log)
                        if lig_res_atoms_num > lig_atoms_num:
                            clusterPDB_ligand = lig_res
                            lig_atoms_num = lig_res_atoms_num
                else:
                    clusterPDB_ligand = clusterPDB_ligands[0]
                    lig_atoms_num = len(clusterPDB_ligands[0].get_list())

            fu.log(
                'Member accepted. Valid ligand found: %s (%s atoms)' %
                (clusterPDB_ligand.get_resname(), lig_atoms_num), out_log)

            ## Mapping residues by sequence alignment to match structPDB-clusterPDB paired residues

            # Get AA sequence
            clusterPDB_seq = get_pdb_sequence(clusterPDB)

            # Pairwise align
            aln, residue_map = align_sequences(structPDB_seq, clusterPDB_seq,
                                               self.matrix_name, self.gap_open,
                                               self.gap_extend)
            fu.log(
                'Matching residues to input PDB structure. Alignment is:\n%s' %
                (aln[1]), out_log)

            # Calculate (gapless) sequence identity
            seq_identity, gap_seq_identity = calculate_alignment_identity(
                aln[0], aln[1])
            fu.log('Sequence identity (%%): %s' % (seq_identity), out_log)
            fu.log('Gap less identity (%%): %s' % (gap_seq_identity), out_log)

            ## Selecting aligned CA atoms from first model, first chain

            struct_atoms = []
            cluster_atoms = []

            for struct_res in residue_map:
                try:
                    cluster_atoms.append(
                        clusterPDB[residue_map[struct_res]]['CA'])
                    struct_atoms.append(
                        get_residue_by_id(structPDB, struct_res)['CA'])
                except KeyError:
                    fu.log(
                        'Cannot find CA atom for residue %s  (input PDB  %s)' %
                        (get_residue_by_id(
                            structPDB, struct_res).get_resname(), struct_res),
                        out_log)
                    pass

            if len(cluster_atoms) == 0:
                fu.log(
                    self.__class__.__name__ +
                    ': Cannot find CA atoms (1st model, 1st chain) in cluster member %s when aligning against %s. Ignoring this member.'
                    % (cluster_name, structure_name), out_log)
                raise SystemExit(
                    self.__class__.__name__ +
                    ': Cannot find CA atoms (1st model, 1st chain) in cluster member %s when aligning against %s. Ignoring this member.'
                    % (cluster_name, structure_name))
            else:
                fu.log(
                    'Superimposing %s aligned protein residues' %
                    (len(cluster_atoms)), out_log)

            ## Align against input structure

            si = Bio.PDB.Superimposer()
            si.set_atoms(struct_atoms, cluster_atoms)
            si.apply(clusterPDB.get_atoms())
            fu.log('RMSD: %s' % (si.rms), out_log)

            # Save transformed structure (and ligand)
            clusterPDB_ligand_aligned = clusterPDB[clusterPDB_ligand.get_id()]
            fu.log('Saving transformed ligand coordinates', out_log)

            clusterPDB_ligands_aligned.append(clusterPDB_ligand_aligned)

            ##  Stop after n accepted cluster members

            clusterPDB_ligands_num += 1

            if clusterPDB_ligands_num > self.max_num_ligands:
                break

        fu.log(' ', out_log)
        fu.log('----------------------------------------', out_log)
        fu.log(
            'All transformed ligand coordinates saved, getting binding site residues',
            out_log)

        ## Select binding site atoms as those around cluster superimposed ligands

        fu.log(
            'Defining binding site residues as those %sÅ around the %s cluster superimposed ligands'
            % (self.radius, clusterPDB_ligands_num), out_log)

        # select Atoms from aligned ligands
        clusterPDB_ligands_aligned2 = [
            res for res in clusterPDB_ligands_aligned
        ]
        clusterPDB_ligands_aligned_atoms = Bio.PDB.Selection.unfold_entities(
            clusterPDB_ligands_aligned2, 'A')

        # select Atoms from input PDB structure
        structPDB_atoms = [atom for atom in structPDB.get_atoms()]

        # compute neighbors for aligned ligands in the input PDB structure
        structPDB_bs_residues_raw = {}
        structPDB_neighbors = Bio.PDB.NeighborSearch(structPDB_atoms)
        for ligand_atom in clusterPDB_ligands_aligned_atoms:
            # look for PDB atoms 5A around each ligand atom
            k_l = structPDB_neighbors.search(ligand_atom.coord, self.radius,
                                             'R')
            for k in k_l:
                structPDB_bs_residues_raw[k.get_id()] = k.get_full_id()

        ## Save binding site to PDB

        io = Bio.PDB.PDBIO()
        fu.log(
            'Writing binding site residues into %s' %
            (self.io_dict["out"]["output_pdb_path"]), out_log)

        # unselect input PDB atoms not in binding site
        structPDB_bs_atoms = 0
        p = re.compile('H_|W_|W')
        residue_ids_to_remove = []
        for res in structPDB.get_residues():
            if res.id not in structPDB_bs_residues_raw.keys():
                # add residue to residue_ids_to_remove list
                residue_ids_to_remove.append(res.id)
            elif p.match(res.resname):
                # add residue to residue_ids_to_remove list
                residue_ids_to_remove.append(res.id)
            else:
                # this residue will be preserved
                structPDB_bs_atoms += len(res.get_list())

        # unselect input PDB atoms not in binding site
        for chain in structPDB:
            for idr in residue_ids_to_remove:
                chain.detach_child(idr)

        # write PDB file
        io.set_structure(structPDB)
        io.save(self.io_dict["out"]["output_pdb_path"])

        if self.remove_tmp:
            # remove temporary folder
            fu.rm(unique_dir)
            fu.log(' ', out_log)
            fu.log('----------------------------------------', out_log)
            fu.log('Removed temporary folder: %s' % unique_dir, out_log)

        return 0
Example #7
0
    def launch(self):
        """Launches the execution of the LeapGenTop module.

        Builds a tleap instructions file (``leap.in``) inside a fresh
        temporary folder, stores the ``tleap -f <file>`` command line in
        ``self.cmd`` and hands over to ``run_biobb()`` / ``copy_to_host()``.

        The instructions load, in this order: the force fields listed in
        ``self.forcefield``, extra leap source files, extra Amber parameter
        files, the ions library, ligand libraries and ligand frcmod files;
        then the input PDB is loaded and the output PDB, topology and
        coordinates are saved.

        Returns:
            0 when ``check_restart()`` is truthy, otherwise
            ``self.return_code``.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        # Leap configuration (instructions) file
        instructions_file = str(PurePath(self.tmp_folder).joinpath("leap.in"))

        io_in = self.io_dict['in']
        io_out = self.io_dict['out']

        def collect_inputs(key):
            # An optional input may be unset (None), a single file, or a
            # .zip bundle that gets unpacked into the temporary folder.
            path = io_in[key]
            if path is None:
                return []
            if path.endswith('.zip'):
                return fu.unzip_list(path, dest_dir=self.tmp_folder, out_log=self.out_log)
            return [path]

        # Same resolution order as the file has always used, so archives are
        # unpacked (and logged) in the same sequence.
        ligands_lib_list = collect_inputs('input_lib_path')
        ligands_frcmod_list = collect_inputs('input_frcmod_path')
        amber_params_list = collect_inputs('input_params_path')
        leap_source_list = collect_inputs('input_source_path')

        instructions = []

        # Force fields come exclusively from the forcefield property, each
        # entry being the suffix of a leaprc file (e.g. protein.ff14SB,
        # DNA.bsc1, gaff); nothing is loaded implicitly.
        instructions.extend("source leaprc.{}\n".format(ff) for ff in self.forcefield)

        # Additional Leap commands
        instructions.extend("source " + src + "\n" for src in leap_source_list)

        # Additional Amber parameters
        instructions.extend("loadamberparams " + prm + "\n" for prm in amber_params_list)

        # Ions libraries
        instructions.append("loadOff atomic_ions.lib \n")

        # Ligand(s) libraries (if any)
        instructions.extend("loadOff " + lib + "\n" for lib in ligands_lib_list)
        instructions.extend("loadamberparams " + frcmod + "\n" for frcmod in ligands_frcmod_list)

        # Loading PDB file
        instructions.append("mol = loadpdb " + io_in['input_pdb_path'] + " \n")

        # Saving output PDB file, coordinates and topology
        instructions.append("savepdb mol " + io_out['output_pdb_path'] + " \n")
        instructions.append("saveAmberParm mol " + io_out['output_top_path'] + " " + io_out['output_crd_path'] + "\n")
        instructions.append("quit \n")

        with open(instructions_file, 'w') as leapin:
            leapin.writelines(instructions)

        # Command line. The executable name carries no trailing whitespace so
        # the list is also a valid argv when it is not joined through a shell.
        self.cmd = ['tleap', '-f', instructions_file]

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # remove temporary folder(s)
        if self.remove_tmp:
            self.tmp_files.append(self.tmp_folder)
            # NOTE(review): presumably tleap drops leap.log in the working
            # directory; it is queued for removal together with the folder.
            self.tmp_files.append("leap.log")
            self.remove_tmp_files()

        return self.return_code