Esempio n. 1
0
def process_output_fpocket_filter(search_list, tmp_folder, input_pockets_zip,
                                  output_filter_pockets_zip, remove_tmp,
                                  out_log):
    """ Creates the output_filter_pockets_zip

    Decompresses input_pockets_zip into tmp_folder, keeps only the files
    whose names match an entry of search_list (using the 'entry_' naming
    convention, e.g. 'pocket1_'), and compresses the selection into
    output_filter_pockets_zip.

    Args:
        search_list (list): pocket identifiers to keep (e.g. ['pocket1']).
        tmp_folder (str): temporary folder used for decompression.
        input_pockets_zip (str): path to the input zip with all pockets.
        output_filter_pockets_zip (str): path to the filtered output zip.
        remove_tmp (bool): whether to delete tmp_folder afterwards.
        out_log (Logger): log file object.
    """

    # decompress the input_pockets_zip file to tmp_folder
    # (called for its side effect only; the folder is listed below)
    fu.unzip_list(zip_file=input_pockets_zip,
                  dest_dir=tmp_folder,
                  out_log=out_log)

    # list all files of tmp_folder
    pockets_list = [str(i) for i in Path(tmp_folder).iterdir()]

    # select search_list items from pockets_list; the trailing '_' avoids
    # partial matches such as 'pocket1' also matching 'pocket10_atm.pdb'
    sel_pockets_list = [
        p for p in pockets_list for s in search_list if s + '_' in p
    ]

    fu.log('Creating %s output file' % output_filter_pockets_zip, out_log)

    # compress output to output_filter_pockets_zip
    fu.zip_list(zip_file=output_filter_pockets_zip,
                file_list=sel_pockets_list,
                out_log=out_log)

    if remove_tmp:
        # remove temporary folder
        fu.rm(tmp_folder)
        fu.log('Removed temporary folder: %s' % tmp_folder, out_log)
Esempio n. 2
0
def process_output_gmx(unique_name, files_folder, remove_tmp, basename,
                       class_params, output_files, out_log):
    """ Moves and removes temporal files generated by the wrapper

    Renames the randomly-suffixed wrapper outputs back to basename (in
    place), fixes the ITP include reference inside .top files, copies each
    file to its requested output path and optionally deletes the temporary
    folder.

    Args:
        unique_name (str): random suffix inserted in the temporal file names.
        files_folder (str): temporary folder containing the wrapper outputs.
        remove_tmp (bool): whether to delete files_folder afterwards.
        basename (str): original base name of the files.
        class_params (dict): must contain the 'suffix' used to glob files.
        output_files (dict): maps file extension (without dot) to output path.
        out_log (Logger): log file object.
    """
    path = files_folder
    suffix = class_params['suffix']
    src_files = glob.glob(path + '/' + basename + '.' + unique_name + suffix +
                          '*')

    # copy files for the requested topology to the output_path
    for file_name in src_files:
        # replace random name by original name in all files (edits in place)
        with fileinput.FileInput(file_name, inplace=True) as fh:
            for line in fh:
                print(line.replace(basename + '.' + unique_name, basename),
                      end='')

        if Path(file_name).is_file():
            file_extension = PurePath(file_name).suffix
            output_path = output_files[file_extension[1:]]
            # in top files for gromacs, replace file.itp by name given by user
            if file_extension[1:] == 'top':
                with open(file_name) as f:
                    new_text = f.read().replace(
                        basename + '_GMX.itp',
                        PurePath(output_files['itp']).name)
                with open(file_name, "w") as f:
                    f.write(new_text)
            shutil.copy(file_name, output_path)
            # fixed typo in log message: 'succesfully' -> 'successfully'
            fu.log('File %s successfully created' % output_path, out_log)

    if remove_tmp:
        # remove temporary folder
        fu.rm(files_folder)
        fu.log('Removed temporary folder: %s' % files_folder, out_log)
Esempio n. 3
0
    def launch(self) -> int:
        """Execute the :class:`PdbClusterZip <api.pdb_cluster_zip.PdbClusterZip>` api.pdb_cluster_zip.PdbClusterZip object.

        Downloads every PDB member of the cluster to a unique temporary
        folder and zips them into output_pdb_zip_path.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart(): return 0
        self.stage_files()

        check_mandatory_property(self.pdb_code, 'pdb_code', self.out_log, self.__class__.__name__)

        # normalize the PDB code
        self.pdb_code = self.pdb_code.strip().lower()

        file_list = []
        # Downloading PDB files: one file per cluster member
        pdb_code_list = get_cluster_pdb_codes(pdb_code=self.pdb_code, cluster=self.cluster, out_log=self.out_log, global_log=self.global_log)
        unique_dir = fu.create_unique_dir()
        for pdb_code in pdb_code_list:
            pdb_file = os.path.join(unique_dir, pdb_code+".pdb")
            pdb_string = download_pdb(pdb_code=pdb_code, api_id=self.api_id, out_log=self.out_log, global_log=self.global_log)
            write_pdb(pdb_string, pdb_file, self.filter, self.out_log, self.global_log)
            file_list.append(os.path.abspath(pdb_file))

        # Zipping files
        # fix: pass self.out_log so this message reaches the step log,
        # consistent with every other fu.log call in this method
        fu.log("Zipping the pdb files to: %s" % self.output_pdb_zip_path, self.out_log)
        fu.zip_list(self.output_pdb_zip_path, file_list, out_log=self.out_log)

        if self.remove_tmp:
            # remove temporary folder
            fu.rm(unique_dir)
            fu.log('Removed temporary folder: %s' % unique_dir, self.out_log)

        return 0
Esempio n. 4
0
def process_output(unique_name, files_folder, remove_tmp, basename,
                   class_params, output_files, out_log):
    """ Moves and removes temporal files generated by the wrapper

    Renames the randomly-suffixed wrapper outputs back to basename (in
    place), copies each file to its requested output path and optionally
    deletes the temporary folder.

    Args:
        unique_name (str): random suffix inserted in the temporal file names.
        files_folder (str): temporary folder containing the wrapper outputs.
        remove_tmp (bool): whether to delete files_folder afterwards.
        basename (str): original base name of the files.
        class_params (dict): must contain the 'suffix' used to glob files.
        output_files (dict): maps file extension (without dot) to output path.
        out_log (Logger): log file object.
    """
    path = files_folder
    suffix = class_params['suffix']
    src_files = glob.glob(path + '/' + basename + '.' + unique_name + suffix +
                          '*')

    # copy files for the requested topology to the output_path
    for file_name in src_files:
        # replace random name by original name in all files (edits in place)
        with fileinput.FileInput(file_name, inplace=True) as fh:
            for line in fh:
                print(line.replace(basename + '.' + unique_name, basename),
                      end='')

        if Path(file_name).is_file():
            file_extension = PurePath(file_name).suffix
            output_path = output_files[file_extension[1:]]
            shutil.copy(file_name, output_path)
            # fixed typo in log message: 'succesfully' -> 'successfully'
            fu.log('File %s successfully created' % output_path, out_log)

    if remove_tmp:
        # remove temporary folder
        fu.rm(files_folder)
        fu.log('Removed temporary folder: %s' % files_folder, out_log)
Esempio n. 5
0
    def launch(self) -> int:
        """Execute the :class:`FPocketSelect <fpocket.fpocket_select.FPocketSelect>` fpocket.fpocket_select.FPocketSelect object.

        Decompresses input_pockets_zip, selects the files of the requested
        pocket number and copies the .pdb / .pqr pair to their output paths.
        """

        # Get local loggers from launchlogger decorator
        out_log = getattr(self, 'out_log', None)
        err_log = getattr(self, 'err_log', None)

        # check input/output paths and parameters
        self.check_data_params(out_log, err_log)

        # Check the properties
        fu.check_properties(self, self.properties)

        if self.restart:
            output_file_list = [
                self.io_dict["out"]["output_pocket_pdb"],
                self.io_dict["out"]["output_pocket_pqr"]
            ]
            if fu.check_complete_files(output_file_list):
                # fixed typo in log message: 'will the skipped'
                fu.log(
                    'Restart is enabled, this step: %s will be skipped' %
                    self.step, out_log, self.global_log)
                return 0

        # create tmp_folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, out_log)

        # decompress the input_pockets_zip file to tmp_folder
        all_pockets = fu.unzip_list(
            zip_file=self.io_dict["in"]["input_pockets_zip"],
            dest_dir=self.tmp_folder,
            out_log=out_log)

        # select only the requested pocket; the trailing '_' prevents
        # 'pocket1' from also matching 'pocket10', 'pocket11', ...
        pocket_prefix = 'pocket' + str(self.pocket) + '_'
        pockets_list = [i for i in all_pockets if pocket_prefix in i]

        for p in pockets_list:
            if PurePath(p).suffix == '.pdb':
                fu.log(
                    'Saving %s file' %
                    self.io_dict["out"]["output_pocket_pdb"], out_log)
                shutil.copy(p, self.io_dict["out"]["output_pocket_pdb"])
            else:
                fu.log(
                    'Saving %s file' %
                    self.io_dict["out"]["output_pocket_pqr"], out_log)
                shutil.copy(p, self.io_dict["out"]["output_pocket_pqr"])

        if self.remove_tmp:
            # remove temporary folder
            fu.rm(self.tmp_folder)
            fu.log('Removed temporary folder: %s' % self.tmp_folder, out_log)

        return 0
Esempio n. 6
0
def process_output_trjconv_str_ens(tmp_folder, remove_tmp, output_file,
                                   glob_pattern, out_log):
    """ Compresses, moves and removes temporal files generated by the wrapper

    Args:
        tmp_folder (str): temporary folder containing the wrapper outputs.
        remove_tmp (bool): whether to delete tmp_folder afterwards.
        output_file (str): path of the output zip file.
        glob_pattern (str): glob pattern selecting the files to compress.
        out_log (Logger): log file object.
    """
    # list all files in temporary folder matching the pattern
    # (direct list() instead of the previous manual append loop)
    files_list = list(Path(tmp_folder).glob(glob_pattern))

    # adding files from temporary folder to zip
    fu.zip_list(output_file, files_list, out_log)

    if remove_tmp:
        # remove temporary folder
        fu.rm(tmp_folder)
        fu.log('Removed temporary folder: %s' % tmp_folder, out_log)
Esempio n. 7
0
def remove_tmp_files(list,
                     remove_tmp,
                     out_log,
                     input_top_path_orig=None,
                     input_top_path=None):
    """ Removes temporal files generated by the wrapper

    Args:
        list (list): paths of the temporal files to remove.
        remove_tmp (bool): whether removal is actually performed.
        out_log (Logger): log file object.
        input_top_path_orig (str): original topology path; if it is a zip,
            the parent folder of input_top_path is removed as well.
        input_top_path (str): extracted topology path (used only when
            input_top_path_orig is a zip).
    """
    tmp_files = list
    # guard against the defaults: zipfile.is_zipfile(None) raises TypeError
    if input_top_path_orig and zipfile.is_zipfile(input_top_path_orig):
        tmp_files.append(PurePath(input_top_path).parent)

    if remove_tmp:
        removed_files = [f for f in tmp_files if fu.rm(f)]
        fu.log('Removed: %s' % str(removed_files), out_log)
Esempio n. 8
0
def get_gromacs_version(gmx: str = "gmx") -> int:
    """ Gets the GROMACS installed version and returns it as an int(3) for
    versions older than 5.1.5 and an int(5) for 20XX versions filling the gaps
    with '0' digits.

    Args:
        gmx (str): ('gmx') Path to the GROMACS binary.

    Returns:
        int: GROMACS version, or 0 if it could not be determined.
    """
    unique_dir = fu.create_unique_dir()
    out_log, err_log = fu.get_logs(path=unique_dir, can_write_console=False)
    cmd = [gmx, "-version"]
    try:
        cmd_wrapper.CmdWrapper(cmd, out_log, err_log).launch()
        pattern = re.compile(r"GROMACS version:\s+(.+)")
        version_match = None
        with open(Path(unique_dir).joinpath('log.out')) as log_file:
            for line in log_file:
                version_match = pattern.match(line.strip())
                if version_match:
                    break
        # no matching line: previously this raised UnboundLocalError,
        # silently swallowed by a bare except
        if not version_match:
            return 0
        version = version_match.group(1).replace(".", "").replace(
            "VERSION", "").strip()
        version = "".join([c for c in version if c.isdigit()])
    except Exception:
        # narrowed from a bare 'except:'; any failure means version unknown
        return 0
    finally:
        # always clean up the temporary folder (it leaked on failure before)
        fu.rm(unique_dir)

    # pad with '0' digits: 20XX versions to 5 digits, older ones to 3
    if version.startswith("2"):
        while len(version) < 5:
            version += '0'
    else:
        while len(version) < 3:
            version += '0'
    return int(version)
Esempio n. 9
0
def process_output_fpocket(tmp_folder, output_pockets_zip, output_summary,
                           sort_by, remove_tmp, out_log, classname):
    """ Creates the output_pockets_zip and generates the  output_summary

    Parses the fpocket 'input_info.txt' report into a dict, sorts the
    pockets by the sort_by property, zips the pocket files and writes the
    summary as JSON.

    Args:
        tmp_folder (str): temporary folder with the fpocket 'input_out' dir.
        output_pockets_zip (str): path of the output zip of pocket files.
        output_summary (str): path of the output JSON summary.
        sort_by (str): pocket property used to sort the summary.
        remove_tmp (bool): whether to delete tmp_folder afterwards.
        out_log (Logger): log file object.
        classname (str): caller class name, used in error messages.

    Raises:
        SystemExit: if fpocket produced no 'input_out' folder.
    """

    path = str(PurePath(tmp_folder).joinpath('input_out'))

    if not Path(path).is_dir():
        if remove_tmp:
            # remove temporary folder
            fu.rm(tmp_folder)
            fu.log('Removing temporary folder: %s' % tmp_folder, out_log)

        fu.log(
            classname +
            ': Error executing fpocket, please check your properties', out_log)
        raise SystemExit(
            classname +
            ': Error executing fpocket, please check your properties')

    # summary
    # read input_info.txt file, dropping blank lines
    info = PurePath(path).joinpath('input_info.txt')
    with open(info, 'r') as info_text:
        lines = [x for x in info_text.readlines() if x != '\n']

    data = {}

    # parse input_info.txt file to python object
    # (regexes converted to raw strings: the originals relied on invalid
    # escape sequences such as '\d' and '\ ' in normal string literals)
    pocket = ''
    for line in lines:
        if not line.startswith('\t'):
            # first level: pocket
            num = re.findall(r'\d+', line)[0]
            pocket = 'pocket' + num
            data[pocket] = {}
        else:
            # second level: pocket properties ("key : value" lines)
            groups = re.findall(r'(.*)(?: *: *)(.*)', line)[0]
            key = groups[0].lower().strip()
            key = re.sub(r'\-|\.', '', key)
            key = re.sub(r'\s+', '_', key)
            value = float(groups[1]) if '.' in groups[1] else int(groups[1])
            data[pocket][key] = value

    # get number of pockets
    fu.log('%d pockets found' % (len(data)), out_log)

    # sort data by sort_by property (descending)
    fu.log('Sorting output data by %s' % (sort_by), out_log)
    data = dict(
        sorted(data.items(),
               key=lambda item: float(item[1][sort_by]),
               reverse=True))

    # compress pockets
    pockets = PurePath(path).joinpath('pockets')
    files_list = [str(i) for i in Path(pockets).iterdir()]
    fu.zip_list(zip_file=output_pockets_zip,
                file_list=files_list,
                out_log=out_log)

    # save summary
    fu.log('Saving summary to %s file' % (output_summary), out_log)
    with open(output_summary, 'w') as outfile:
        json.dump(data, outfile, indent=4)

    if remove_tmp:
        # remove temporary folder
        fu.rm(tmp_folder)
        fu.log('Removed temporary folder: %s' % tmp_folder, out_log)
Esempio n. 10
0
    def launch(self) -> int:
        """Launches the execution of the GROMACS editconf module.

        NOTE(review): despite the one-line summary, the code below appends a
        ligand ITP to an unzipped GROMACS topology (include insertion plus
        [ molecules ] registration) and re-zips it — confirm the intended
        module name.
        """
        # temporary paths to delete at the end (when remove_tmp is set)
        tmp_files = []

        # Get local loggers from launchlogger decorator
        out_log = getattr(self, 'out_log', None)
        err_log = getattr(self, 'err_log', None)  # unused below, kept as-is

        # Restart if needed: skip the whole step when the output zip already
        # exists and is complete
        if self.restart:
            output_file_list = [self.io_dict['out'].get("output_top_zip_path")]
            if fu.check_complete_files(output_file_list):
                fu.log(
                    'Restart is enabled, this step: %s will the skipped' %
                    self.step, out_log, self.global_log)
                return 0

        # Unzip topology
        top_file = fu.unzip_top(
            zip_file=self.io_dict['in'].get("input_top_zip_path"),
            out_log=out_log)
        top_dir = str(Path(top_file).parent)
        tmp_files.append(top_dir)
        itp_name = str(Path(self.io_dict['in'].get("input_itp_path")).name)

        # Read the whole topology into memory, then delete the extracted file
        with open(top_file) as top_f:
            top_lines = top_f.readlines()
            top_f.close()  # redundant inside 'with', kept as-is
        fu.rm(top_file)

        # Locate the force-field include line; the ligand include block is
        # inserted right after it.
        # NOTE(review): if no line matches, 'index' is left at the last line
        # and the block is appended near the end of the file
        forcefield_pattern = r'#include.*forcefield.itp\"'
        for index, line in enumerate(top_lines):
            if re.search(forcefield_pattern, line):
                break
        top_lines.insert(index + 1, '\n')
        top_lines.insert(index + 2, '; Including ligand ITP\n')
        top_lines.insert(index + 3, '#include "' + itp_name + '"\n')
        top_lines.insert(index + 4, '\n')
        if self.io_dict['in'].get("input_posres_itp_path"):
            # Optional position restraints, wrapped in an #ifdef so they can
            # be switched on via mdp defines
            top_lines.insert(index + 5, '; Ligand position restraints' + '\n')
            top_lines.insert(index + 6, '#ifdef ' + self.posres_name + '\n')
            top_lines.insert(
                index + 7, '#include "' + str(
                    Path(self.io_dict['in'].get("input_posres_itp_path")).name)
                + '"\n')
            top_lines.insert(index + 8, '#endif' + '\n')
            top_lines.insert(index + 9, '\n')

        # Extract the moleculetype name from the ligand ITP: first
        # non-comment line after the [ moleculetype ] header
        inside_moleculetype_section = False
        with open(self.io_dict['in'].get("input_itp_path")) as itp_file:
            moleculetype_pattern = r'\[ moleculetype \]'
            for line in itp_file:
                if re.search(moleculetype_pattern, line):
                    inside_moleculetype_section = True
                    continue
                if inside_moleculetype_section and not line.startswith(';'):
                    moleculetype = line.strip().split()[0].strip()
                    break

        # Register one copy of the ligand in [ molecules ], right after the
        # last PROTEIN entry when present, otherwise at the end of the file
        molecules_pattern = r'\[ molecules \]'
        inside_molecules_section = False
        index_molecule = None
        # pad the name with spaces to 20 characters before the count of 1
        molecule_string = moleculetype + (20 -
                                          len(moleculetype)) * ' ' + '1' + '\n'
        for index, line in enumerate(top_lines):
            if re.search(molecules_pattern, line):
                inside_molecules_section = True
                continue
            if inside_molecules_section and not line.startswith(
                    ';') and line.upper().startswith('PROTEIN'):
                index_molecule = index

        # NOTE(review): truthiness test — an index of 0 would count as "not
        # found"; in practice the [ molecules ] header precedes any PROTEIN
        # line, so a found index is always > 0
        if index_molecule:
            top_lines.insert(index_molecule + 1, molecule_string)
        else:
            top_lines.append(molecule_string)

        new_top = fu.create_name(path=top_dir,
                                 prefix=self.prefix,
                                 step=self.step,
                                 name='ligand.top')

        with open(new_top, 'w') as new_top_f:
            new_top_f.write("".join(top_lines))

        # Copy the ligand ITP (and optional posres ITP) next to the topology
        # so the relative #include lines resolve
        shutil.copy2(self.io_dict['in'].get("input_itp_path"), top_dir)
        if self.io_dict['in'].get("input_posres_itp_path"):
            shutil.copy2(self.io_dict['in'].get("input_posres_itp_path"),
                         top_dir)

        # zip topology
        fu.log(
            'Compressing topology to: %s' %
            self.io_dict['out'].get("output_top_zip_path"), out_log,
            self.global_log)
        fu.zip_top(zip_file=self.io_dict['out'].get("output_top_zip_path"),
                   top_file=new_top,
                   out_log=out_log)

        if self.remove_tmp:
            # remove temporary folder(s) collected above
            fu.rm_file_list(tmp_files, out_log=out_log)

        return 0
Esempio n. 11
0
def remove_tmp_files(list, remove_tmp, out_log):
    """ Removes temporal files generated by the wrapper """
    # no-op unless removal was requested
    if not remove_tmp:
        return
    # keep only the paths fu.rm reports as actually deleted
    deleted = [path for path in list if fu.rm(path)]
    fu.log('Removed: %s' % str(deleted), out_log)
Esempio n. 12
0
    def launch(self):
        """Launches the execution of the template_container module.

        Copies the inputs into the container volume, builds and runs the
        containerized command line, copies the outputs back to the host and
        returns the process return code.
        """

        # Get local loggers from launchlogger decorator
        out_log = getattr(self, 'out_log', None)
        err_log = getattr(self, 'err_log', None)

        # Check the properties
        fu.check_properties(self, self.properties)

        # Restart
        if self.restart:
            # 4. Include here all output file paths
            output_file_list = [self.io_dict['out']['output_file_path']]
            if fu.check_complete_files(output_file_list):
                # fixed typo in log message: 'will the skipped'
                fu.log('Restart is enabled, this step: %s will be skipped' % self.step, out_log, self.global_log)
                return 0

        # 5. Copy inputs to container
        container_io_dict = fu.copy_to_container(self.container_path, self.container_volume_path, self.io_dict)

        # 6. Prepare the command line parameters as instructions list
        instructions = ['-j']
        if self.boolean_property:
            instructions.append('-v')
            fu.log('Appending optional boolean property', out_log, self.global_log)

        # 7. Build the actual command line as a list of items (elements order will be maintained)
        cmd = [self.executable_binary_property,
               ' '.join(instructions),
               container_io_dict['out']['output_file_path'],
               container_io_dict['in']['input_file_path1']]
        fu.log('Creating command line with instructions and required arguments', out_log, self.global_log)

        # 8. Repeat for optional input files if provided
        if container_io_dict['in']['input_file_path2']:
            # Append optional input_file_path2 to cmd
            cmd.append(container_io_dict['in']['input_file_path2'])
            fu.log('Appending optional argument to command line', out_log, self.global_log)

        # 9. Uncomment to check the command line
        # print(' '.join(cmd))

        # 10. Create cmd with specific syntax according to the required container
        cmd = fu.create_cmd_line(cmd, container_path=self.container_path,
                                 host_volume=container_io_dict.get('unique_dir'),
                                 container_volume=self.container_volume_path,
                                 container_working_dir=self.container_working_dir,
                                 container_user_uid=self.container_user_id,
                                 container_image=self.container_image,
                                 container_shell_path=self.container_shell_path,
                                 out_log=out_log, global_log=self.global_log)

        # Launch execution
        returncode = cmd_wrapper.CmdWrapper(cmd, out_log, err_log, self.global_log).launch()

        # Copy output(s) to output(s) path(s) in case of container execution
        fu.copy_to_host(self.container_path, container_io_dict, self.io_dict)

        # Remove temporary file(s)
        if self.remove_tmp and container_io_dict.get('unique_dir'):
            fu.rm(container_io_dict.get('unique_dir'))
            fu.log('Removed: %s' % str(container_io_dict.get('unique_dir')), out_log)

        return returncode
Esempio n. 13
0
    def launch(self) -> int:
        """Execute the :class:`AppendLigand <gromacs_extra.append_ligand.AppendLigand>` object.

        Unzips the input topology, inserts the ligand ITP #include (plus an
        optional position-restraints block) right after the force-field
        include, registers the ligand moleculetype in the [ molecules ]
        section, and zips the modified topology back up.
        """
        # Setup Biobb
        if self.check_restart(): return 0

        # Unzip topology
        top_file = fu.unzip_top(
            zip_file=self.io_dict['in'].get("input_top_zip_path"),
            out_log=self.out_log)
        top_dir = str(Path(top_file).parent)
        itp_name = str(Path(self.io_dict['in'].get("input_itp_path")).name)

        # Read the whole topology into memory, then delete the extracted file
        with open(top_file) as top_f:
            top_lines = top_f.readlines()
            top_f.close()  # redundant inside 'with', kept as-is
        fu.rm(top_file)

        # Locate the force-field include line; the ligand include block is
        # inserted right after it
        forcefield_pattern = r'#include.*forcefield.itp\"'
        if top_lines:
            for index, line in enumerate(top_lines):
                if re.search(forcefield_pattern, line):
                    break
        else:
            # Empty topology file: nothing to append to.
            # NOTE(review): top_file was already removed above, so this
            # message prints the path of a deleted file
            fu.log(
                f'FATAL: Input topfile {top_file} from input_top_zip_path {self.io_dict["in"].get("input_top_zip_path")} is empty.',
                self.out_log, self.global_log)
            return 1

        # Insert the ligand include block after the force-field include
        top_lines.insert(index + 1, '\n')
        top_lines.insert(index + 2, '; Including ligand ITP\n')
        top_lines.insert(index + 3, '#include "' + itp_name + '"\n')
        top_lines.insert(index + 4, '\n')
        if self.io_dict['in'].get("input_posres_itp_path"):
            # Optional position restraints, wrapped in an #ifdef so they can
            # be switched on via mdp defines
            top_lines.insert(index + 5, '; Ligand position restraints' + '\n')
            top_lines.insert(index + 6, '#ifdef ' + self.posres_name + '\n')
            top_lines.insert(
                index + 7, '#include "' + str(
                    Path(self.io_dict['in'].get("input_posres_itp_path")).name)
                + '"\n')
            top_lines.insert(index + 8, '#endif' + '\n')
            top_lines.insert(index + 9, '\n')

        # Extract the moleculetype name from the ligand ITP: first
        # non-comment line after the [ moleculetype ] header
        inside_moleculetype_section = False
        with open(self.io_dict['in'].get("input_itp_path")) as itp_file:
            moleculetype_pattern = r'\[ moleculetype \]'
            for line in itp_file:
                if re.search(moleculetype_pattern, line):
                    inside_moleculetype_section = True
                    continue
                if inside_moleculetype_section and not line.startswith(';'):
                    moleculetype = line.strip().split()[0].strip()
                    break

        # Register one copy of the ligand in [ molecules ], right after the
        # last PROTEIN entry when present, otherwise at the end of the file
        molecules_pattern = r'\[ molecules \]'
        inside_molecules_section = False
        index_molecule = None
        # pad the name with spaces to 20 characters before the count of 1
        molecule_string = moleculetype + (20 -
                                          len(moleculetype)) * ' ' + '1' + '\n'
        for index, line in enumerate(top_lines):
            if re.search(molecules_pattern, line):
                inside_molecules_section = True
                continue
            if inside_molecules_section and not line.startswith(
                    ';') and line.upper().startswith('PROTEIN'):
                index_molecule = index

        # NOTE(review): truthiness test — an index of 0 would count as "not
        # found"; in practice the [ molecules ] header precedes any PROTEIN
        # line, so a found index is always > 0
        if index_molecule:
            top_lines.insert(index_molecule + 1, molecule_string)
        else:
            top_lines.append(molecule_string)

        new_top = fu.create_name(path=top_dir,
                                 prefix=self.prefix,
                                 step=self.step,
                                 name='ligand.top')

        with open(new_top, 'w') as new_top_f:
            new_top_f.write("".join(top_lines))

        # Copy the ligand ITP (and optional posres ITP) next to the topology
        # so the relative #include lines resolve
        shutil.copy2(self.io_dict['in'].get("input_itp_path"), top_dir)
        if self.io_dict['in'].get("input_posres_itp_path"):
            shutil.copy2(self.io_dict['in'].get("input_posres_itp_path"),
                         top_dir)

        # zip topology
        fu.log(
            'Compressing topology to: %s' %
            self.io_dict['out'].get("output_top_zip_path"), self.out_log,
            self.global_log)
        fu.zip_top(zip_file=self.io_dict['out'].get("output_top_zip_path"),
                   top_file=new_top,
                   out_log=self.out_log)

        # Remove temporal files
        self.tmp_files.append(top_dir)
        self.remove_tmp_files()

        return 0
Esempio n. 14
0
    def launch(self) -> int:
        """Execute the :class:`BindingSite <utils.bindingsite.BindingSite>` utils.bindingsite.BindingSite object."""

        # Get local loggers from launchlogger decorator
        out_log = getattr(self, 'out_log', None)
        err_log = getattr(self, 'err_log', None)

        # check input/output paths and parameters
        self.check_data_params(out_log, err_log)

        # Check the properties
        fu.check_properties(self, self.properties)

        if self.restart:
            output_file_list = [self.io_dict["out"]["output_pdb_path"]]
            if fu.check_complete_files(output_file_list):
                fu.log(
                    'Restart is enabled, this step: %s will the skipped' %
                    self.step, out_log, self.global_log)
                return 0

        # Parse structure
        fu.log(
            'Loading input PDB structure %s' %
            (self.io_dict["in"]["input_pdb_path"]), out_log, self.global_log)
        structure_name = PurePath(self.io_dict["in"]["input_pdb_path"]).name
        parser = Bio.PDB.PDBParser(QUIET=True)
        structPDB = parser.get_structure(structure_name,
                                         self.io_dict["in"]["input_pdb_path"])

        if len(structPDB):
            structPDB = structPDB[0]

        # Use only one chain
        n_chains = structPDB.get_list()
        if len(n_chains) != 1:
            fu.log(
                'More than one chain found in the input PDB structure. Using only the first chain to find the binding site',
                out_log, self.global_log)
            # get first chain in case there is more than one chain
            for struct_chain in structPDB.get_chains():
                structPDB = struct_chain

        # Get AA sequence
        structPDB_seq = get_pdb_sequence(structPDB)
        if len(structPDB_seq) == 0:
            fu.log(
                self.__class__.__name__ +
                ': Cannot extract AA sequence from the input PDB structure %s. Wrong format?'
                % self.io_dict["in"]["input_pdb_path"], out_log)
            raise SystemExit(
                self.__class__.__name__ +
                ': Cannot extract AA sequence from the input PDB structure %s. Wrong format?'
                % self.io_dict["in"]["input_pdb_path"])
        else:
            fu.log(
                'Found %s residues in %s' %
                (len(structPDB_seq), self.io_dict["in"]["input_pdb_path"]),
                out_log)

        # create temporary folder for decompressing the input_clusters_zip file
        unique_dir = PurePath(fu.create_unique_dir())
        fu.log('Creating %s temporary folder' % unique_dir, out_log,
               self.global_log)

        # decompress the input_clusters_zip file
        cluster_list = fu.unzip_list(
            zip_file=self.io_dict["in"]["input_clusters_zip"],
            dest_dir=unique_dir,
            out_log=out_log)

        clusterPDB_ligands_aligned = []
        clusterPDB_ligands_num = 0

        fu.log('Iterating on all clusters:', out_log)

        for idx, cluster_path in enumerate(cluster_list):

            cluster_name = PurePath(cluster_path).stem
            fu.log(' ', out_log)
            fu.log('------------ Iteration #%s --------------' % (idx + 1),
                   out_log)
            fu.log('Cluster member: %s' % cluster_name, out_log)

            # Load and Parse PDB
            clusterPDB = {}
            clusterPDB = parser.get_structure(cluster_name, cluster_path)[0]

            # Use only the first chain
            for cluster_chain in clusterPDB.get_chains():
                clusterPDB = cluster_chain

            # Looking for ligands
            clusterPDB_ligands = get_ligand_residues(clusterPDB)
            if (len(clusterPDB_ligands)) == 0:
                fu.log(
                    'No ligands found that could guide the binding site search. Ignoring this member: %s'
                    % cluster_name, out_log)
                continue

            # Selecting the largest ligand, if more than one
            lig_atoms_num = 0
            clusterPDB_ligand = {}
            if self.ligand:
                if self.ligand in [
                        x.get_resname() for x in clusterPDB_ligands
                ]:
                    for lig in clusterPDB_ligands:
                        if lig.get_resname() == self.ligand:
                            clusterPDB_ligand = lig
                            lig_atoms_num = len(lig.get_list())
                            fu.log(
                                'Ligand found: %s  (%s atoms)' %
                                (lig.get_resname(), lig_atoms_num), out_log)
                else:
                    fu.log(
                        'Ligand %s not found in %s cluster member, skipping this cluster'
                        % (self.ligand, cluster_name), out_log)
                    continue
            else:
                if len(clusterPDB_ligands) > 1:
                    for lig_res in clusterPDB_ligands:
                        lig_res_atoms_num = len(lig_res.get_list())
                        fu.log(
                            'Ligand found: %s  (%s atoms)' %
                            (lig_res.get_resname(), lig_res_atoms_num),
                            out_log)
                        if lig_res_atoms_num > lig_atoms_num:
                            clusterPDB_ligand = lig_res
                            lig_atoms_num = lig_res_atoms_num
                else:
                    clusterPDB_ligand = clusterPDB_ligands[0]
                    lig_atoms_num = len(clusterPDB_ligands[0].get_list())

            fu.log(
                'Member accepted. Valid ligand found: %s (%s atoms)' %
                (clusterPDB_ligand.get_resname(), lig_atoms_num), out_log)

            ## Mapping residues by sequence alignment to match structPDB-clusterPDB paired residues

            # Get AA sequence
            clusterPDB_seq = get_pdb_sequence(clusterPDB)

            # Pairwise align
            aln, residue_map = align_sequences(structPDB_seq, clusterPDB_seq,
                                               self.matrix_name, self.gap_open,
                                               self.gap_extend)
            fu.log(
                'Matching residues to input PDB structure. Alignment is:\n%s' %
                (aln[1]), out_log)

            # Calculate (gapless) sequence identity
            seq_identity, gap_seq_identity = calculate_alignment_identity(
                aln[0], aln[1])
            fu.log('Sequence identity (%%): %s' % (seq_identity), out_log)
            fu.log('Gap less identity (%%): %s' % (gap_seq_identity), out_log)

            ## Selecting aligned CA atoms from first model, first chain

            struct_atoms = []
            cluster_atoms = []

            for struct_res in residue_map:
                try:
                    cluster_atoms.append(
                        clusterPDB[residue_map[struct_res]]['CA'])
                    struct_atoms.append(
                        get_residue_by_id(structPDB, struct_res)['CA'])
                except KeyError:
                    fu.log(
                        'Cannot find CA atom for residue %s  (input PDB  %s)' %
                        (get_residue_by_id(
                            structPDB, struct_res).get_resname(), struct_res),
                        out_log)
                    pass

            if len(cluster_atoms) == 0:
                fu.log(
                    self.__class__.__name__ +
                    ': Cannot find CA atoms (1st model, 1st chain) in cluster member %s when aligning against %s. Ignoring this member.'
                    % (cluster_name, structure_name), out_log)
                raise SystemExit(
                    self.__class__.__name__ +
                    ': Cannot find CA atoms (1st model, 1st chain) in cluster member %s when aligning against %s. Ignoring this member.'
                    % (cluster_name, structure_name))
            else:
                fu.log(
                    'Superimposing %s aligned protein residues' %
                    (len(cluster_atoms)), out_log)

            ## Align against input structure

            si = Bio.PDB.Superimposer()
            si.set_atoms(struct_atoms, cluster_atoms)
            si.apply(clusterPDB.get_atoms())
            fu.log('RMSD: %s' % (si.rms), out_log)

            # Save transformed structure (and ligand)
            clusterPDB_ligand_aligned = clusterPDB[clusterPDB_ligand.get_id()]
            fu.log('Saving transformed ligand coordinates', out_log)

            clusterPDB_ligands_aligned.append(clusterPDB_ligand_aligned)

            ##  Stop after n accepted cluster members

            clusterPDB_ligands_num += 1

            if clusterPDB_ligands_num > self.max_num_ligands:
                break

        fu.log(' ', out_log)
        fu.log('----------------------------------------', out_log)
        fu.log(
            'All transformed ligand coordinates saved, getting binding site residues',
            out_log)

        ## Select binding site atoms as those around cluster superimposed ligands

        fu.log(
            'Defining binding site residues as those %sÅ around the %s cluster superimposed ligands'
            % (self.radius, clusterPDB_ligands_num), out_log)

        # select Atoms from aligned ligands
        clusterPDB_ligands_aligned2 = [
            res for res in clusterPDB_ligands_aligned
        ]
        clusterPDB_ligands_aligned_atoms = Bio.PDB.Selection.unfold_entities(
            clusterPDB_ligands_aligned2, 'A')

        # select Atoms from input PDB structure
        structPDB_atoms = [atom for atom in structPDB.get_atoms()]

        # compute neighbors for aligned ligands in the input PDB structure
        structPDB_bs_residues_raw = {}
        structPDB_neighbors = Bio.PDB.NeighborSearch(structPDB_atoms)
        for ligand_atom in clusterPDB_ligands_aligned_atoms:
            # look for PDB atoms 5A around each ligand atom
            k_l = structPDB_neighbors.search(ligand_atom.coord, self.radius,
                                             'R')
            for k in k_l:
                structPDB_bs_residues_raw[k.get_id()] = k.get_full_id()

        ## Save binding site to PDB

        io = Bio.PDB.PDBIO()
        fu.log(
            'Writing binding site residues into %s' %
            (self.io_dict["out"]["output_pdb_path"]), out_log)

        # unselect input PDB atoms not in binding site
        structPDB_bs_atoms = 0
        p = re.compile('H_|W_|W')
        residue_ids_to_remove = []
        for res in structPDB.get_residues():
            if res.id not in structPDB_bs_residues_raw.keys():
                # add residue to residue_ids_to_remove list
                residue_ids_to_remove.append(res.id)
            elif p.match(res.resname):
                # add residue to residue_ids_to_remove list
                residue_ids_to_remove.append(res.id)
            else:
                # this residue will be preserved
                structPDB_bs_atoms += len(res.get_list())

        # unselect input PDB atoms not in binding site
        for chain in structPDB:
            for idr in residue_ids_to_remove:
                chain.detach_child(idr)

        # write PDB file
        io.set_structure(structPDB)
        io.save(self.io_dict["out"]["output_pdb_path"])

        if self.remove_tmp:
            # remove temporary folder
            fu.rm(unique_dir)
            fu.log(' ', out_log)
            fu.log('----------------------------------------', out_log)
            fu.log('Removed temporary folder: %s' % unique_dir, out_log)

        return 0
Esempio n. 15
0
    def launch(self):
        """Launch the execution of the template module.

        Copies the mandatory (and optional) input file(s) into a fresh
        temporary folder, builds the command line from the configured
        binary and properties, executes it through ``cmd_wrapper.CmdWrapper``
        and optionally removes the temporary folder afterwards.

        Returns:
            int: 0 when restart mode detects the output files already
            exist (step skipped); otherwise the return code of the
            executed command.
        """

        # Get local loggers from launchlogger decorator
        out_log = getattr(self, 'out_log', None)
        err_log = getattr(self, 'err_log', None)

        # Check the properties
        fu.check_properties(self, self.properties)

        # Restart: skip this step entirely if all outputs are already complete
        if self.restart:
            # 4. Include here all output file paths
            output_file_list = [self.io_dict['out']['output_file_path']]
            if fu.check_complete_files(output_file_list):
                # NOTE: fixed typo in log message ("will the skipped")
                fu.log(
                    'Restart is enabled, this step: %s will be skipped' %
                    self.step, out_log, self.global_log)
                return 0

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, out_log)

        # 5. Include here all mandatory input files
        # Copy input_file_path1 to temporary folder
        shutil.copy(self.io_dict['in']['input_file_path1'], self.tmp_folder)

        # 6. Prepare the command line parameters as instructions list
        instructions = ['-j']
        if self.boolean_property:
            instructions.append('-v')
            fu.log('Appending optional boolean property', out_log,
                   self.global_log)

        # 7. Build the actual command line as a list of items
        #    (elements order will be maintained)
        cmd = [
            self.executable_binary_property, ' '.join(instructions),
            self.io_dict['out']['output_file_path'],
            str(
                PurePath(self.tmp_folder).joinpath(
                    PurePath(self.io_dict['in']['input_file_path1']).name))
        ]
        fu.log(
            'Creating command line with instructions and required arguments',
            out_log, self.global_log)

        # 8. Repeat for optional input files if provided
        if self.io_dict['in']['input_file_path2']:
            # Copy input_file_path2 to temporary folder
            shutil.copy(self.io_dict['in']['input_file_path2'],
                        self.tmp_folder)
            # Append optional input_file_path2 to cmd
            cmd.append(
                str(
                    PurePath(self.tmp_folder).joinpath(
                        PurePath(
                            self.io_dict['in']['input_file_path2']).name)))
            fu.log('Appending optional argument to command line', out_log,
                   self.global_log)

        # 9. Uncomment to check the command line
        # print(' '.join(cmd))

        # Launch execution
        returncode = cmd_wrapper.CmdWrapper(cmd, out_log, err_log,
                                            self.global_log).launch()

        # Remove temporary file(s)
        if self.remove_tmp:
            fu.rm(self.tmp_folder)
            fu.log('Removed: %s' % str(self.tmp_folder), out_log)

        return returncode