def process_output_fpocket_filter(search_list, tmp_folder, input_pockets_zip, output_filter_pockets_zip, remove_tmp, out_log):
    """ Creates the output_filter_pockets_zip """
    # extract every pocket file from the input zip into the temporary folder
    fu.unzip_list(zip_file=input_pockets_zip, dest_dir=tmp_folder, out_log=out_log)
    # every file now sitting in the temporary folder
    extracted = [str(item) for item in Path(tmp_folder).iterdir()]
    # keep the files whose name carries one of the requested pocket tokens
    # (a file is appended once per matching token, as in the original)
    selected = []
    for pocket_path in extracted:
        for token in search_list:
            if token + '_' in pocket_path:
                selected.append(pocket_path)
    fu.log('Creating %s output file' % output_filter_pockets_zip, out_log)
    # bundle the selected pockets into the output zip
    fu.zip_list(zip_file=output_filter_pockets_zip, file_list=selected, out_log=out_log)
    if remove_tmp:
        # remove temporary folder
        fu.rm(tmp_folder)
        fu.log('Removed temporary folder: %s' % tmp_folder, out_log)
def process_output_gmx(unique_name, files_folder, remove_tmp, basename, class_params, output_files, out_log):
    """ Moves and removes temporal files generated by the wrapper.

    Args:
        unique_name (str): Random token inserted into the temporary file names.
        files_folder (str): Folder where the wrapper wrote its outputs.
        remove_tmp (bool): Whether to delete *files_folder* afterwards.
        basename (str): Original base name of the files.
        class_params (dict): Wrapper configuration; only 'suffix' is read here.
        output_files (dict): Maps file extension (without the dot) to destination path.
        out_log (Logger): Step logger.
    """
    path = files_folder
    suffix = class_params['suffix']
    src_files = glob.glob(path + '/' + basename + '.' + unique_name + suffix + '*')
    # copy files for the requested topology to the output_path
    for file_name in src_files:
        # replace random name by original name in all files
        with fileinput.FileInput(file_name, inplace=True) as file:
            for line in file:
                print(line.replace(basename + '.' + unique_name, basename), end='')
        if Path(file_name).is_file():
            file_extension = PurePath(file_name).suffix
            # in top files for gromacs, replace file.itp by name given by user
            if file_extension[1:] == 'top':
                with open(file_name) as f:
                    newText = f.read().replace(basename + '_GMX.itp', PurePath(output_files['itp']).name)
                with open(file_name, "w") as f:
                    f.write(newText)
            shutil.copy(file_name, output_files[file_extension[1:]])
            # fixed typo in the log message: "succesfully" -> "successfully"
            fu.log('File %s successfully created' % output_files[file_extension[1:]], out_log)
    if remove_tmp:
        # remove temporary folder
        fu.rm(files_folder)
        fu.log('Removed temporary folder: %s' % files_folder, out_log)
def launch(self) -> int:
    """Execute the :class:`PdbClusterZip <api.pdb_cluster_zip.PdbClusterZip>` api.pdb_cluster_zip.PdbClusterZip object."""
    # check input/output paths and parameters
    self.check_data_params(self.out_log, self.err_log)
    # Setup Biobb
    if self.check_restart():
        return 0
    self.stage_files()
    check_mandatory_property(self.pdb_code, 'pdb_code', self.out_log, self.__class__.__name__)
    self.pdb_code = self.pdb_code.strip().lower()
    file_list = []
    # Downloading PDB_files
    pdb_code_list = get_cluster_pdb_codes(pdb_code=self.pdb_code, cluster=self.cluster, out_log=self.out_log, global_log=self.global_log)
    unique_dir = fu.create_unique_dir()
    for pdb_code in pdb_code_list:
        pdb_file = os.path.join(unique_dir, pdb_code + ".pdb")
        pdb_string = download_pdb(pdb_code=pdb_code, api_id=self.api_id, out_log=self.out_log, global_log=self.global_log)
        write_pdb(pdb_string, pdb_file, self.filter, self.out_log, self.global_log)
        file_list.append(os.path.abspath(pdb_file))
    # Zipping files
    # Bug fix: this fu.log call was missing the logger arguments, so the
    # message never reached the step/global logs (every other call passes them)
    fu.log("Zipping the pdb files to: %s" % self.output_pdb_zip_path, self.out_log, self.global_log)
    fu.zip_list(self.output_pdb_zip_path, file_list, out_log=self.out_log)
    if self.remove_tmp:
        # remove temporary folder
        fu.rm(unique_dir)
        fu.log('Removed temporary folder: %s' % unique_dir, self.out_log)
    return 0
def process_output(unique_name, files_folder, remove_tmp, basename, class_params, output_files, out_log):
    """ Moves and removes temporal files generated by the wrapper.

    Args:
        unique_name (str): Random token inserted into the temporary file names.
        files_folder (str): Folder where the wrapper wrote its outputs.
        remove_tmp (bool): Whether to delete *files_folder* afterwards.
        basename (str): Original base name of the files.
        class_params (dict): Wrapper configuration; only 'suffix' is read here.
        output_files (dict): Maps file extension (without the dot) to destination path.
        out_log (Logger): Step logger.
    """
    path = files_folder
    suffix = class_params['suffix']
    src_files = glob.glob(path + '/' + basename + '.' + unique_name + suffix + '*')
    # copy files for the requested topology to the output_path
    for file_name in src_files:
        # replace random name by original name in all files
        with fileinput.FileInput(file_name, inplace=True) as file:
            for line in file:
                print(line.replace(basename + '.' + unique_name, basename), end='')
        if Path(file_name).is_file():
            file_extension = PurePath(file_name).suffix
            shutil.copy(file_name, output_files[file_extension[1:]])
            # fixed typo in the log message: "succesfully" -> "successfully"
            fu.log('File %s successfully created' % output_files[file_extension[1:]], out_log)
    if remove_tmp:
        # remove temporary folder
        fu.rm(files_folder)
        fu.log('Removed temporary folder: %s' % files_folder, out_log)
def launch(self) -> int:
    """Execute the :class:`FPocketSelect <fpocket.fpocket_select.FPocketSelect>` fpocket.fpocket_select.FPocketSelect object."""
    # Get local loggers from launchlogger decorator
    out_log = getattr(self, 'out_log', None)
    err_log = getattr(self, 'err_log', None)
    # check input/output paths and parameters
    self.check_data_params(out_log, err_log)
    # Check the properties
    fu.check_properties(self, self.properties)
    if self.restart:
        output_file_list = [self.io_dict["out"]["output_pocket_pdb"],
                            self.io_dict["out"]["output_pocket_pqr"]]
        if fu.check_complete_files(output_file_list):
            # fixed typo in the log message: "will the skipped" -> "will be skipped"
            fu.log('Restart is enabled, this step: %s will be skipped' % self.step, out_log, self.global_log)
            return 0
    # create tmp_folder
    self.tmp_folder = fu.create_unique_dir()
    fu.log('Creating %s temporary folder' % self.tmp_folder, out_log)
    # decompress the input_pockets_zip file to tmp_folder
    all_pockets = fu.unzip_list(zip_file=self.io_dict["in"]["input_pockets_zip"], dest_dir=self.tmp_folder, out_log=out_log)
    # Bug fix: the plain substring test ('pocket1' in path) also matched
    # pocket10, pocket11, ... fpocket names its files pocketN_atm.pdb /
    # pocketN_vert.pqr, so the trailing underscore anchors the match.
    pockets_list = [i for i in all_pockets if ('pocket' + str(self.pocket) + '_') in i]
    if not pockets_list:
        # robustness: warn instead of silently producing no output files
        fu.log('Pocket %s not found in %s' % (self.pocket, self.io_dict["in"]["input_pockets_zip"]), out_log)
    for p in pockets_list:
        if PurePath(p).suffix == '.pdb':
            fu.log('Saving %s file' % self.io_dict["out"]["output_pocket_pdb"], out_log)
            shutil.copy(p, self.io_dict["out"]["output_pocket_pdb"])
        else:
            fu.log('Saving %s file' % self.io_dict["out"]["output_pocket_pqr"], out_log)
            shutil.copy(p, self.io_dict["out"]["output_pocket_pqr"])
    if self.remove_tmp:
        # remove temporary folder
        fu.rm(self.tmp_folder)
        fu.log('Removed temporary folder: %s' % self.tmp_folder, out_log)
    return 0
def process_output_trjconv_str_ens(tmp_folder, remove_tmp, output_file, glob_pattern, out_log):
    """ Compresses, moves and removes temporal files generated by the wrapper """
    # collect every file in the temporary folder matching the pattern
    matched_files = list(Path(tmp_folder).glob(glob_pattern))
    # adding files from temporary folder to zip
    fu.zip_list(output_file, matched_files, out_log)
    if remove_tmp:
        # remove temporary folder
        fu.rm(tmp_folder)
        fu.log('Removed temporary folder: %s' % tmp_folder, out_log)
def remove_tmp_files(list, remove_tmp, out_log, input_top_path_orig=None, input_top_path=None):
    """ Removes temporal files generated by the wrapper.

    Args:
        list (list): Paths of the temporal files to delete.
            NOTE(review): the parameter shadows the ``list`` builtin; the name
            is kept to preserve the public signature for keyword callers.
        remove_tmp (bool): Whether to delete anything at all.
        out_log (Logger): Step logger.
        input_top_path_orig (str): Optional original topology path; when it is
            a zip file, the folder *input_top_path* was extracted to is removed too.
        input_top_path (str): Optional extracted topology path.
    """
    tmp_files = list
    # Bug fix: zipfile.is_zipfile(None) raises TypeError, so the default
    # (None) value must be guarded before probing the path.
    if input_top_path_orig is not None and zipfile.is_zipfile(input_top_path_orig):
        tmp_files.append(PurePath(input_top_path).parent)
    if remove_tmp:
        removed_files = [f for f in tmp_files if fu.rm(f)]
        fu.log('Removed: %s' % str(removed_files), out_log)
def get_gromacs_version(gmx: str = "gmx") -> int:
    """ Gets the GROMACS installed version and returns it as an int(3) for
    versions older than 5.1.5 and an int(5) for 20XX versions filling the gaps
    with '0' digits.

    Args:
        gmx (str): ('gmx') Path to the GROMACS binary.

    Returns:
        int: GROMACS version, or 0 if it could not be determined.
    """
    unique_dir = fu.create_unique_dir()
    out_log, err_log = fu.get_logs(path=unique_dir, can_write_console=False)
    cmd = [gmx, "-version"]
    try:
        cmd_wrapper.CmdWrapper(cmd, out_log, err_log).launch()
        pattern = re.compile(r"GROMACS version:\s+(.+)")
        # initialised so a log without a version line is handled explicitly
        # instead of raising NameError (previously masked by a bare except)
        version_str = None
        with open(Path(unique_dir).joinpath('log.out')) as log_file:
            for line in log_file:
                version_str = pattern.match(line.strip())
                if version_str:
                    break
        if not version_str:
            # no "GROMACS version:" line found in the binary's output
            return 0
        version = version_str.group(1).replace(".", "").replace("VERSION", "").strip()
        version = "".join([c for c in version if c.isdigit()])
    except Exception:
        # narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed
        return 0
    finally:
        # the temporary folder used to be leaked on the error path;
        # always clean it up
        fu.rm(unique_dir)
    # pad with trailing zeros: 5 digits for 20XX releases, 3 otherwise
    if version.startswith("2"):
        while len(version) < 5:
            version += '0'
    else:
        while len(version) < 3:
            version += '0'
    return int(version)
def process_output_fpocket(tmp_folder, output_pockets_zip, output_summary, sort_by, remove_tmp, out_log, classname):
    """ Creates the output_pockets_zip and generates the output_summary.

    Args:
        tmp_folder (str): Folder where fpocket wrote its 'input_out' results.
        output_pockets_zip (str): Path of the zip bundling all pocket files.
        output_summary (str): Path of the JSON summary file to write.
        sort_by (str): Pocket property used to sort the summary (descending).
        remove_tmp (bool): Whether to delete *tmp_folder* afterwards.
        out_log (Logger): Step logger.
        classname (str): Caller class name, used in error messages.

    Raises:
        SystemExit: If fpocket did not produce the expected output folder.
    """
    path = str(PurePath(tmp_folder).joinpath('input_out'))
    if not Path(path).is_dir():
        if remove_tmp:
            # remove temporary folder
            fu.rm(tmp_folder)
            fu.log('Removing temporary folder: %s' % tmp_folder, out_log)
        fu.log(classname + ': Error executing fpocket, please check your properties', out_log)
        raise SystemExit(classname + ': Error executing fpocket, please check your properties')
    # summary
    # read input_info.txt file
    info = PurePath(path).joinpath('input_info.txt')
    with open(info, 'r') as info_text:
        lines = info_text.readlines()
    lines = [x for x in lines if x != '\n']
    data = {}
    # parse input_info.txt file to python object
    pocket = ''
    for line in lines:
        if not line.startswith('\t'):
            # first level: pocket
            # raw string fixes the invalid '\d' escape-sequence warning
            num = re.findall(r'\d+', line)[0]
            pocket = 'pocket' + num
            data[pocket] = {}
        else:
            # second level: pocket properties
            # raw string fixes the invalid '\ ' / '\:' escape-sequence warnings
            groups = re.findall(r'(.*)(?:\ *\:\ *)(.*)', line)[0]
            key = groups[0].lower().strip()
            key = re.sub(r'\-|\.', '', key)
            key = re.sub(r'\s+', '_', key)
            value = float(groups[1]) if '.' in groups[1] else int(groups[1])
            data[pocket][key] = value
    # get number of pockets
    fu.log('%d pockets found' % (len(data)), out_log)
    # sort data by sort_by property
    fu.log('Sorting output data by %s' % (sort_by), out_log)
    data = dict(sorted(data.items(), key=lambda item: float(item[1][sort_by]), reverse=True))
    # compress pockets
    pockets = PurePath(path).joinpath('pockets')
    files_list = [str(i) for i in Path(pockets).iterdir()]
    fu.zip_list(zip_file=output_pockets_zip, file_list=files_list, out_log=out_log)
    # save summary
    fu.log('Saving summary to %s file' % (output_summary), out_log)
    with open(output_summary, 'w') as outfile:
        json.dump(data, outfile, indent=4)
    if remove_tmp:
        # remove temporary folder
        fu.rm(tmp_folder)
        fu.log('Removed temporary folder: %s' % tmp_folder, out_log)
def launch(self) -> int:
    """Launches the execution of the GROMACS editconf module.

    NOTE(review): the original docstring names "editconf", but the body
    appends a ligand ITP include (and optional position restraints) to a
    GROMACS topology and re-zips it — confirm the intended module name.
    """
    tmp_files = []
    # Get local loggers from launchlogger decorator
    out_log = getattr(self, 'out_log', None)
    err_log = getattr(self, 'err_log', None)  # NOTE(review): unused in this method
    # Restart if needed
    if self.restart:
        output_file_list = [self.io_dict['out'].get("output_top_zip_path")]
        if fu.check_complete_files(output_file_list):
            fu.log('Restart is enabled, this step: %s will the skipped' % self.step, out_log, self.global_log)
            return 0
    # Unzip topology
    top_file = fu.unzip_top(zip_file=self.io_dict['in'].get("input_top_zip_path"), out_log=out_log)
    top_dir = str(Path(top_file).parent)
    tmp_files.append(top_dir)
    itp_name = str(Path(self.io_dict['in'].get("input_itp_path")).name)
    with open(top_file) as top_f:
        top_lines = top_f.readlines()
        # NOTE(review): redundant — the ``with`` block already closes the file
        top_f.close()
    # the raw .top is rewritten later as ligand.top, so drop the original
    fu.rm(top_file)
    forcefield_pattern = r'#include.*forcefield.itp\"'
    # locate the force-field include; ``index`` is reused below as the
    # insertion anchor (if no line matches, it ends at the last line)
    for index, line in enumerate(top_lines):
        if re.search(forcefield_pattern, line):
            break
    # inject the ligand include right after the force-field include
    top_lines.insert(index + 1, '\n')
    top_lines.insert(index + 2, '; Including ligand ITP\n')
    top_lines.insert(index + 3, '#include "' + itp_name + '"\n')
    top_lines.insert(index + 4, '\n')
    if self.io_dict['in'].get("input_posres_itp_path"):
        # optional position-restraint include guarded by an #ifdef block
        top_lines.insert(index + 5, '; Ligand position restraints' + '\n')
        top_lines.insert(index + 6, '#ifdef ' + self.posres_name + '\n')
        top_lines.insert(index + 7, '#include "' + str(Path(self.io_dict['in'].get("input_posres_itp_path")).name) + '"\n')
        top_lines.insert(index + 8, '#endif' + '\n')
        top_lines.insert(index + 9, '\n')
    # read the molecule name from the ITP's [ moleculetype ] section:
    # the first non-comment line after the header holds the name
    inside_moleculetype_section = False
    with open(self.io_dict['in'].get("input_itp_path")) as itp_file:
        moleculetype_pattern = r'\[ moleculetype \]'
        for line in itp_file:
            if re.search(moleculetype_pattern, line):
                inside_moleculetype_section = True
                continue
            if inside_moleculetype_section and not line.startswith(';'):
                moleculetype = line.strip().split()[0].strip()
                break
    molecules_pattern = r'\[ molecules \]'
    inside_molecules_section = False
    index_molecule = None
    # molecule name left-justified to 20 columns followed by a count of 1
    molecule_string = moleculetype + (20 - len(moleculetype)) * ' ' + '1' + '\n'
    for index, line in enumerate(top_lines):
        if re.search(molecules_pattern, line):
            inside_molecules_section = True
            continue
        if inside_molecules_section and not line.startswith(';') and line.upper().startswith('PROTEIN'):
            # remember the LAST Protein entry inside [ molecules ]
            index_molecule = index
    if index_molecule:
        # NOTE(review): falsy check — an entry at index 0 would count as "not
        # found"; harmless in practice since the section header precedes it
        top_lines.insert(index_molecule + 1, molecule_string)
    else:
        top_lines.append(molecule_string)
    new_top = fu.create_name(path=top_dir, prefix=self.prefix, step=self.step, name='ligand.top')
    with open(new_top, 'w') as new_top_f:
        new_top_f.write("".join(top_lines))
    # the ITP file(s) must sit next to the topology for the includes to resolve
    shutil.copy2(self.io_dict['in'].get("input_itp_path"), top_dir)
    if self.io_dict['in'].get("input_posres_itp_path"):
        shutil.copy2(self.io_dict['in'].get("input_posres_itp_path"), top_dir)
    # zip topology
    fu.log('Compressing topology to: %s' % self.io_dict['out'].get("output_top_zip_path"), out_log, self.global_log)
    fu.zip_top(zip_file=self.io_dict['out'].get("output_top_zip_path"), top_file=new_top, out_log=out_log)
    if self.remove_tmp:
        fu.rm_file_list(tmp_files, out_log=out_log)
    return 0
def remove_tmp_files(list, remove_tmp, out_log):
    """ Removes temporal files generated by the wrapper """
    # nothing to do unless cleanup was requested
    if remove_tmp:
        # fu.rm returns a truthy value only for paths it actually deleted
        removed = [path for path in list if fu.rm(path)]
        fu.log('Removed: %s' % str(removed), out_log)
def launch(self):
    """Launches the execution of the template_container module.

    Returns:
        int: Exit code of the wrapped command (0 when skipped by restart).
    """
    # Get local loggers from launchlogger decorator
    out_log = getattr(self, 'out_log', None)
    err_log = getattr(self, 'err_log', None)
    # Check the properties
    fu.check_properties(self, self.properties)
    # Restart
    if self.restart:
        # 4. Include here all output file paths
        output_file_list = [self.io_dict['out']['output_file_path']]
        if fu.check_complete_files(output_file_list):
            # fixed typo in the log message: "will the skipped" -> "will be skipped"
            fu.log('Restart is enabled, this step: %s will be skipped' % self.step, out_log, self.global_log)
            return 0
    # 5. Copy inputs to container
    container_io_dict = fu.copy_to_container(self.container_path, self.container_volume_path, self.io_dict)
    # 6. Prepare the command line parameters as instructions list
    instructions = ['-j']
    if self.boolean_property:
        instructions.append('-v')
        fu.log('Appending optional boolean property', out_log, self.global_log)
    # 7. Build the actual command line as a list of items (elements order will be maintained)
    cmd = [self.executable_binary_property,
           ' '.join(instructions),
           container_io_dict['out']['output_file_path'],
           container_io_dict['in']['input_file_path1']]
    fu.log('Creating command line with instructions and required arguments', out_log, self.global_log)
    # 8. Repeat for optional input files if provided
    if container_io_dict['in']['input_file_path2']:
        # Append optional input_file_path2 to cmd
        cmd.append(container_io_dict['in']['input_file_path2'])
        fu.log('Appending optional argument to command line', out_log, self.global_log)
    # 9. Uncomment to check the command line
    # print(' '.join(cmd))
    # 10. Create cmd with specific syntax according to the required container
    cmd = fu.create_cmd_line(cmd, container_path=self.container_path,
                             host_volume=container_io_dict.get('unique_dir'),
                             container_volume=self.container_volume_path,
                             container_working_dir=self.container_working_dir,
                             container_user_uid=self.container_user_id,
                             container_image=self.container_image,
                             container_shell_path=self.container_shell_path,
                             out_log=out_log, global_log=self.global_log)
    # Launch execution
    returncode = cmd_wrapper.CmdWrapper(cmd, out_log, err_log, self.global_log).launch()
    # Copy output(s) to output(s) path(s) in case of container execution
    fu.copy_to_host(self.container_path, container_io_dict, self.io_dict)
    # Remove temporary file(s)
    if self.remove_tmp and container_io_dict.get('unique_dir'):
        fu.rm(container_io_dict.get('unique_dir'))
        fu.log('Removed: %s' % str(container_io_dict.get('unique_dir')), out_log)
    return returncode
def launch(self) -> int:
    """Execute the :class:`AppendLigand <gromacs_extra.append_ligand.AppendLigand>` object.

    Unzips the input topology, inserts the ligand ITP include (and optional
    position restraints) after the force-field include, registers the ligand
    in the [ molecules ] section, and re-zips the result.
    """
    # Setup Biobb
    if self.check_restart():
        return 0
    # Unzip topology
    top_file = fu.unzip_top(zip_file=self.io_dict['in'].get("input_top_zip_path"), out_log=self.out_log)
    top_dir = str(Path(top_file).parent)
    itp_name = str(Path(self.io_dict['in'].get("input_itp_path")).name)
    with open(top_file) as top_f:
        top_lines = top_f.readlines()
        # NOTE(review): redundant — the ``with`` block already closes the file
        top_f.close()
    # the raw .top is rewritten later as ligand.top, so drop the original
    fu.rm(top_file)
    forcefield_pattern = r'#include.*forcefield.itp\"'
    if top_lines:
        # locate the force-field include; ``index`` is reused below as the
        # insertion anchor (if no line matches, it ends at the last line)
        for index, line in enumerate(top_lines):
            if re.search(forcefield_pattern, line):
                break
    else:
        # empty topology: nothing to append to
        fu.log(f'FATAL: Input topfile {top_file} from input_top_zip_path {self.io_dict["in"].get("input_top_zip_path")} is empty.', self.out_log, self.global_log)
        return 1
    # inject the ligand include right after the force-field include
    top_lines.insert(index + 1, '\n')
    top_lines.insert(index + 2, '; Including ligand ITP\n')
    top_lines.insert(index + 3, '#include "' + itp_name + '"\n')
    top_lines.insert(index + 4, '\n')
    if self.io_dict['in'].get("input_posres_itp_path"):
        # optional position-restraint include guarded by an #ifdef block
        top_lines.insert(index + 5, '; Ligand position restraints' + '\n')
        top_lines.insert(index + 6, '#ifdef ' + self.posres_name + '\n')
        top_lines.insert(index + 7, '#include "' + str(Path(self.io_dict['in'].get("input_posres_itp_path")).name) + '"\n')
        top_lines.insert(index + 8, '#endif' + '\n')
        top_lines.insert(index + 9, '\n')
    # read the molecule name from the ITP's [ moleculetype ] section:
    # the first non-comment line after the header holds the name
    inside_moleculetype_section = False
    with open(self.io_dict['in'].get("input_itp_path")) as itp_file:
        moleculetype_pattern = r'\[ moleculetype \]'
        for line in itp_file:
            if re.search(moleculetype_pattern, line):
                inside_moleculetype_section = True
                continue
            if inside_moleculetype_section and not line.startswith(';'):
                moleculetype = line.strip().split()[0].strip()
                break
    molecules_pattern = r'\[ molecules \]'
    inside_molecules_section = False
    index_molecule = None
    # molecule name left-justified to 20 columns followed by a count of 1
    molecule_string = moleculetype + (20 - len(moleculetype)) * ' ' + '1' + '\n'
    for index, line in enumerate(top_lines):
        if re.search(molecules_pattern, line):
            inside_molecules_section = True
            continue
        if inside_molecules_section and not line.startswith(';') and line.upper().startswith('PROTEIN'):
            # remember the LAST Protein entry inside [ molecules ]
            index_molecule = index
    if index_molecule:
        # NOTE(review): falsy check — an entry at index 0 would count as "not
        # found"; harmless in practice since the section header precedes it
        top_lines.insert(index_molecule + 1, molecule_string)
    else:
        top_lines.append(molecule_string)
    new_top = fu.create_name(path=top_dir, prefix=self.prefix, step=self.step, name='ligand.top')
    with open(new_top, 'w') as new_top_f:
        new_top_f.write("".join(top_lines))
    # the ITP file(s) must sit next to the topology for the includes to resolve
    shutil.copy2(self.io_dict['in'].get("input_itp_path"), top_dir)
    if self.io_dict['in'].get("input_posres_itp_path"):
        shutil.copy2(self.io_dict['in'].get("input_posres_itp_path"), top_dir)
    # zip topology
    fu.log('Compressing topology to: %s' % self.io_dict['out'].get("output_top_zip_path"), self.out_log, self.global_log)
    fu.zip_top(zip_file=self.io_dict['out'].get("output_top_zip_path"), top_file=new_top, out_log=self.out_log)
    # Remove temporal files
    self.tmp_files.append(top_dir)
    self.remove_tmp_files()
    return 0
def launch(self) -> int:
    """Execute the :class:`BindingSite <utils.bindingsite.BindingSite>` utils.bindingsite.BindingSite object.

    Superimposes the ligand-bearing cluster members onto the input structure
    and writes out the input residues lying within ``self.radius`` of any
    superimposed ligand atom as the binding site PDB.
    """
    # Get local loggers from launchlogger decorator
    out_log = getattr(self, 'out_log', None)
    err_log = getattr(self, 'err_log', None)
    # check input/output paths and parameters
    self.check_data_params(out_log, err_log)
    # Check the properties
    fu.check_properties(self, self.properties)
    if self.restart:
        output_file_list = [self.io_dict["out"]["output_pdb_path"]]
        if fu.check_complete_files(output_file_list):
            fu.log('Restart is enabled, this step: %s will the skipped' % self.step, out_log, self.global_log)
            return 0
    # Parse structure
    fu.log('Loading input PDB structure %s' % (self.io_dict["in"]["input_pdb_path"]), out_log, self.global_log)
    structure_name = PurePath(self.io_dict["in"]["input_pdb_path"]).name
    parser = Bio.PDB.PDBParser(QUIET=True)
    structPDB = parser.get_structure(structure_name, self.io_dict["in"]["input_pdb_path"])
    # keep only the first model
    if len(structPDB):
        structPDB = structPDB[0]
    # Use only one chain
    n_chains = structPDB.get_list()
    if len(n_chains) != 1:
        fu.log('More than one chain found in the input PDB structure. Using only the first chain to find the binding site', out_log, self.global_log)
    # get first chain in case there is more than one chain
    # NOTE(review): the loop leaves ``structPDB`` bound to the LAST chain
    # yielded, not the first — confirm this matches the intent
    for struct_chain in structPDB.get_chains():
        structPDB = struct_chain
    # Get AA sequence
    structPDB_seq = get_pdb_sequence(structPDB)
    if len(structPDB_seq) == 0:
        fu.log(self.__class__.__name__ + ': Cannot extract AA sequence from the input PDB structure %s. Wrong format?' % self.io_dict["in"]["input_pdb_path"], out_log)
        raise SystemExit(self.__class__.__name__ + ': Cannot extract AA sequence from the input PDB structure %s. Wrong format?' % self.io_dict["in"]["input_pdb_path"])
    else:
        fu.log('Found %s residues in %s' % (len(structPDB_seq), self.io_dict["in"]["input_pdb_path"]), out_log)
    # create temporary folder for decompressing the input_clusters_zip file
    unique_dir = PurePath(fu.create_unique_dir())
    fu.log('Creating %s temporary folder' % unique_dir, out_log, self.global_log)
    # decompress the input_clusters_zip file
    cluster_list = fu.unzip_list(zip_file=self.io_dict["in"]["input_clusters_zip"], dest_dir=unique_dir, out_log=out_log)
    clusterPDB_ligands_aligned = []   # ligand residues after superimposition
    clusterPDB_ligands_num = 0        # number of accepted cluster members
    fu.log('Iterating on all clusters:', out_log)
    for idx, cluster_path in enumerate(cluster_list):
        cluster_name = PurePath(cluster_path).stem
        fu.log(' ', out_log)
        fu.log('------------ Iteration #%s --------------' % (idx + 1), out_log)
        fu.log('Cluster member: %s' % cluster_name, out_log)
        # Load and Parse PDB
        clusterPDB = {}
        # first model of the cluster member
        clusterPDB = parser.get_structure(cluster_name, cluster_path)[0]
        # Use only the first chain
        # NOTE(review): as above, this actually keeps the LAST chain
        for cluster_chain in clusterPDB.get_chains():
            clusterPDB = cluster_chain
        # Looking for ligands
        clusterPDB_ligands = get_ligand_residues(clusterPDB)
        if (len(clusterPDB_ligands)) == 0:
            fu.log('No ligands found that could guide the binding site search. Ignoring this member: %s' % cluster_name, out_log)
            continue
        # Selecting the largest ligand, if more than one
        lig_atoms_num = 0
        clusterPDB_ligand = {}
        if self.ligand:
            # a specific ligand residue name was requested
            if self.ligand in [x.get_resname() for x in clusterPDB_ligands]:
                for lig in clusterPDB_ligands:
                    if lig.get_resname() == self.ligand:
                        clusterPDB_ligand = lig
                        lig_atoms_num = len(lig.get_list())
                        fu.log('Ligand found: %s (%s atoms)' % (lig.get_resname(), lig_atoms_num), out_log)
            else:
                fu.log('Ligand %s not found in %s cluster member, skipping this cluster' % (self.ligand, cluster_name), out_log)
                continue
        else:
            # no ligand requested: pick the one with the most atoms
            if len(clusterPDB_ligands) > 1:
                for lig_res in clusterPDB_ligands:
                    lig_res_atoms_num = len(lig_res.get_list())
                    fu.log('Ligand found: %s (%s atoms)' % (lig_res.get_resname(), lig_res_atoms_num), out_log)
                    if lig_res_atoms_num > lig_atoms_num:
                        clusterPDB_ligand = lig_res
                        lig_atoms_num = lig_res_atoms_num
            else:
                clusterPDB_ligand = clusterPDB_ligands[0]
                lig_atoms_num = len(clusterPDB_ligands[0].get_list())
        fu.log('Member accepted. Valid ligand found: %s (%s atoms)' % (clusterPDB_ligand.get_resname(), lig_atoms_num), out_log)
        ## Mapping residues by sequence alignment to match structPDB-clusterPDB paired residues
        # Get AA sequence
        clusterPDB_seq = get_pdb_sequence(clusterPDB)
        # Pairwise align
        aln, residue_map = align_sequences(structPDB_seq, clusterPDB_seq, self.matrix_name, self.gap_open, self.gap_extend)
        fu.log('Matching residues to input PDB structure. Alignment is:\n%s' % (aln[1]), out_log)
        # Calculate (gapless) sequence identity
        seq_identity, gap_seq_identity = calculate_alignment_identity(aln[0], aln[1])
        fu.log('Sequence identity (%%): %s' % (seq_identity), out_log)
        fu.log('Gap less identity (%%): %s' % (gap_seq_identity), out_log)
        ## Selecting aligned CA atoms from first model, first chain
        struct_atoms = []
        cluster_atoms = []
        for struct_res in residue_map:
            try:
                cluster_atoms.append(clusterPDB[residue_map[struct_res]]['CA'])
                struct_atoms.append(get_residue_by_id(structPDB, struct_res)['CA'])
            except KeyError:
                # residue without a CA atom (e.g. incomplete residue): skip the pair
                fu.log('Cannot find CA atom for residue %s (input PDB %s)' % (get_residue_by_id(structPDB, struct_res).get_resname(), struct_res), out_log)
                pass
        if len(cluster_atoms) == 0:
            # NOTE(review): the message says "Ignoring this member" but the
            # code raises SystemExit, aborting the whole run — confirm intent
            fu.log(self.__class__.__name__ + ': Cannot find CA atoms (1st model, 1st chain) in cluster member %s when aligning against %s. Ignoring this member.' % (cluster_name, structure_name), out_log)
            raise SystemExit(self.__class__.__name__ + ': Cannot find CA atoms (1st model, 1st chain) in cluster member %s when aligning against %s. Ignoring this member.' % (cluster_name, structure_name))
        else:
            fu.log('Superimposing %s aligned protein residues' % (len(cluster_atoms)), out_log)
        ## Align against input structure
        si = Bio.PDB.Superimposer()
        si.set_atoms(struct_atoms, cluster_atoms)
        si.apply(clusterPDB.get_atoms())
        fu.log('RMSD: %s' % (si.rms), out_log)
        # Save transformed structure (and ligand)
        clusterPDB_ligand_aligned = clusterPDB[clusterPDB_ligand.get_id()]
        fu.log('Saving transformed ligand coordinates', out_log)
        clusterPDB_ligands_aligned.append(clusterPDB_ligand_aligned)
        ## Stop after n accepted cluster members
        clusterPDB_ligands_num += 1
        if clusterPDB_ligands_num > self.max_num_ligands:
            break
    fu.log(' ', out_log)
    fu.log('----------------------------------------', out_log)
    fu.log('All transformed ligand coordinates saved, getting binding site residues', out_log)
    ## Select binding site atoms as those around cluster superimposed ligands
    fu.log('Defining binding site residues as those %sÅ around the %s cluster superimposed ligands' % (self.radius, clusterPDB_ligands_num), out_log)
    # select Atoms from aligned ligands
    clusterPDB_ligands_aligned2 = [res for res in clusterPDB_ligands_aligned]
    clusterPDB_ligands_aligned_atoms = Bio.PDB.Selection.unfold_entities(clusterPDB_ligands_aligned2, 'A')
    # select Atoms from input PDB structure
    structPDB_atoms = [atom for atom in structPDB.get_atoms()]
    # compute neighbors for aligned ligands in the input PDB structure
    structPDB_bs_residues_raw = {}
    structPDB_neighbors = Bio.PDB.NeighborSearch(structPDB_atoms)
    for ligand_atom in clusterPDB_ligands_aligned_atoms:
        # look for PDB atoms 5A around each ligand atom
        # NOTE(review): the comment above hard-codes 5A but the search uses
        # self.radius; 'R' returns whole residues, keyed by residue id below
        k_l = structPDB_neighbors.search(ligand_atom.coord, self.radius, 'R')
        for k in k_l:
            structPDB_bs_residues_raw[k.get_id()] = k.get_full_id()
    ## Save binding site to PDB
    io = Bio.PDB.PDBIO()
    fu.log('Writing binding site residues into %s' % (self.io_dict["out"]["output_pdb_path"]), out_log)
    # unselect input PDB atoms not in binding site
    structPDB_bs_atoms = 0  # NOTE(review): counted but never used afterwards
    p = re.compile('H_|W_|W')
    residue_ids_to_remove = []
    for res in structPDB.get_residues():
        if res.id not in structPDB_bs_residues_raw.keys():
            # add residue to residue_ids_to_remove list
            residue_ids_to_remove.append(res.id)
        elif p.match(res.resname):
            # heteroatom/water-like residue names are dropped as well
            residue_ids_to_remove.append(res.id)
        else:
            # this residue will be preserved
            structPDB_bs_atoms += len(res.get_list())
    # unselect input PDB atoms not in binding site
    for chain in structPDB:
        for idr in residue_ids_to_remove:
            chain.detach_child(idr)
    # write PDB file
    io.set_structure(structPDB)
    io.save(self.io_dict["out"]["output_pdb_path"])
    if self.remove_tmp:
        # remove temporary folder
        fu.rm(unique_dir)
        fu.log(' ', out_log)
        fu.log('----------------------------------------', out_log)
        fu.log('Removed temporary folder: %s' % unique_dir, out_log)
    return 0
def launch(self):
    """Launches the execution of the template module.

    Returns:
        int: Exit code of the wrapped command (0 when skipped by restart).
    """
    # Get local loggers from launchlogger decorator
    out_log = getattr(self, 'out_log', None)
    err_log = getattr(self, 'err_log', None)
    # Check the properties
    fu.check_properties(self, self.properties)
    # Restart
    if self.restart:
        # 4. Include here all output file paths
        output_file_list = [self.io_dict['out']['output_file_path']]
        if fu.check_complete_files(output_file_list):
            # fixed typo in the log message: "will the skipped" -> "will be skipped"
            fu.log('Restart is enabled, this step: %s will be skipped' % self.step, out_log, self.global_log)
            return 0
    # Creating temporary folder
    self.tmp_folder = fu.create_unique_dir()
    fu.log('Creating %s temporary folder' % self.tmp_folder, out_log)
    # 5. Include here all mandatory input files
    # Copy input_file_path1 to temporary folder
    shutil.copy(self.io_dict['in']['input_file_path1'], self.tmp_folder)
    # 6. Prepare the command line parameters as instructions list
    instructions = ['-j']
    if self.boolean_property:
        instructions.append('-v')
        fu.log('Appending optional boolean property', out_log, self.global_log)
    # 7. Build the actual command line as a list of items (elements order will be maintained)
    cmd = [self.executable_binary_property,
           ' '.join(instructions),
           self.io_dict['out']['output_file_path'],
           str(PurePath(self.tmp_folder).joinpath(PurePath(self.io_dict['in']['input_file_path1']).name))]
    fu.log('Creating command line with instructions and required arguments', out_log, self.global_log)
    # 8. Repeat for optional input files if provided
    if self.io_dict['in']['input_file_path2']:
        # Copy input_file_path2 to temporary folder
        shutil.copy(self.io_dict['in']['input_file_path2'], self.tmp_folder)
        # Append optional input_file_path2 to cmd
        cmd.append(str(PurePath(self.tmp_folder).joinpath(PurePath(self.io_dict['in']['input_file_path2']).name)))
        fu.log('Appending optional argument to command line', out_log, self.global_log)
    # 9. Uncomment to check the command line
    # print(' '.join(cmd))
    # Launch execution
    returncode = cmd_wrapper.CmdWrapper(cmd, out_log, err_log, self.global_log).launch()
    # Remove temporary file(s)
    if self.remove_tmp:
        fu.rm(self.tmp_folder)
        fu.log('Removed: %s' % str(self.tmp_folder), out_log)
    return returncode