def process_output_fpocket_filter(search_list, tmp_folder, input_pockets_zip,
                                  output_filter_pockets_zip, remove_tmp, out_log):
    """Creates the output_filter_pockets_zip.

    Unzips ``input_pockets_zip`` into ``tmp_folder``, selects the extracted
    files whose path contains one of the ``search_list`` entries followed by
    an underscore, and zips the selection into ``output_filter_pockets_zip``.

    Args:
        search_list: iterable of strings; a file is kept when ``item + '_'``
            occurs in its path.
        tmp_folder: directory the input zip is extracted into.
        input_pockets_zip: path of the zip file to read.
        output_filter_pockets_zip: path of the zip file to create.
        remove_tmp: when truthy, ``tmp_folder`` is removed at the end.
        out_log: logger handed to the ``fu`` helpers.
    """
    # decompress the input_pockets_zip file to tmp_folder
    fu.unzip_list(zip_file=input_pockets_zip, dest_dir=tmp_folder, out_log=out_log)

    # list all files of tmp_folder
    pockets_list = [str(i) for i in Path(tmp_folder).iterdir()]

    # select search_list items from pockets_list; any() keeps every file at
    # most once even when several search terms match the same path
    sel_pockets_list = [
        p for p in pockets_list
        if any(s + '_' in p for s in search_list)
    ]

    fu.log('Creating %s output file' % output_filter_pockets_zip, out_log)

    # compress output to output_filter_pockets_zip
    fu.zip_list(zip_file=output_filter_pockets_zip,
                file_list=sel_pockets_list,
                out_log=out_log)

    if remove_tmp:
        # remove temporary folder
        fu.rm(tmp_folder)
        fu.log('Removed temporary folder: %s' % tmp_folder, out_log)
def launch(self) -> int:
    """Execute the :class:`FPocketSelect <fpocket.fpocket_select.FPocketSelect>` fpocket.fpocket_select.FPocketSelect object.

    Extracts the input pockets zip into a temporary folder and copies the
    files belonging to pocket number ``self.pocket`` to the output paths:
    the ``.pdb`` file to ``output_pocket_pdb`` and any other matching file to
    ``output_pocket_pqr``.

    Returns:
        int: always 0 (also when the step is skipped because of restart).
    """
    # Get local loggers from launchlogger decorator
    out_log = getattr(self, 'out_log', None)
    err_log = getattr(self, 'err_log', None)

    # check input/output paths and parameters
    self.check_data_params(out_log, err_log)

    # Check the properties
    fu.check_properties(self, self.properties)

    if self.restart:
        output_file_list = [
            self.io_dict["out"]["output_pocket_pdb"],
            self.io_dict["out"]["output_pocket_pqr"]
        ]
        if fu.check_complete_files(output_file_list):
            fu.log(
                'Restart is enabled, this step: %s will the skipped' %
                self.step, out_log, self.global_log)
            return 0

    # create tmp_folder
    self.tmp_folder = fu.create_unique_dir()
    fu.log('Creating %s temporary folder' % self.tmp_folder, out_log)

    # decompress the input_pockets_zip file to tmp_folder
    all_pockets = fu.unzip_list(
        zip_file=self.io_dict["in"]["input_pockets_zip"],
        dest_dir=self.tmp_folder,
        out_log=out_log)

    # The trailing underscore stops pocket 1 from also selecting pocket10,
    # pocket11, ...; matching on the file name keeps the temporary folder
    # path out of the comparison.
    pocket_tag = 'pocket' + str(self.pocket) + '_'
    pockets_list = [i for i in all_pockets if pocket_tag in PurePath(i).name]

    for p in pockets_list:
        if PurePath(p).suffix == '.pdb':
            fu.log(
                'Saving %s file' % self.io_dict["out"]["output_pocket_pdb"],
                out_log)
            shutil.copy(p, self.io_dict["out"]["output_pocket_pdb"])
        else:
            fu.log(
                'Saving %s file' % self.io_dict["out"]["output_pocket_pqr"],
                out_log)
            shutil.copy(p, self.io_dict["out"]["output_pocket_pqr"])

    if self.remove_tmp:
        # remove temporary folder
        fu.rm(self.tmp_folder)
        fu.log('Removed temporary folder: %s' % self.tmp_folder, out_log)

    return 0
def launch(self) -> int:
    """Execute the :class:`Pmxanalyse <pmx.pmxanalyse.Pmxanalyse>` pmx.pmxanalyse.Pmxanalyse object.

    Unzips both xvg archives, assembles the ``analyse`` command line from
    the configured properties, runs it and returns ``self.return_code``
    (0 when the step is skipped on restart).
    """
    # Setup Biobb
    if self.check_restart():
        return 0
    self.stage_files()

    # Outside a container the executable must be a file or found by which()
    if not self.container_path:
        if not (Path(self.pmx_path).is_file() or shutil.which(self.pmx_path)):
            raise FileNotFoundError(
                'Executable %s not found. Check if it is installed in your system and correctly defined in the properties'
                % self.pmx_path)

    def _usable(xvg_file):
        # keep only extracted files that exist and are bigger than 10 bytes
        xvg_path = Path(xvg_file)
        return xvg_path.exists() and xvg_path.stat().st_size > 10

    list_a_dir = fu.create_unique_dir()
    list_b_dir = fu.create_unique_dir()
    list_a = [
        xvg for xvg in fu.unzip_list(self.input_a_xvg_zip_path, list_a_dir,
                                     self.out_log)
        if _usable(xvg)
    ]
    list_b = [
        xvg for xvg in fu.unzip_list(self.input_b_xvg_zip_path, list_b_dir,
                                     self.out_log)
        if _usable(xvg)
    ]

    if self.container_path:
        # Copy extra files to container: two directories containing the xvg files
        staging_dir = Path(self.stage_io_dict.get("unique_dir"))
        shutil.copytree(list_a_dir,
                        staging_dir.joinpath(Path(list_a_dir).name))
        shutil.copytree(list_b_dir,
                        staging_dir.joinpath(Path(list_b_dir).name))
        # every path is prefixed with the container volume path
        volume_prefix = self.container_volume_path + "/"
        separator = " " + volume_prefix
        string_a = volume_prefix + separator.join(list_a)
        string_b = volume_prefix + separator.join(list_b)
    else:
        string_a = " ".join(list_a)
        string_b = " ".join(list_b)

    self.cmd = [
        self.pmx_path, 'analyse',
        '-fA', string_a,
        '-fB', string_b,
        '-o', self.stage_io_dict["out"]["output_result_path"],
        '-w', self.stage_io_dict["out"]["output_work_plot_path"]
    ]

    # (property value, arguments appended when that value is truthy),
    # listed in the order they must appear on the command line
    optional_args = (
        (self.method, ['-m', self.method]),
        (self.temperature, ['-t', str(self.temperature)]),
        (self.nboots, ['-b', str(self.nboots)]),
        (self.nblocks, ['-n', str(self.nblocks)]),
        (self.integ_only, ['--integ_only']),
        (self.reverseB, ['--reverseB']),
        (self.skip, ['--skip', str(self.skip)]),
        (self.slice, ['--slice', self.slice]),
        (self.rand, ['--rand']),
        (self.index, ['--index', self.index]),
        (self.prec, ['--prec', str(self.prec)]),
        (self.units, ['--units', self.units]),
        (self.no_ks, ['--no_ks']),
        (self.nbins, ['--nbins', str(self.nbins)]),
        (self.dpi, ['--dpi', str(self.dpi)]),
    )
    for enabled, arguments in optional_args:
        if enabled:
            self.cmd.extend(arguments)

    # Run Biobb block
    self.run_biobb()

    # Copy files to host
    self.copy_to_host()

    self.tmp_files.extend(
        [self.stage_io_dict.get("unique_dir"), list_a_dir, list_b_dir])
    self.remove_tmp_files()

    return self.return_code
def launch(self):
    """Launches the execution of the LeapAddIons module.

    Writes a ``leap.in`` instructions file (force fields, water model, ion
    parameters, optional ligand libraries, ``addionsRand`` commands), runs
    ``tleap`` on it and, for non-cubic boxes, restores the octahedron box
    read from the ``OCTBOX`` first line of the input PDB into the generated
    coordinates and topology files.

    Returns:
        0 when the step is skipped on restart, otherwise ``self.return_code``.
    """
    # check input/output paths and parameters
    self.check_data_params(self.out_log, self.err_log)

    # Setup Biobb
    if self.check_restart():
        return 0
    self.stage_files()

    # Creating temporary folder
    self.tmp_folder = fu.create_unique_dir()
    fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

    # Water Type
    # leaprc.water.tip4pew, tip4pd, tip3p, spceb, spce, opc, fb4, fb3
    # Values: POL3BOX, QSPCFWBOX, SPCBOX, SPCFWBOX, TIP3PBOX, TIP3PFBOX, TIP4PBOX, TIP4PEWBOX, OPCBOX, OPC3BOX, TIP5PBOX.
    # Every variant needs the "source" keyword: the string is written to
    # leap.in verbatim as a command.
    source_wat_command = "source leaprc.water.tip3p"
    if self.water_type == "TIP4PEWBOX":
        source_wat_command = "source leaprc.water.tip4pew"
    if self.water_type == "TIP4PBOX":
        source_wat_command = "source leaprc.water.tip4pd"
    if re.match(r"SPC", self.water_type):
        source_wat_command = "source leaprc.water.spce"
    if re.match(r"OPC", self.water_type):
        source_wat_command = "source leaprc.water.opc"

    def _add_ions(ion_type, number):
        # one addionsRand instruction line for leap.in
        return "addionsRand mol " + ion_type + " " + str(number) + " \n"

    # Counterions
    ions_command = ""
    if self.neutralise:
        ions_command += _add_ions(self.negative_ions_type, 0)
        ions_command += _add_ions(self.positive_ions_type, 0)

    if self.ionic_concentration and self.negative_ions_number == 0 and self.positive_ions_number == 0:
        # find_out_number_of_ions() is expected to set self.nio
        self.find_out_number_of_ions()
        ions_command += _add_ions(self.negative_ions_type, self.nio)
        ions_command += _add_ions(self.positive_ions_type, self.nio)
    else:
        if self.negative_ions_number != 0:
            ions_command += _add_ions(self.negative_ions_type, self.negative_ions_number)
        if self.positive_ions_number != 0:
            ions_command += _add_ions(self.positive_ions_type, self.positive_ions_number)

    def _input_files(input_path):
        # None -> no files; *.zip -> extracted members; anything else -> itself
        if input_path is None:
            return []
        if input_path.endswith('.zip'):
            return fu.unzip_list(input_path, dest_dir=self.tmp_folder, out_log=self.out_log)
        return [input_path]

    ligands_lib_list = _input_files(self.io_dict['in']['input_lib_path'])
    ligands_frcmod_list = _input_files(self.io_dict['in']['input_frcmod_path'])
    amber_params_list = _input_files(self.io_dict['in']['input_params_path'])
    leap_source_list = _input_files(self.io_dict['in']['input_source_path'])

    instructions_file = str(PurePath(self.tmp_folder).joinpath("leap.in"))
    with open(instructions_file, 'w') as leapin:
        # Forcefields loaded from input forcefield property
        for t in self.forcefield:
            leapin.write("source leaprc.{}\n".format(t))

        # Additional Leap commands
        for leap_commands in leap_source_list:
            leapin.write("source " + leap_commands + "\n")

        # Water Model loaded from input water_model property
        leapin.write(source_wat_command + " \n")

        # Ions Type
        if self.ions_type != "None":
            leapin.write("loadamberparams frcmod." + self.ions_type + "\n")

        # Additional Amber parameters
        for amber_params in amber_params_list:
            leapin.write("loadamberparams " + amber_params + "\n")

        # Ligand(s) libraries (if any)
        for amber_lib in ligands_lib_list:
            leapin.write("loadOff " + amber_lib + "\n")
        for amber_frcmod in ligands_frcmod_list:
            leapin.write("loadamberparams " + amber_frcmod + "\n")

        # Loading PDB file
        leapin.write("mol = loadpdb " + self.io_dict['in']['input_pdb_path'] + " \n")

        # Adding ions
        leapin.write(ions_command)

        # Generating box
        leapin.write("setBox mol vdw \n")

        # Saving output PDB file, coordinates and topology
        leapin.write("savepdb mol " + self.io_dict['out']['output_pdb_path'] + " \n")
        leapin.write("saveAmberParm mol " + self.io_dict['out']['output_top_path'] + " " + self.io_dict['out']['output_crd_path'] + "\n")
        leapin.write("quit \n")

    # Command line
    self.cmd = ['tleap ', '-f', instructions_file]

    # Run Biobb block
    self.run_biobb()

    # Copy files to host
    self.copy_to_host()

    if self.box_type != "cubic":
        fu.log('Fixing truncated octahedron Box in the topology and coordinates files', self.out_log, self.global_log)

        # Taking box info from the first line of the input PDB file.
        # readline() yields '' for an empty file instead of raising.
        with open(self.io_dict['in']['input_pdb_path']) as file:
            pdb_line = file.readline()

        # PDB info: OCTBOX 86.1942924 86.1942924 86.1942924 109.4712190 109.4712190 109.4712190
        box_match = re.search(
            r'OCTBOX\s*(\d+\.\d+)\s*(\d+\.\d+)\s*(\d+\.\d+)\s*(\d+\.\d+)\s*(\d+\.\d+)\s*(\d+\.\d+)',
            pdb_line)

        if box_match is None:
            # also covers an OCTBOX tag whose six values cannot be parsed
            fu.log('WARNING: box info not found in input PDB file (OCTBOX). Needed to correctly assign the octahedron box. Assuming cubic box.', self.out_log, self.global_log)
        else:
            box = box_match.groups()

            # crd box line: the six values, 12 characters wide each
            box_line = "".join("{:12.7f}".format(float(coord)) for coord in box)

            # PRMTOP info (%FORMAT(5E16.8)): first angle, then the three lengths
            top_box_line = "".join('%16.8E' % float(box[i]) for i in (3, 0, 1, 2))

            # Removing box generated by tleap from the crd file (last line)
            with open(self.io_dict['out']['output_crd_path']) as file:
                crd_lines = file.readlines()[:-1]

            # Adding old box coordinates (taken from the input pdb)
            crd_lines.append(box_line + "\n")
            with open(self.io_dict['out']['output_crd_path'], 'w') as file:
                file.writelines(crd_lines)

            # Now fixing BOX_DIMENSIONS and the IFBOX pointer in prmtop.
            # %FLAG BOX_DIMENSIONS
            # %FORMAT(5E16.8)
            # 1.09471219E+02 8.63157502E+01 8.63157502E+01 8.63157502E+01
            box_flag = False
            ifbox_flag = 0
            tmp_parmtop = str(PurePath(self.tmp_folder).joinpath("top_temp.parmtop"))
            shutil.copyfile(self.io_dict['out']['output_top_path'], tmp_parmtop)

            with open(self.io_dict['out']['output_top_path'], 'w') as new_top:
                with open(tmp_parmtop, 'r') as old_top:
                    for line in old_top:
                        if 'BOX_DIMENSIONS' in line:
                            box_flag = True
                            new_top.write(line)
                        elif box_flag and 'FORMAT' not in line:
                            # first data line after the FLAG/FORMAT header
                            new_top.write(top_box_line + "\n")
                            box_flag = False
                        elif 'FLAG POINTERS' in line or ifbox_flag in (1, 2, 3):
                            # FLAG line, FORMAT line and the first two data
                            # lines of the POINTERS section are copied as is
                            ifbox_flag += 1
                            new_top.write(line)
                        elif ifbox_flag == 4:
                            # third POINTERS data line: overwrite the
                            # 8-character field at columns 56-63 with 2
                            new_top.write(line[:56] + '%8d' % 2 + line[64:])
                            ifbox_flag += 1
                        else:
                            new_top.write(line)

    # remove temporary folder(s)
    if self.remove_tmp:
        self.tmp_files.append(self.tmp_folder)
        self.tmp_files.append("leap.log")
        self.remove_tmp_files()

    return self.return_code
def launch(self):
    """Launches the execution of the LeapSolvate module.

    Writes a ``leap.in`` instructions file (force fields, ion parameters,
    water model, optional ligand libraries, solvation box and ``addions``
    commands), runs ``tleap`` on it and prepends an ``OCTBOX`` line, built
    from the last line of the generated coordinates file, to the output PDB.

    Returns:
        0 when the step is skipped on restart, otherwise ``self.return_code``.
    """
    # check input/output paths and parameters
    self.check_data_params(self.out_log, self.err_log)

    # Setup Biobb
    if self.check_restart():
        return 0
    self.stage_files()

    # Creating temporary folder
    self.tmp_folder = fu.create_unique_dir()
    fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

    # Leap configuration (instructions) file
    instructions_file = str(PurePath(self.tmp_folder).joinpath("leap.in"))

    box_command = "solvateOct"
    if self.box_type == "cubic":
        box_command = "solvateBox"

    # Water Type
    # leaprc.water.tip4pew, tip4pd, tip3p, spceb, spce, opc, fb4, fb3
    # Values: POL3BOX, QSPCFWBOX, SPCBOX, SPCFWBOX, TIP3PBOX, TIP3PFBOX, TIP4PBOX, TIP4PEWBOX, OPCBOX, OPC3BOX, TIP5PBOX.
    # Every variant needs the "source" keyword: the string is written to
    # leap.in verbatim as a command.
    source_wat_command = "source leaprc.water.tip3p"
    if self.water_type == "TIP4PEWBOX":
        source_wat_command = "source leaprc.water.tip4pew"
    if self.water_type == "TIP4PBOX":
        source_wat_command = "source leaprc.water.tip4pd"
    if re.match(r"SPC", self.water_type):
        source_wat_command = "source leaprc.water.spce"
    if re.match(r"OPC", self.water_type):
        source_wat_command = "source leaprc.water.opc"

    def _add_ions(ion_type, number):
        # one addions instruction line for leap.in
        return "addions mol " + ion_type + " " + str(number) + " \n"

    # Counterions
    ions_command = ""
    if self.neutralise:
        ions_command += _add_ions(self.negative_ions_type, 0)
        ions_command += _add_ions(self.positive_ions_type, 0)
    if self.negative_ions_number != 0:
        ions_command += _add_ions(self.negative_ions_type, self.negative_ions_number)
    if self.positive_ions_number != 0:
        ions_command += _add_ions(self.positive_ions_type, self.positive_ions_number)

    def _input_files(input_path):
        # None -> no files; *.zip -> extracted members; anything else -> itself
        if input_path is None:
            return []
        if input_path.endswith('.zip'):
            return fu.unzip_list(input_path, dest_dir=self.tmp_folder, out_log=self.out_log)
        return [input_path]

    ligands_lib_list = _input_files(self.io_dict['in']['input_lib_path'])
    ligands_frcmod_list = _input_files(self.io_dict['in']['input_frcmod_path'])
    amber_params_list = _input_files(self.io_dict['in']['input_params_path'])
    leap_source_list = _input_files(self.io_dict['in']['input_source_path'])

    with open(instructions_file, 'w') as leapin:
        # Forcefields loaded from input forcefield property
        for t in self.forcefield:
            leapin.write("source leaprc.{}\n".format(t))

        # Additional Leap commands
        for leap_commands in leap_source_list:
            leapin.write("source " + leap_commands + "\n")

        # Ions Type
        if self.ions_type != "None":
            leapin.write("loadamberparams frcmod." + self.ions_type + "\n")

        # Additional Amber parameters
        for amber_params in amber_params_list:
            leapin.write("loadamberparams " + amber_params + "\n")

        # Water Model loaded from input water_model property
        leapin.write(source_wat_command + " \n")

        # Ligand(s) libraries (if any)
        for amber_lib in ligands_lib_list:
            leapin.write("loadOff " + amber_lib + "\n")
        for amber_frcmod in ligands_frcmod_list:
            leapin.write("loadamberparams " + amber_frcmod + "\n")

        # Loading PDB file
        leapin.write("mol = loadpdb " + self.io_dict['in']['input_pdb_path'] + " \n")

        # Generating box + adding water molecules
        leapin.write(box_command + " mol " + self.water_type + " " +
                     str(self.distance_to_molecule) + " " + str(self.closeness))
        if self.iso:
            leapin.write(" iso \n")
        else:
            leapin.write("\n")

        # Adding counterions
        leapin.write(ions_command)

        # Saving output PDB file, coordinates and topology
        leapin.write("savepdb mol " + self.io_dict['out']['output_pdb_path'] + " \n")
        leapin.write("saveAmberParm mol " + self.io_dict['out']['output_top_path'] + " " + self.io_dict['out']['output_crd_path'] + "\n")
        leapin.write("quit \n")

    # Command line
    self.cmd = ['tleap ', '-f', instructions_file]

    # Run Biobb block
    self.run_biobb()

    # Copy files to host
    self.copy_to_host()

    # Saving octahedron box with all decimals in PDB file. Needed for the add_ions BB.
    # Getting octahedron box from generated crd file (its last line)
    last_crd_line = ""
    with open(self.io_dict['out']['output_crd_path'], "r") as file:
        for last_crd_line in file:
            pass

    if not last_crd_line:
        # an empty crd file has no box line to copy
        fu.log('WARNING: box info not found in the generated coordinates file. OCTBOX line not added to the output PDB file.', self.out_log, self.global_log)
    else:
        # Adding box as a first line in the generated pdb file with OCTBOX tag
        octbox = "OCTBOX " + last_crd_line
        if not octbox.endswith("\n"):
            # keep the tag on its own line when the crd lacks a final newline
            octbox += "\n"
        with open(self.io_dict['out']['output_pdb_path'], 'r+') as f:
            content = f.read()
            f.seek(0, 0)
            f.write(octbox + content)

    # remove temporary folder(s)
    if self.remove_tmp:
        self.tmp_files.append(self.tmp_folder)
        self.tmp_files.append("leap.log")
        self.remove_tmp_files()

    return self.return_code
def launch(self) -> int:
    """Execute the :class:`BindingSite <utils.bindingsite.BindingSite>` utils.bindingsite.BindingSite object.

    Superimposes the ligands found in the members of the input clusters zip
    onto the input PDB structure (CA atoms paired through a sequence
    alignment) and writes to ``output_pdb_path`` the input residues found
    within ``self.radius`` of any superimposed ligand atom.

    Returns:
        int: always 0 (also when the step is skipped because of restart).

    Raises:
        SystemExit: when no sequence can be extracted from the input
            structure, or no CA atoms can be paired for a cluster member.
    """
    # Get local loggers from launchlogger decorator
    out_log = getattr(self, 'out_log', None)
    err_log = getattr(self, 'err_log', None)

    # check input/output paths and parameters
    self.check_data_params(out_log, err_log)

    # Check the properties
    fu.check_properties(self, self.properties)

    if self.restart:
        output_file_list = [self.io_dict["out"]["output_pdb_path"]]
        if fu.check_complete_files(output_file_list):
            fu.log(
                'Restart is enabled, this step: %s will the skipped' %
                self.step, out_log, self.global_log)
            return 0

    # Parse structure
    fu.log(
        'Loading input PDB structure %s' %
        (self.io_dict["in"]["input_pdb_path"]), out_log, self.global_log)
    structure_name = PurePath(self.io_dict["in"]["input_pdb_path"]).name
    parser = Bio.PDB.PDBParser(QUIET=True)
    structPDB = parser.get_structure(structure_name,
                                     self.io_dict["in"]["input_pdb_path"])

    # keep the first model only
    if len(structPDB):
        structPDB = structPDB[0]

    # Use only one chain
    n_chains = structPDB.get_list()
    if len(n_chains) != 1:
        fu.log(
            'More than one chain found in the input PDB structure. Using only the first chain to find the binding site',
            out_log, self.global_log)
        # get first chain in case there is more than one chain; the break
        # makes the loop stop at the first one, as the message states
        for struct_chain in structPDB.get_chains():
            structPDB = struct_chain
            break

    # Get AA sequence
    structPDB_seq = get_pdb_sequence(structPDB)
    if len(structPDB_seq) == 0:
        message = (
            self.__class__.__name__ +
            ': Cannot extract AA sequence from the input PDB structure %s. Wrong format?'
            % self.io_dict["in"]["input_pdb_path"])
        fu.log(message, out_log)
        raise SystemExit(message)
    else:
        fu.log(
            'Found %s residues in %s' %
            (len(structPDB_seq), self.io_dict["in"]["input_pdb_path"]),
            out_log)

    # create temporary folder for decompressing the input_clusters_zip file
    unique_dir = PurePath(fu.create_unique_dir())
    fu.log('Creating %s temporary folder' % unique_dir, out_log,
           self.global_log)

    # decompress the input_clusters_zip file
    cluster_list = fu.unzip_list(
        zip_file=self.io_dict["in"]["input_clusters_zip"],
        dest_dir=unique_dir,
        out_log=out_log)

    clusterPDB_ligands_aligned = []
    clusterPDB_ligands_num = 0

    fu.log('Iterating on all clusters:', out_log)

    for idx, cluster_path in enumerate(cluster_list):

        cluster_name = PurePath(cluster_path).stem
        fu.log(' ', out_log)
        fu.log('------------ Iteration #%s --------------' % (idx + 1),
               out_log)
        fu.log('Cluster member: %s' % cluster_name, out_log)

        # Load and Parse PDB (first model)
        clusterPDB = parser.get_structure(cluster_name, cluster_path)[0]

        # Use only the first chain
        for cluster_chain in clusterPDB.get_chains():
            clusterPDB = cluster_chain
            break

        # Looking for ligands
        clusterPDB_ligands = get_ligand_residues(clusterPDB)
        if (len(clusterPDB_ligands)) == 0:
            fu.log(
                'No ligands found that could guide the binding site search. Ignoring this member: %s'
                % cluster_name, out_log)
            continue

        # Selecting the requested ligand, or the largest one if more than one
        lig_atoms_num = 0
        clusterPDB_ligand = {}
        if self.ligand:
            if self.ligand in [
                    x.get_resname() for x in clusterPDB_ligands
            ]:
                for lig in clusterPDB_ligands:
                    if lig.get_resname() == self.ligand:
                        clusterPDB_ligand = lig
                        lig_atoms_num = len(lig.get_list())
                        fu.log(
                            'Ligand found: %s (%s atoms)' %
                            (lig.get_resname(), lig_atoms_num), out_log)
            else:
                fu.log(
                    'Ligand %s not found in %s cluster member, skipping this cluster'
                    % (self.ligand, cluster_name), out_log)
                continue
        else:
            if len(clusterPDB_ligands) > 1:
                for lig_res in clusterPDB_ligands:
                    lig_res_atoms_num = len(lig_res.get_list())
                    fu.log(
                        'Ligand found: %s (%s atoms)' %
                        (lig_res.get_resname(), lig_res_atoms_num), out_log)
                    if lig_res_atoms_num > lig_atoms_num:
                        clusterPDB_ligand = lig_res
                        lig_atoms_num = lig_res_atoms_num
            else:
                clusterPDB_ligand = clusterPDB_ligands[0]
                lig_atoms_num = len(clusterPDB_ligands[0].get_list())

        fu.log(
            'Member accepted. Valid ligand found: %s (%s atoms)' %
            (clusterPDB_ligand.get_resname(), lig_atoms_num), out_log)

        ## Mapping residues by sequence alignment to match structPDB-clusterPDB paired residues

        # Get AA sequence
        clusterPDB_seq = get_pdb_sequence(clusterPDB)

        # Pairwise align
        aln, residue_map = align_sequences(structPDB_seq, clusterPDB_seq,
                                           self.matrix_name, self.gap_open,
                                           self.gap_extend)
        fu.log(
            'Matching residues to input PDB structure. Alignment is:\n%s' %
            (aln[1]), out_log)

        # Calculate (gapless) sequence identity
        seq_identity, gap_seq_identity = calculate_alignment_identity(
            aln[0], aln[1])
        fu.log('Sequence identity (%%): %s' % (seq_identity), out_log)
        fu.log('Gap less identity (%%): %s' % (gap_seq_identity), out_log)

        ## Selecting aligned CA atoms from first model, first chain
        struct_atoms = []
        cluster_atoms = []

        for struct_res in residue_map:
            try:
                # look both atoms up before storing either, so the two
                # lists cannot end up with different lengths
                cluster_ca = clusterPDB[residue_map[struct_res]]['CA']
                struct_ca = get_residue_by_id(structPDB, struct_res)['CA']
            except KeyError:
                fu.log(
                    'Cannot find CA atom for residue %s (input PDB %s)' %
                    (get_residue_by_id(
                        structPDB, struct_res).get_resname(), struct_res),
                    out_log)
                continue
            cluster_atoms.append(cluster_ca)
            struct_atoms.append(struct_ca)

        if len(cluster_atoms) == 0:
            message = (
                self.__class__.__name__ +
                ': Cannot find CA atoms (1st model, 1st chain) in cluster member %s when aligning against %s. Ignoring this member.'
                % (cluster_name, structure_name))
            fu.log(message, out_log)
            raise SystemExit(message)
        else:
            fu.log(
                'Superimposing %s aligned protein residues' %
                (len(cluster_atoms)), out_log)

        ## Align against input structure
        si = Bio.PDB.Superimposer()
        si.set_atoms(struct_atoms, cluster_atoms)
        si.apply(clusterPDB.get_atoms())
        fu.log('RMSD: %s' % (si.rms), out_log)

        # Save transformed structure (and ligand)
        clusterPDB_ligand_aligned = clusterPDB[clusterPDB_ligand.get_id()]
        fu.log('Saving transformed ligand coordinates', out_log)

        clusterPDB_ligands_aligned.append(clusterPDB_ligand_aligned)

        ## Stop after n accepted cluster members (the counter already
        ## includes the member just accepted, hence >=)
        clusterPDB_ligands_num += 1
        if clusterPDB_ligands_num >= self.max_num_ligands:
            break

    fu.log(' ', out_log)
    fu.log('----------------------------------------', out_log)
    fu.log(
        'All transformed ligand coordinates saved, getting binding site residues',
        out_log)

    ## Select binding site atoms as those around cluster superimposed ligands
    fu.log(
        'Defining binding site residues as those %sÅ around the %s cluster superimposed ligands'
        % (self.radius, clusterPDB_ligands_num), out_log)

    # select Atoms from aligned ligands
    clusterPDB_ligands_aligned_atoms = Bio.PDB.Selection.unfold_entities(
        list(clusterPDB_ligands_aligned), 'A')

    # select Atoms from input PDB structure
    structPDB_atoms = list(structPDB.get_atoms())

    # compute neighbors for aligned ligands in the input PDB structure
    structPDB_bs_residues_raw = {}
    structPDB_neighbors = Bio.PDB.NeighborSearch(structPDB_atoms)
    for ligand_atom in clusterPDB_ligands_aligned_atoms:
        # look for PDB residues within self.radius of each ligand atom
        k_l = structPDB_neighbors.search(ligand_atom.coord, self.radius,
                                         'R')
        for k in k_l:
            structPDB_bs_residues_raw[k.get_id()] = k.get_full_id()

    ## Save binding site to PDB
    io = Bio.PDB.PDBIO()
    fu.log(
        'Writing binding site residues into %s' %
        (self.io_dict["out"]["output_pdb_path"]), out_log)

    # unselect input PDB residues not in binding site, plus those whose
    # residue name matches the pattern below
    # NOTE(review): the pattern looks like hetero-flag prefixes but is
    # matched against res.resname - confirm this is intended
    p = re.compile('H_|W_|W')
    residue_ids_to_remove = [
        res.id for res in structPDB.get_residues()
        if res.id not in structPDB_bs_residues_raw or p.match(res.resname)
    ]

    # structPDB is a single chain when the input had several chains;
    # otherwise iterate over the chains it contains
    if structPDB.get_level() == 'C':
        chains = [structPDB]
    else:
        chains = list(structPDB)
    for chain in chains:
        for idr in residue_ids_to_remove:
            chain.detach_child(idr)

    # write PDB file
    io.set_structure(structPDB)
    io.save(self.io_dict["out"]["output_pdb_path"])

    if self.remove_tmp:
        # remove temporary folder
        fu.rm(unique_dir)
        fu.log(' ', out_log)
        fu.log('----------------------------------------', out_log)
        fu.log('Removed temporary folder: %s' % unique_dir, out_log)

    return 0
def launch(self):
    """Launches the execution of the LeapGenTop module.

    Builds a ``leap.in`` instructions file in a fresh temporary folder, runs
    ``tleap`` on it and returns ``self.return_code`` (0 when the step is
    skipped on restart).
    """
    # check input/output paths and parameters
    self.check_data_params(self.out_log, self.err_log)

    # Setup Biobb
    if self.check_restart():
        return 0
    self.stage_files()

    # Creating temporary folder
    self.tmp_folder = fu.create_unique_dir()
    fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

    # Leap configuration (instructions) file
    instructions_file = str(PurePath(self.tmp_folder).joinpath("leap.in"))

    def _collect(input_key):
        # No path -> empty list; a zip -> its extracted members; else the path itself
        source_path = self.io_dict['in'][input_key]
        if source_path is None:
            return []
        if not source_path.endswith('.zip'):
            return [source_path]
        return fu.unzip_list(source_path, dest_dir=self.tmp_folder, out_log=self.out_log)

    ligands_lib_list = _collect('input_lib_path')
    ligands_frcmod_list = _collect('input_frcmod_path')
    amber_params_list = _collect('input_params_path')
    leap_source_list = _collect('input_source_path')

    inputs = self.io_dict['in']
    outputs = self.io_dict['out']

    with open(instructions_file, 'w') as leapin:
        # Forcefields loaded from input forcefield property
        leapin.writelines("source leaprc.{}\n".format(ff) for ff in self.forcefield)

        # Additional Leap commands
        leapin.writelines("source " + script + "\n" for script in leap_source_list)

        # Additional Amber parameters
        leapin.writelines("loadamberparams " + params + "\n" for params in amber_params_list)

        # Ions libraries
        leapin.write("loadOff atomic_ions.lib \n")

        # Ligand(s) libraries (if any)
        leapin.writelines("loadOff " + lib + "\n" for lib in ligands_lib_list)
        leapin.writelines("loadamberparams " + frcmod + "\n" for frcmod in ligands_frcmod_list)

        # Loading PDB file
        leapin.write("mol = loadpdb " + inputs['input_pdb_path'] + " \n")

        # Saving output PDB file, coordinates and topology
        leapin.write("savepdb mol " + outputs['output_pdb_path'] + " \n")
        leapin.write("saveAmberParm mol " + outputs['output_top_path'] + " " + outputs['output_crd_path'] + "\n")
        leapin.write("quit \n")

    # Command line
    self.cmd = ['tleap ', '-f', instructions_file]

    # Run Biobb block
    self.run_biobb()

    # Copy files to host
    self.copy_to_host()

    # remove temporary folder(s)
    if self.remove_tmp:
        self.tmp_files += [self.tmp_folder, "leap.log"]
        self.remove_tmp_files()

    return self.return_code