def preprocess_pdbbind(pdbbind_dir):
    """Preprocess pdbbind files for Binana.

    Each immediate subdirectory of ``pdbbind_dir`` is treated as one entry.
    Inside an entry, the file whose name ends in ``_ligand.mol2`` is taken as
    the ligand and the file whose name ends in ``_protein.pdb`` as the
    protein. Both are handed to ``hydrogenate_and_compute_partial_charges``
    with the entry's subdirectory as the third argument.

    Args:
        pdbbind_dir: Path of an existing directory containing one
            subdirectory per entry.

    Raises:
        AssertionError: If ``pdbbind_dir`` is not a directory.
        ValueError: If an entry has no ligand file or no protein file.
    """
    assert os.path.isdir(pdbbind_dir)
    # Only subdirectories count as entries; plain files at the top level are
    # skipped.
    subdirs = [d for d in os.listdir(pdbbind_dir)
               if os.path.isdir(os.path.join(pdbbind_dir, d))]
    print("About to preprocess following subdirectories:")
    print(subdirs)
    for count, dirname in enumerate(subdirs):
        print("Processing %d-th entry %s" % (count, dirname))
        subdir = os.path.join(pdbbind_dir, dirname)
        ligand, protein = None, None
        for molfile in os.listdir(subdir):
            # Match on the suffix rather than on a substring so that names
            # such as "x_protein.pdbqt" or "x_ligand.mol2.bak" are not
            # mistaken for the input files.
            if molfile.endswith("_ligand.mol2"):
                print("Input ligand: %s" % molfile)
                ligand = molfile
            elif molfile.endswith("_protein.pdb"):
                print("Input protein: %s" % molfile)
                protein = molfile
        if not ligand or not protein:
            raise ValueError("Ligand or Protein missing in %s" % dirname)
        ligand_file = os.path.join(subdir, ligand)
        protein_file = os.path.join(subdir, protein)
        print("About to preprocess ligand.")
        hydrogenate_and_compute_partial_charges(ligand_file, "mol2", subdir)
        print("About to preprocess protein.")
        hydrogenate_and_compute_partial_charges(protein_file, "pdb", subdir)
def launch_preprocessing_jobs(dude_dir, script_dir, script_template, num_jobs):
    """Preprocess the receptor of a dude target and report the per-job load.

    Lists the ``actives`` and ``decoys`` directories of the target, keeps
    only the first entry of each (debugging limit), runs
    ``hydrogenate_and_compute_partial_charges`` on ``receptor.pdb`` and
    prints how many compounds each of ``num_jobs`` jobs would receive.

    ``script_dir`` and ``script_template`` are accepted but not read here.

    Raises:
        AssertionError: If ``dude_dir`` is not a directory.
    """
    assert os.path.isdir(dude_dir)
    # NOTE(review): `target` is neither a parameter nor a local of this
    # function; it has to come from module scope -- confirm.
    target_dir = os.path.join(dude_dir, target)

    actives_dir = os.path.join(target_dir, "actives")
    decoys_dir = os.path.join(target_dir, "decoys")
    # Just for debugging purposes: only the first listed file of each kind
    # is kept.
    actives = os.listdir(actives_dir)[:1]
    decoys = os.listdir(decoys_dir)[:1]

    # Hydrogenate and process the receptor.
    hydrogenate_and_compute_partial_charges(
        os.path.join(target_dir, "receptor.pdb"), "pdb", target_dir)
    # Computed but not used further in this function.
    receptor_pdbqt = os.path.join(target_dir, "receptor_hyd.pdbqt")

    # Compute work per job. `sources` is built alongside `compounds` but is
    # not used further in this function.
    sources = len(actives) * ["active"] + len(decoys) * ["decoy"]
    compounds = actives + decoys
    total = len(compounds)
    num_per_job = int(math.ceil(total / float(num_jobs)))
    print("Number per job: %d" % num_per_job)
def pdbbind_job(pdb_directories):
    """Processes all pdbs in provided directories.

    In each directory, the file whose name ends in ``_ligand.mol2`` is taken
    as the ligand and the file whose name ends in ``_protein.pdb`` as the
    protein. Both are handed to ``hydrogenate_and_compute_partial_charges``
    with that directory as the third argument.

    Args:
        pdb_directories: Iterable of directory paths, one per entry.

    Raises:
        ValueError: If a directory has no ligand file or no protein file.
    """
    for count, dirname in enumerate(pdb_directories):
        print("Processing %d-th entry %s" % (count, dirname))
        ligand, protein = None, None
        for molfile in os.listdir(dirname):
            # Match on the suffix rather than on a substring so that names
            # such as "x_protein.pdbqt" or "x_ligand.mol2.bak" are not
            # mistaken for the input files.
            if molfile.endswith("_ligand.mol2"):
                print("Input ligand: %s" % molfile)
                ligand = molfile
            elif molfile.endswith("_protein.pdb"):
                print("Input protein: %s" % molfile)
                protein = molfile
        if not ligand or not protein:
            raise ValueError("Ligand or Protein missing in %s" % dirname)
        ligand_file = os.path.join(dirname, ligand)
        protein_file = os.path.join(dirname, protein)
        print("About to preprocess ligand.")
        hydrogenate_and_compute_partial_charges(ligand_file, "mol2", dirname)
        print("About to preprocess protein.")
        hydrogenate_and_compute_partial_charges(protein_file, "pdb", dirname)
def preprocess_dude(compounds):
    """Preprocess dude compounds for docking.

    Reads the configuration template once, then walks ``actives`` followed
    by ``decoys``. For each file it prints the input path, the
    ``*_hyd.pdbqt`` path and the ``conf_*.txt`` path, calls
    ``hydrogenate_and_compute_partial_charges`` with ``rigid=False``, fills
    the template with ``(receptor_pdbqt, <*_hyd.pdbqt path>)``, writes the
    result to the ``conf_*.txt`` file and prints it.

    NOTE(review): ``conf_template``, ``actives``, ``actives_dir``,
    ``decoys``, ``decoys_dir`` and ``receptor_pdbqt`` are not defined in
    this function and must come from an outer scope -- confirm. The
    ``compounds`` argument is not read.
    """
    with open(conf_template, "rb") as template_handle:
        conf = template_handle.read()

    def _prepare(directory, compound_file, base):
        # Shared per-compound steps for both the active and the decoy loop.
        output = os.path.join(directory, "%s_hyd.pdbqt" % base)
        conf_file = os.path.join(directory, "conf_%s.txt" % base)
        print(compound_file)
        print(output)
        print(conf_file)
        hydrogenate_and_compute_partial_charges(
            compound_file, "pdb", directory, rigid=False)
        rendered = conf % (receptor_pdbqt, output)
        with open(conf_file, "wb") as out_handle:
            out_handle.write(rendered)
        print(rendered)

    for active in actives:
        active_file = os.path.join(actives_dir, active)
        # Base name is everything before the first dot of the listed name.
        _prepare(actives_dir, active_file, active.split(".")[0])

    for decoy in decoys:
        decoy_file = os.path.join(decoys_dir, decoy)
        # Decoys derive the base from the basename of the joined path.
        _prepare(decoys_dir, decoy_file,
                 os.path.basename(decoy_file).split(".")[0])
def preprocess_dude(compounds):
    """Preprocess dude compounds for docking.

    Loads the configuration template, then handles every entry of
    ``actives`` and afterwards every entry of ``decoys``: the input,
    ``*_hyd.pdbqt`` and ``conf_*.txt`` paths are printed,
    ``hydrogenate_and_compute_partial_charges`` is called with
    ``rigid=False``, and the template filled with
    ``(receptor_pdbqt, <*_hyd.pdbqt path>)`` is written to the
    ``conf_*.txt`` file and printed.

    NOTE(review): ``conf_template``, ``actives``, ``actives_dir``,
    ``decoys``, ``decoys_dir`` and ``receptor_pdbqt`` are not defined in
    this function and must come from an outer scope -- confirm. The
    ``compounds`` argument is not read.
    """
    with open(conf_template, "rb") as template:
        conf = template.read()

    for name in actives:
        src = os.path.join(actives_dir, name)
        # Stem is the part of the listed name before its first dot.
        stem = name.split(".")[0]
        pdbqt_out = os.path.join(actives_dir, "%s_hyd.pdbqt" % stem)
        conf_out = os.path.join(actives_dir, "conf_%s.txt" % stem)
        for path in (src, pdbqt_out, conf_out):
            print(path)
        hydrogenate_and_compute_partial_charges(
            src, "pdb", actives_dir, rigid=False)
        filled = conf % (receptor_pdbqt, pdbqt_out)
        with open(conf_out, "wb") as sink:
            sink.write(filled)
        print(filled)

    for name in decoys:
        src = os.path.join(decoys_dir, name)
        # Decoys take the stem from the basename of the joined path.
        stem = os.path.basename(src).split(".")[0]
        pdbqt_out = os.path.join(decoys_dir, "%s_hyd.pdbqt" % stem)
        conf_out = os.path.join(decoys_dir, "conf_%s.txt" % stem)
        for path in (src, pdbqt_out, conf_out):
            print(path)
        hydrogenate_and_compute_partial_charges(
            src, "pdb", decoys_dir, rigid=False)
        filled = conf % (receptor_pdbqt, pdbqt_out)
        with open(conf_out, "wb") as sink:
            sink.write(filled)
        print(filled)
def _featurize_complex(self, mol_pdb, protein_pdb):
    """
    Compute Binana fingerprint for complex.

    The ligand and protein PDB contents are written to a fresh temporary
    directory, both files are run through
    ``hydrogenate_and_compute_partial_charges``, the results are loaded into
    ``PDB`` objects and passed to ``self.binana.compute_input_vector``. The
    temporary directory is removed before returning, also when a step fails.

    Args:
        mol_pdb: Iterable of strings written verbatim to ``mol.pdb``.
        protein_pdb: Iterable of strings written verbatim to ``protein.pdb``.

    Returns:
        The value returned by ``self.binana.compute_input_vector``.
    """
    tempdir = tempfile.mkdtemp()
    # mkdtemp leaves cleanup to the caller; the finally clause guarantees the
    # directory is removed even if any step below raises.
    try:
        mol_pdb_file = os.path.join(tempdir, "mol.pdb")
        with open(mol_pdb_file, "w") as mol_f:
            mol_f.writelines(mol_pdb)
        protein_pdb_file = os.path.join(tempdir, "protein.pdb")
        with open(protein_pdb_file, "w") as protein_f:
            protein_f.writelines(protein_pdb)

        # presumably the last two arguments name the hydrogenated pdb and
        # pdbqt output files -- confirm against the helper's signature.
        mol_hyd_file = os.path.join(tempdir, "mol_hyd.pdb")
        mol_pdbqt_file = os.path.join(tempdir, "mol_hyd.pdbqt")
        hydrogenate_and_compute_partial_charges(
            mol_pdb_file, "pdb", tempdir, mol_hyd_file, mol_pdbqt_file)

        protein_hyd_file = os.path.join(tempdir, "protein_hyd.pdb")
        protein_pdbqt_file = os.path.join(tempdir, "protein_hyd.pdbqt")
        hydrogenate_and_compute_partial_charges(
            protein_pdb_file, "pdb", tempdir, protein_hyd_file,
            protein_pdbqt_file)

        # NOTE(review): the objects are loaded from the original pdb files
        # together with the *_hyd.pdbqt files; the *_hyd.pdb files are not
        # read here -- confirm this pairing is intended.
        mol_pdb_obj = PDB()
        mol_pdb_obj.load_from_files(mol_pdb_file, mol_pdbqt_file)
        protein_pdb_obj = PDB()
        protein_pdb_obj.load_from_files(protein_pdb_file, protein_pdbqt_file)

        features = self.binana.compute_input_vector(
            mol_pdb_obj, protein_pdb_obj)
    finally:
        shutil.rmtree(tempdir)
    return features
def launch_preprocessing_jobs(dude_dir, script_dir, script_template, num_jobs):
    """Process the receptor of a dude target and print the work per job.

    The target's ``actives`` and ``decoys`` directories are listed and cut
    down to their first entry (debugging limit), ``receptor.pdb`` is passed
    to ``hydrogenate_and_compute_partial_charges``, and the number of
    compounds per job for ``num_jobs`` jobs is printed.

    ``script_dir`` and ``script_template`` are accepted but not read here.

    Raises:
        AssertionError: If ``dude_dir`` is not a directory.
    """
    assert os.path.isdir(dude_dir)
    # NOTE(review): `target` is not bound inside this function; it must be
    # provided by module scope -- confirm.
    target_dir = os.path.join(dude_dir, target)

    # Extract list of actives and decoys.
    actives_dir = os.path.join(target_dir, "actives")
    decoys_dir = os.path.join(target_dir, "decoys")
    actives = list(os.listdir(actives_dir))
    decoys = list(os.listdir(decoys_dir))

    # Just for debugging purposes: drop everything after the first entry.
    del actives[1:]
    del decoys[1:]

    # Hydrogenate and process the receptor.
    receptor = os.path.join(target_dir, "receptor.pdb")
    hydrogenate_and_compute_partial_charges(receptor, "pdb", target_dir)
    # Computed but not used further in this function.
    receptor_pdbqt = os.path.join(target_dir, "receptor_hyd.pdbqt")

    # Compute work per job. `sources` is not used further in this function.
    sources = ["active" for _ in actives] + ["decoy" for _ in decoys]
    compounds = actives + decoys
    per_job_exact = len(compounds) / float(num_jobs)
    num_per_job = int(math.ceil(per_job_exact))
    print("Number per job: %d" % num_per_job)
def main(input_file, input_format, output_directory):
    """Preprocesses provided files.

    Forwards the three arguments, in order, to
    ``hydrogenate_and_compute_partial_charges``. The helper's return value is
    discarded.
    """
    hydrogenate_and_compute_partial_charges(
        input_file,
        input_format,
        output_directory)