def run_cca(run_mr, target_resolution, args):
    """Run Phaser's cell-content analysis (CCA) on the current dataset.

    Parameters
    ----------
    run_mr
        Phaser MR data result providing space group and unit cell.
    target_resolution : float
        High-resolution limit passed to CCA (CCA requires this to be set).
    args
        Namespace with ``np`` (protein residues) and ``na`` (nucleic-acid
        residues) composition counts.

    Returns
    -------
    tuple
        ``(best_z, solvent_content)``; ``(0, 0.0)`` if CCA did not succeed.
    """
    best_z, solvent = 0, 0.0
    cca_input = phaser.InputCCA()
    cca_input.setSPAC_HALL(run_mr.getSpaceGroupHall())
    cca_input.setCELL6(run_mr.getUnitCell())
    cca_input.setMUTE(True)
    # The high-resolution limit must be set explicitly for CCA to run.
    cca_input.setRESO_HIGH(target_resolution)
    if args.np > 0:
        cca_input.addCOMP_PROT_NRES_NUM(args.np, 1)
    if args.na > 0:
        cca_input.addCOMP_NUCL_NRES_NUM(args.na, 1)
    cca_result = phaser.runCCA(cca_input)
    if cca_result.Success():
        best_z = cca_result.getBestZ()
        # Solvent estimate derived from the best Matthews coefficient.
        solvent = 1 - (1.23 / cca_result.getBestVM())
    del cca_result
    return (best_z, solvent)
def run_cca():
    """Run Phaser's cell-content analysis and report best Z and solvent.

    Relies on outer-scope names: ``r`` (presumably the MR-DAT result —
    confirm), ``res0`` (high-resolution limit), ``np`` (protein residue
    count) and ``na`` (nucleic-acid residue count).

    Returns
    -------
    tuple
        ``(best_z, solvent_content)``; ``(0, 0.0)`` on failure.
    """
    best_z = 0
    best_solvent = 0.0
    cca_in = phaser.InputCCA()
    cca_in.setSPAC_HALL(r.getSpaceGroupHall())
    cca_in.setCELL6(r.getUnitCell())
    cca_in.setMUTE(True)
    # CCA will not run without an explicit high-resolution limit.
    cca_in.setRESO_HIGH(res0)
    if np > 0:
        cca_in.addCOMP_PROT_NRES_NUM(np, 1)
    if na > 0:
        cca_in.addCOMP_NUCL_NRES_NUM(na, 1)
    outcome = phaser.runCCA(cca_in)
    if outcome.Success():
        best_z = outcome.getBestZ()
        best_solvent = 1 - (1.23 / outcome.getBestVM())
    del outcome
    return (best_z, best_solvent)
def run_cca():
    """Cell-content analysis via Phaser.

    Uses outer-scope ``r`` (MR data object — verify against caller),
    ``res0``, ``np`` and ``na``. Returns ``(best_z, solvent_content)``,
    or ``(0, 0.0)`` when the CCA run does not succeed.
    """
    z_best, sc_best = 0, 0.0
    cca = phaser.InputCCA()
    cca.setSPAC_HALL(r.getSpaceGroupHall())
    cca.setCELL6(r.getUnitCell())
    cca.setMUTE(True)
    # High-resolution limit is mandatory for CCA.
    cca.setRESO_HIGH(res0)
    if np > 0:
        cca.addCOMP_PROT_NRES_NUM(np, 1)
    if na > 0:
        cca.addCOMP_NUCL_NRES_NUM(na, 1)
    run_result = phaser.runCCA(cca)
    if run_result.Success():
        z_best = run_result.getBestZ()
        # Solvent fraction from the best Matthews coefficient.
        sc_best = 1 - (1.23 / run_result.getBestVM())
    del run_result
    return (z_best, sc_best)
def run_cca(run_mr, target_resolution, args):
    """Estimate unit-cell contents with Phaser CCA.

    Parameters
    ----------
    run_mr
        Object exposing ``getSpaceGroupHall`` and ``getUnitCell``.
    target_resolution : float
        High-resolution cutoff — CCA requires one to be set.
    args
        Provides ``np``/``na`` residue counts for the composition.

    Returns
    -------
    tuple
        ``(best_z, solvent_content)``; zeros when CCA fails.
    """
    z_value = 0
    frac_solvent = 0.0
    setup = phaser.InputCCA()
    setup.setSPAC_HALL(run_mr.getSpaceGroupHall())
    setup.setCELL6(run_mr.getUnitCell())
    setup.setMUTE(True)
    # Mandatory: CCA needs the high-resolution limit.
    setup.setRESO_HIGH(target_resolution)
    if args.np > 0:
        setup.addCOMP_PROT_NRES_NUM(args.np, 1)
    if args.na > 0:
        setup.addCOMP_NUCL_NRES_NUM(args.na, 1)
    outcome = phaser.runCCA(setup)
    if outcome.Success():
        z_value = outcome.getBestZ()
        frac_solvent = 1 - (1.23 / outcome.getBestVM())
    del outcome
    return (z_value, frac_solvent)
def calculate_solvent(root, data, seqin, ncs_copies, highres, logfile):
    """Run Phaser CCA to estimate solvent content for a sequence.

    Parameters
    ----------
    root
        Unused here; kept for interface compatibility with callers.
    data
        MR data object exposing space group / unit cell accessors.
    seqin : str
        Sequence file used for the protein composition.
    ncs_copies : int
        Number of copies of the sequence to place in the cell.
    highres : float
        High-resolution limit (low-resolution limit fixed at 10000).
    logfile : str
        Path the CCA log is written to.

    Returns
    -------
    tuple
        (best Z, best VM, first Z, first VM) from the CCA run.
    """
    # Avoid shadowing the builtin `input`.
    cca_input = phaser.InputCCA()
    cca_input.setSPAC_HALL(data.getSpaceGroupHall())
    cca_input.setCELL6(data.getUnitCell())
    cca_input.addCOMP_PROT_SEQ_NUM(seqin, ncs_copies)
    cca_input.setRESO(highres, 10000)
    cca_input.setMUTE(True)
    cca = phaser.runCCA(cca_input)
    with open(logfile, 'w') as cca_log:
        # BUGFIX: write the CCA run's own log; previously this wrote
        # data.logfile() (the MR-DAT log) into the CCA log file.
        print(cca.logfile(), file=cca_log)
    # Solvent content from the first VM entry; 1.232 is the protein
    # density constant used throughout this module.
    solvent_first = 1.0 - 1.232 / cca.getVM()[0]
    # check if solvent content is higher than average
    if cca.getBestZ() > 1:
        log.warning('*** Warning solvent content estimated by Phaser is %0.2f' % (1.0 - 1.232 / cca.getBestVM()))
        if ncs_copies > 1:
            log.warning(' solvent content calculated from %d copies of sequence is %0.2f\n' % (ncs_copies, solvent_first))
        else:
            log.warning(' solvent content calculated from 1 copy of sequence is %0.2f\n' % solvent_first)
    elif ncs_copies > 1:
        log.info('Solvent content calculated from %d copies of sequence is %0.2f\n' % (ncs_copies, solvent_first))
    else:
        log.info('Solvent content calculated from 1 copy of sequence is %0.2f\n' % solvent_first)
    return cca.getBestZ(), cca.getBestVM(), cca.getZ()[0], cca.getVM()[0]
def run(self, models_dir, nproc=2, shres=3.0, pklim=0.5, npic=50, rotastep=1.0, min_solvent_content=20,
        submit_nproc=None, submit_qtype=None, submit_queue=None, monitor=None, chunk_size=0, **kwargs):
    """Run amore rotation function on a directory of models.

    Parameters
    ----------
    models_dir : str
        The directory containing the models to run the rotation search on
    nproc : int, optional
        The number of processors to run the job on
    shres : int, float, optional
        Spherical harmonic resolution [default 3.0]
    pklim : int, float, optional
        Peak limit, output all peaks above <float> [default: 0.5]
    npic : int, optional
        Number of peaks to output from the translation function map for
        each orientation [default: 50]
    rotastep : int, float, optional
        Size of rotation step [default: 1.0]
    min_solvent_content : int, float, optional
        The minimum solvent content present in the unit cell with the
        input model [default: 20]
    submit_nproc : int
        The number of processors to use on the head node when creating
        submission scripts on a cluster [default: 1]
    submit_qtype : str
        The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
        The queue to submit to on the cluster
    monitor
    chunk_size : int, optional
        The number of jobs to submit at the same time

    Returns
    -------
    file
        log file for each model in the models_dir
    """
    # Stash per-run parameters on self so the worker (called via
    # p.map(self, ...) below) can read them.
    self.shres = shres
    self.pklim = pklim
    self.npic = npic
    self.rotastep = rotastep
    self.submit_qtype = submit_qtype
    self.submit_queue = submit_queue
    self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
    # Read F/SIGF column labels from the MTZ and load the reflection data.
    mtz_labels = simbad.util.mtz_util.GetLabels(self.mtz)
    i = InputMR_DAT()
    i.setHKLI(self.mtz)
    i.setLABI_F_SIGF(mtz_labels.f, mtz_labels.sigf)
    i.setMUTE(True)
    run_mr_data = runMR_DAT(i)
    sg = run_mr_data.getSpaceGroupName().replace(" ", "")
    cell = " ".join(map(str, run_mr_data.getUnitCell()))
    sol_calc = simbad.util.matthews_prob.SolventContent(cell, sg)
    # Unique per-run scratch/log directory.
    dir_name = "simbad-tmp-" + str(uuid.uuid1())
    self.script_log_dir = os.path.join(self.work_dir, dir_name)
    os.mkdir(self.script_log_dir)
    self.hklpck0 = self._generate_hklpck0()
    self.ccp4_scr = os.environ["CCP4_SCR"]
    default_tmp_dir = os.path.join(self.work_dir, 'tmp')
    # Template with a "{0}" placeholder later filled with the PDB code.
    if self.tmp_dir:
        self.template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
    else:
        self.template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")
    # Predict the assembly molecular weight for this cell via Phaser CCA;
    # used below to rank candidate models by MW difference.
    predicted_molecular_weight = 0
    if run_mr_data.Success():
        i = InputCCA()
        i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
        i.setCELL6(run_mr_data.getUnitCell())
        i.setMUTE(True)
        run_cca = runCCA(i)
        if run_cca.Success():
            predicted_molecular_weight = run_cca.getAssemblyMW()
    # Screen every .dat model: drop those with too-low predicted solvent
    # content or unreadable files; score the rest by |MW difference|.
    dat_models = []
    for dat_model in self.simbad_dat_files:
        name = os.path.basename(dat_model.replace(".dat", ""))
        pdb_struct = simbad.util.pdb_util.PdbStructure()
        pdb_struct.from_file(dat_model)
        try:
            solvent_content = sol_calc.calculate_from_struct(pdb_struct)
            if solvent_content < min_solvent_content:
                msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                logger.debug(msg, name, min_solvent_content)
                continue
        except ValueError:
            msg = "Skipping %s: Error calculating solvent content"
            logger.debug(msg, name)
            continue
        except IndexError:
            msg = "Skipping %s: Problem with dat file"
            logger.debug(msg, name)
            continue
        x, y, z, intrad = pdb_struct.integration_box
        model_molecular_weight = pdb_struct.molecular_weight
        mw_diff = abs(predicted_molecular_weight - model_molecular_weight)
        info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, x, y, z, intrad,
                                                   solvent_content, None)
        dat_models.append(info)
    # Best MW matches first.
    sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff), reverse=False)
    n_files = len(sorted_dat_models)
    chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
    total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(n_files, chunk_size)
    # Script generation runs in a local pool; use submit_nproc workers
    # when targeting a cluster queue.
    if submit_qtype == 'local':
        processes = nproc
    else:
        processes = submit_nproc
    results = []
    iteration_range = range(0, n_files, chunk_size)
    for cycle, i in enumerate(iteration_range):
        logger.info("Working on chunk %d out of %d", cycle + 1, total_chunk_cycles)
        if self.solution:
            logger.info("Early termination criteria met, skipping chunk %d", cycle + 1)
            continue
        collector = ScriptCollector(None)
        amore_files = []
        # NOTE(review): list comprehension used purely for side effects;
        # p.map(self, ...) invokes self.__call__ per model. The
        # comprehension's own loop variable shadows the chunk index `i`,
        # but the slice bound is evaluated before the shadowing starts.
        with pool.Pool(processes=processes) as p:
            [(collector.add(i[0]), amore_files.append(i[1]))
             for i in p.map(self, sorted_dat_models[i:i + chunk_size]) if i is not None]
        if len(collector.scripts) > 0:
            logger.info("Running AMORE tab/rot functions")
            amore_logs, dat_models = zip(*amore_files)
            simbad.util.submit_chunk(collector, self.script_log_dir, nproc, 'simbad_amore',
                                     submit_qtype, submit_queue, True, monitor,
                                     self.rot_succeeded_log)
            # Parse each AMORE log back into a rotation score.
            for dat_model, amore_log in zip(dat_models, amore_logs):
                base = os.path.basename(amore_log)
                pdb_code = base.replace("amore_", "").replace(".log", "")
                try:
                    rotsearch_parser = simbad.parsers.rotsearch_parser.AmoreRotsearchParser(
                        amore_log)
                    score = simbad.core.amore_score.AmoreRotationScore(
                        pdb_code, dat_model, rotsearch_parser.alpha, rotsearch_parser.beta,
                        rotsearch_parser.gamma, rotsearch_parser.cc_f, rotsearch_parser.rf_f,
                        rotsearch_parser.cc_i, rotsearch_parser.cc_p, rotsearch_parser.icp,
                        rotsearch_parser.cc_f_z_score, rotsearch_parser.cc_p_z_score,
                        rotsearch_parser.num_of_rot)
                    if rotsearch_parser.cc_f_z_score:
                        results += [score]
                # Missing/unreadable log: treat as a failed model, keep going.
                except IOError:
                    pass
        else:
            logger.critical("No structures to be trialled")
    self._search_results = results
    # Clean up scratch areas created above.
    shutil.rmtree(self.script_log_dir)
    if os.path.isdir(default_tmp_dir):
        shutil.rmtree(default_tmp_dir)
def run(self, models_dir, nproc=2, min_solvent_content=20, submit_qtype=None, submit_queue=None,
        monitor=None, chunk_size=0, **kwargs):
    """Run phaser rotation function on a directory of models.

    Parameters
    ----------
    models_dir : str
        The directory containing the models to run the rotation search on
    nproc : int, optional
        The number of processors to run the job on
    min_solvent_content : int, float, optional
        The minimum solvent content present in the unit cell with the
        input model [default: 20]
    submit_qtype : str
        The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
        The queue to submit to on the cluster
    monitor
    chunk_size : int, optional
        The number of jobs to submit at the same time

    Returns
    -------
    file
        log file for each model in the models_dir
    """
    self.submit_qtype = submit_qtype
    self.submit_queue = submit_queue
    # Column labels (F/SIGF/I/SIGI) read straight from the MTZ header.
    self.f, self.sigf, self.i, self.sigi, _, _, _ = simbad.util.mtz_util.get_labels(self.mtz)
    self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
    n_files = len(self.simbad_dat_files)
    i = InputMR_DAT()
    i.setHKLI(self.mtz)
    i.setMUTE(True)
    run_mr_data = runMR_DAT(i)
    sg = run_mr_data.getSpaceGroupName().replace(" ", "")
    cell = " ".join(map(str, run_mr_data.getUnitCell()))
    chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
    total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(n_files, chunk_size)
    mat_coef = simbad.util.matthews_prob.MatthewsProbability(cell, sg)
    # Unique per-run scratch/log directory.
    dir_name = "simbad-tmp-" + str(uuid.uuid1())
    script_log_dir = os.path.join(self.work_dir, dir_name)
    os.mkdir(script_log_dir)
    ccp4_scr = os.environ["CCP4_SCR"]
    default_tmp_dir = os.path.join(self.work_dir, 'tmp')
    if self.tmp_dir:
        template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
    else:
        template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")
    # Predicted assembly MW for this cell (Phaser CCA), used for ranking.
    predicted_molecular_weight = 0
    if run_mr_data.Success():
        i = InputCCA()
        i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
        i.setCELL6(run_mr_data.getUnitCell())
        i.setMUTE(True)
        run_cca = runCCA(i)
        if run_cca.Success():
            predicted_molecular_weight = run_cca.getAssemblyMW()
    # Screen models on predicted solvent content; score by |MW difference|.
    dat_models = []
    for dat_model in self.simbad_dat_files:
        name = os.path.basename(dat_model.replace(".dat", ""))
        pdb_struct = simbad.util.pdb_util.PdbStructure()
        pdb_struct.from_file(dat_model)
        solvent_fraction, n_copies = mat_coef.calculate_content_ncopies_from_struct(pdb_struct)
        solvent_content = solvent_fraction * 100
        if solvent_content < min_solvent_content:
            msg = "Skipping %s: solvent content is predicted to be less than %.2f"
            logger.debug(msg, name, min_solvent_content)
            continue
        mw_diff = abs(predicted_molecular_weight - pdb_struct.molecular_weight)
        info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, None, None, None,
                                                   None, solvent_fraction, n_copies)
        dat_models.append(info)
    sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff), reverse=False)
    # BUGFIX: initialise the score accumulator once, before the chunk
    # loop. Previously `results = []` was re-run every cycle, discarding
    # the scores of all but the last chunk (the sibling implementations
    # initialise it before the loop).
    results = []
    iteration_range = range(0, n_files, chunk_size)
    for cycle, i in enumerate(iteration_range):
        logger.info("Working on chunk %d out of %d", cycle + 1, total_chunk_cycles)
        template_model = os.path.join("$CCP4_SCR", "{0}.pdb")
        phaser_files = []
        # Build one phaser-rotation shell script per model in this chunk.
        for dat_model in sorted_dat_models[i:i + chunk_size]:
            logger.debug("Generating script to perform PHASER rotation " + "function on %s",
                         dat_model.pdb_code)
            pdb_model = template_model.format(dat_model.pdb_code)
            template_rot_log = os.path.join("$CCP4_SCR", "{0}_rot.log")
            conv_py = "\"from simbad.db import convert_dat_to_pdb; convert_dat_to_pdb('{}', '{}')\""
            conv_py = conv_py.format(dat_model.dat_path, pdb_model)
            rot_log = template_rot_log.format(dat_model.pdb_code)
            tmp_dir = template_tmp_dir.format(dat_model.pdb_code)
            phaser_cmd = [
                "simbad.rotsearch.phaser_rotation_search",
                "-hklin", self.mtz,
                "-f", self.f,
                "-sigf", self.sigf,
                "-i", self.i,
                "-sigi", self.sigi,
                "-pdbin", pdb_model,
                "-logfile", rot_log,
                "-solvent", dat_model.solvent,
                "-nmol", dat_model.nmol,
                "-work_dir", tmp_dir,
            ]
            phaser_cmd = " ".join(str(e) for e in phaser_cmd)
            # Script: switch scratch dir, convert .dat -> .pdb, run the
            # rotation search module, then clean up and restore CCP4_SCR.
            cmd = [
                [EXPORT, "CCP4_SCR=" + tmp_dir],
                ["mkdir", "-p", "$CCP4_SCR\n"],
                [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-c", conv_py, os.linesep],
                [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-m", phaser_cmd, os.linesep],
                ["rm", "-rf", "$CCP4_SCR\n"],
                [EXPORT, "CCP4_SCR=" + ccp4_scr],
            ]
            phaser_script = pyjob.misc.make_script(cmd, directory=script_log_dir,
                                                   prefix="phaser_", stem=dat_model.pdb_code)
            phaser_log = phaser_script.rsplit(".", 1)[0] + '.log'
            phaser_files += [(phaser_script, phaser_log, dat_model.dat_path)]
        if len(phaser_files) > 0:
            logger.info("Running PHASER rotation functions")
            phaser_scripts, phaser_logs, dat_models = zip(*phaser_files)
            simbad.rotsearch.submit_chunk(phaser_scripts, script_log_dir, nproc, 'simbad_phaser',
                                          submit_qtype, submit_queue, monitor,
                                          self.rot_succeeded_log)
            # Parse each PHASER log back into a rotation score.
            for dat_model, phaser_log in zip(dat_models, phaser_logs):
                base = os.path.basename(phaser_log)
                pdb_code = base.replace("phaser_", "").replace(".log", "")
                try:
                    phaser_rotation_parser = simbad.parsers.rotsearch_parser.PhaserRotsearchParser(
                        phaser_log)
                    # An R-factor-only result gets sentinel LLG/RFZ values.
                    if phaser_rotation_parser.rfact:
                        phaser_rotation_parser.llg = 100
                        phaser_rotation_parser.rfz = 10
                    score = simbad.core.phaser_score.PhaserRotationScore(
                        pdb_code, dat_model, phaser_rotation_parser.llg,
                        phaser_rotation_parser.rfz)
                    if phaser_rotation_parser.rfz:
                        results += [score]
                # Missing/unreadable log: skip this model, keep going.
                except IOError:
                    pass
        else:
            logger.critical("No structures to be trialled")
    self._search_results = results
    # Remove the scratch areas created above.
    shutil.rmtree(script_log_dir)
    if os.path.isdir(default_tmp_dir):
        shutil.rmtree(default_tmp_dir)
def run(self, models_dir, nproc=2, min_solvent_content=20, submit_nproc=None, submit_qtype=None,
        submit_queue=None, monitor=None, chunk_size=0, **kwargs):
    """Run phaser rotation function on a directory of models.

    Parameters
    ----------
    models_dir : str
        The directory containing the models to run the rotation search on
    nproc : int, optional
        The number of processors to run the job on
    min_solvent_content : int, float, optional
        The minimum solvent content present in the unit cell with the
        input model [default: 20]
    submit_nproc : int
        The number of processors to use on the head node when creating
        submission scripts on a cluster [default: 1]
    submit_qtype : str
        The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
        The queue to submit to on the cluster
    monitor
    chunk_size : int, optional
        The number of jobs to submit at the same time

    Returns
    -------
    file
        log file for each model in the models_dir
    """
    # Stash per-run parameters on self so the worker (invoked through
    # p.map(self, ...) below) can read them.
    self.submit_qtype = submit_qtype
    self.submit_queue = submit_queue
    self.mtz_labels = simbad.util.mtz_util.GetLabels(self.mtz)
    self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
    # Load reflection data with the F/SIGF labels found above.
    i = InputMR_DAT()
    i.setHKLI(self.mtz)
    i.setLABI_F_SIGF(self.mtz_labels.f, self.mtz_labels.sigf)
    i.setMUTE(True)
    run_mr_data = runMR_DAT(i)
    sg = run_mr_data.getSpaceGroupName().replace(" ", "")
    cell = " ".join(map(str, run_mr_data.getUnitCell()))
    mat_coef = simbad.util.matthews_prob.MatthewsProbability(cell, sg)
    # Unique per-run scratch/log directory.
    dir_name = "simbad-tmp-" + str(uuid.uuid1())
    self.script_log_dir = os.path.join(self.work_dir, dir_name)
    os.mkdir(self.script_log_dir)
    self.ccp4_scr = os.environ["CCP4_SCR"]
    default_tmp_dir = os.path.join(self.work_dir, 'tmp')
    if self.tmp_dir:
        self.template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
    else:
        self.template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")
    # Predicted assembly MW for this cell via Phaser CCA, used for ranking.
    predicted_molecular_weight = 0
    if run_mr_data.Success():
        i = InputCCA()
        i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
        i.setCELL6(run_mr_data.getUnitCell())
        i.setMUTE(True)
        run_cca = runCCA(i)
        if run_cca.Success():
            predicted_molecular_weight = run_cca.getAssemblyMW()
    # Screen models on predicted solvent content; score by |MW difference|.
    dat_models = []
    for dat_model in self.simbad_dat_files:
        name = os.path.basename(dat_model.replace(".dat", ""))
        pdb_struct = simbad.util.pdb_util.PdbStructure()
        pdb_struct.from_file(dat_model)
        solvent_fraction, n_copies = mat_coef.calculate_content_ncopies_from_struct(
            pdb_struct)
        solvent_content = solvent_fraction * 100
        if solvent_content < min_solvent_content:
            msg = "Skipping %s: solvent content is predicted to be less than %.2f"
            logger.debug(msg, name, min_solvent_content)
            continue
        mw_diff = abs(predicted_molecular_weight - pdb_struct.molecular_weight)
        info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, None, None, None,
                                                   None, solvent_fraction, n_copies)
        dat_models.append(info)
    # Best MW matches first.
    sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff), reverse=False)
    n_files = len(sorted_dat_models)
    chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
    total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(
        n_files, chunk_size)
    results = []
    iteration_range = range(0, n_files, chunk_size)
    for cycle, i in enumerate(iteration_range):
        logger.info("Working on chunk %d out of %d", cycle + 1, total_chunk_cycles)
        if self.solution:
            logger.info("Early termination criteria met, skipping chunk %d", cycle + 1)
            continue
        self.template_model = os.path.join("$CCP4_SCR", "{0}.pdb")
        # Script generation runs in a local pool; use submit_nproc workers
        # when targeting a cluster queue.
        if submit_qtype == 'local':
            processes = nproc
        else:
            processes = submit_nproc
        collector = ScriptCollector(None)
        phaser_files = []
        # NOTE(review): comprehension used purely for side effects; the
        # comprehension variable shadows the chunk index `i`, but the
        # slice bound is evaluated before the shadowing starts.
        with pool.Pool(processes=processes) as p:
            [(collector.add(i[0]), phaser_files.append(i[1]))
             for i in p.map(self, sorted_dat_models[i:i + chunk_size]) if i is not None]
        if len(phaser_files) > 0:
            logger.info("Running PHASER rotation functions")
            phaser_logs, dat_models = zip(*phaser_files)
            simbad.util.submit_chunk(collector, self.script_log_dir, nproc, 'simbad_phaser',
                                     submit_qtype, submit_queue, True, monitor,
                                     self.rot_succeeded_log)
            # Parse each PHASER log back into a rotation score.
            for dat_model, phaser_log in zip(dat_models, phaser_logs):
                base = os.path.basename(phaser_log)
                pdb_code = base.replace("phaser_", "").replace(".log", "")
                try:
                    phaser_rotation_parser = simbad.parsers.rotsearch_parser.PhaserRotsearchParser(
                        phaser_log)
                    # An R-factor-only result gets sentinel LLG/RFZ values.
                    if phaser_rotation_parser.rfact:
                        phaser_rotation_parser.llg = 100
                        phaser_rotation_parser.rfz = 10
                    score = simbad.core.phaser_score.PhaserRotationScore(
                        pdb_code, dat_model, phaser_rotation_parser.llg,
                        phaser_rotation_parser.rfz)
                    if phaser_rotation_parser.rfz:
                        results += [score]
                # Missing/unreadable log: skip this model, keep going.
                except IOError:
                    pass
        else:
            logger.critical("No structures to be trialled")
    self._search_results = results
    # Remove the scratch areas created above.
    shutil.rmtree(self.script_log_dir)
    if os.path.isdir(default_tmp_dir):
        shutil.rmtree(default_tmp_dir)
def run(self, models_dir, nproc=2, shres=3.0, pklim=0.5, npic=50, rotastep=1.0,
        min_solvent_content=20, submit_qtype=None, submit_queue=None, monitor=None,
        chunk_size=0, **kwargs):
    """Run amore rotation function on a directory of models.

    Parameters
    ----------
    models_dir : str
        The directory containing the models to run the rotation search on
    nproc : int, optional
        The number of processors to run the job on
    shres : int, float, optional
        Spherical harmonic resolution [default 3.0]
    pklim : int, float, optional
        Peak limit, output all peaks above <float> [default: 0.5]
    npic : int, optional
        Number of peaks to output from the translation function map for
        each orientation [default: 50]
    rotastep : int, float, optional
        Size of rotation step [default: 1.0]
    min_solvent_content : int, float, optional
        The minimum solvent content present in the unit cell with the
        input model [default: 20]
    submit_qtype : str
        The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
        The queue to submit to on the cluster
    monitor
    chunk_size : int, optional
        The number of jobs to submit at the same time

    Returns
    -------
    file
        log file for each model in the models_dir
    """
    self.submit_qtype = submit_qtype
    self.submit_queue = submit_queue
    self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
    n_files = len(self.simbad_dat_files)
    # Load the reflection data.
    i = InputMR_DAT()
    i.setHKLI(self.mtz)
    i.setMUTE(True)
    run_mr_data = runMR_DAT(i)
    sg = run_mr_data.getSpaceGroupName().replace(" ", "")
    cell = " ".join(map(str, run_mr_data.getUnitCell()))
    chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
    total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(n_files, chunk_size)
    sol_calc = simbad.util.matthews_prob.SolventContent(cell, sg)
    # Unique per-run scratch/log directory.
    dir_name = "simbad-tmp-" + str(uuid.uuid1())
    script_log_dir = os.path.join(self.work_dir, dir_name)
    os.mkdir(script_log_dir)
    hklpck0 = self._generate_hklpck0()
    ccp4_scr = os.environ["CCP4_SCR"]
    default_tmp_dir = os.path.join(self.work_dir, 'tmp')
    if self.tmp_dir:
        template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
    else:
        template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")
    # "{0}" placeholders are later filled with the model's PDB code.
    template_hklpck1 = os.path.join("$CCP4_SCR", "{0}.hkl")
    template_clmn0 = os.path.join("$CCP4_SCR", "{0}_spmipch.clmn")
    template_clmn1 = os.path.join("$CCP4_SCR", "{0}.clmn")
    template_mapout = os.path.join("$CCP4_SCR", "{0}_amore_cross.map")
    template_table1 = os.path.join("$CCP4_SCR", "{0}_sfs.tab")
    template_model = os.path.join("$CCP4_SCR", "{0}.pdb")
    template_rot_log = os.path.join("$CCP4_SCR", "{0}_rot.log")
    # Predicted assembly MW for this cell via Phaser CCA, used for ranking.
    predicted_molecular_weight = 0
    if run_mr_data.Success():
        i = InputCCA()
        i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
        i.setCELL6(run_mr_data.getUnitCell())
        i.setMUTE(True)
        run_cca = runCCA(i)
        if run_cca.Success():
            predicted_molecular_weight = run_cca.getAssemblyMW()
    # Screen every .dat model: drop those with too-low predicted solvent
    # content or failed calculations; score the rest by |MW difference|.
    dat_models = []
    for dat_model in self.simbad_dat_files:
        name = os.path.basename(dat_model.replace(".dat", ""))
        pdb_struct = simbad.util.pdb_util.PdbStructure()
        pdb_struct.from_file(dat_model)
        try:
            solvent_content = sol_calc.calculate_from_struct(pdb_struct)
            if solvent_content < min_solvent_content:
                msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                logger.debug(msg, name, min_solvent_content)
                continue
        except ValueError:
            msg = "Skipping %s: Error calculating solvent content"
            logger.debug(msg, name)
            # BUGFIX: skip this model. Previously the loop fell through
            # after a ValueError and used an unbound (first iteration) or
            # stale (later iterations) solvent_content. The sibling
            # implementation handles this case with `continue`.
            # NOTE(review): that sibling also skips on IndexError
            # ("Problem with dat file"); here IndexError still propagates
            # — confirm whether it should be caught as well.
            continue
        x, y, z, intrad = pdb_struct.integration_box
        model_molecular_weight = pdb_struct.molecular_weight
        mw_diff = abs(predicted_molecular_weight - model_molecular_weight)
        info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, x, y, z, intrad,
                                                   solvent_content, None)
        dat_models.append(info)
    # Best MW matches first.
    sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff), reverse=False)
    # BUGFIX: initialise the score accumulator once, before the chunk
    # loop. Previously `results = []` was re-run every cycle, discarding
    # the scores of all but the last chunk.
    results = []
    iteration_range = range(0, n_files, chunk_size)
    for cycle, i in enumerate(iteration_range):
        logger.info("Working on chunk %d out of %d", cycle + 1, total_chunk_cycles)
        amore_files = []
        # Build one AMORE tab/rot shell script per model in this chunk.
        for dat_model in sorted_dat_models[i:i + chunk_size]:
            logger.debug("Generating script to perform AMORE rotation " + "function on %s",
                         dat_model.pdb_code)
            pdb_model = template_model.format(dat_model.pdb_code)
            table1 = template_table1.format(dat_model.pdb_code)
            hklpck1 = template_hklpck1.format(dat_model.pdb_code)
            clmn0 = template_clmn0.format(dat_model.pdb_code)
            clmn1 = template_clmn1.format(dat_model.pdb_code)
            mapout = template_mapout.format(dat_model.pdb_code)
            conv_py = "\"from simbad.db import convert_dat_to_pdb; convert_dat_to_pdb('{}', '{}')\""
            conv_py = conv_py.format(dat_model.dat_path, pdb_model)
            tab_cmd = [self.amore_exe, "xyzin1", pdb_model, "xyzout1", pdb_model,
                       "table1", table1]
            tab_stdin = self.tabfun_stdin_template.format(
                x=dat_model.x, y=dat_model.y, z=dat_model.z, a=90, b=90, c=120)
            rot_cmd = [
                self.amore_exe, 'table1', table1, 'HKLPCK1', hklpck1, 'hklpck0', hklpck0,
                'clmn1', clmn1, 'clmn0', clmn0, 'MAPOUT', mapout
            ]
            rot_stdin = self.rotfun_stdin_template.format(
                shres=shres, intrad=dat_model.intrad, pklim=pklim, npic=npic, step=rotastep)
            rot_log = template_rot_log.format(dat_model.pdb_code)
            tmp_dir = template_tmp_dir.format(dat_model.pdb_code)
            # Script: switch scratch dir, convert .dat -> .pdb, run the
            # tab then rot functions with here-doc stdin, grep the
            # solution record, then clean up and restore CCP4_SCR.
            cmd = [
                [EXPORT, "CCP4_SCR=" + tmp_dir],
                ["mkdir", "-p", "$CCP4_SCR\n"],
                [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-c", conv_py, os.linesep],
                tab_cmd + ["<< eof >", os.devnull],
                [tab_stdin],
                ["eof"],
                [os.linesep],
                rot_cmd + ["<< eof >", rot_log],
                [rot_stdin],
                ["eof"],
                [os.linesep],
                ["grep", "-m 1", "SOLUTIONRCD", rot_log, os.linesep],
                ["rm", "-rf", "$CCP4_SCR\n"],
                [EXPORT, "CCP4_SCR=" + ccp4_scr],
            ]
            amore_script = pyjob.misc.make_script(cmd, directory=script_log_dir,
                                                  prefix="amore_", stem=dat_model.pdb_code)
            amore_log = amore_script.rsplit(".", 1)[0] + '.log'
            amore_files += [(amore_script, tab_stdin, rot_stdin, amore_log,
                             dat_model.dat_path)]
        if len(amore_files) > 0:
            logger.info("Running AMORE tab/rot functions")
            amore_scripts, _, _, amore_logs, dat_models = zip(*amore_files)
            simbad.rotsearch.submit_chunk(amore_scripts, script_log_dir, nproc, 'simbad_amore',
                                          submit_qtype, submit_queue, monitor,
                                          self.rot_succeeded_log)
            # Parse each AMORE log back into a rotation score.
            for dat_model, amore_log in zip(dat_models, amore_logs):
                base = os.path.basename(amore_log)
                pdb_code = base.replace("amore_", "").replace(".log", "")
                try:
                    rotsearch_parser = simbad.parsers.rotsearch_parser.AmoreRotsearchParser(
                        amore_log)
                    score = simbad.core.amore_score.AmoreRotationScore(
                        pdb_code, dat_model, rotsearch_parser.alpha, rotsearch_parser.beta,
                        rotsearch_parser.gamma, rotsearch_parser.cc_f, rotsearch_parser.rf_f,
                        rotsearch_parser.cc_i, rotsearch_parser.cc_p, rotsearch_parser.icp,
                        rotsearch_parser.cc_f_z_score, rotsearch_parser.cc_p_z_score,
                        rotsearch_parser.num_of_rot)
                    if rotsearch_parser.cc_f_z_score:
                        results += [score]
                # Missing/unreadable log: skip this model, keep going.
                except IOError:
                    pass
        else:
            logger.critical("No structures to be trialled")
    self._search_results = results
    # Remove the scratch areas created above.
    shutil.rmtree(script_log_dir)
    if os.path.isdir(default_tmp_dir):
        shutil.rmtree(default_tmp_dir)