def test_batch(self):
    """
    Exercise most batch-file features in one go and compare the file
    written to disk against the expected reference content.
    """
    with TemporaryDirectory() as work_dir:
        script_path = path.join(work_dir, "test.sh")
        batch_file = self.hpc.new_batch_file("test", script_path, "out", "err", cpus=2)

        # exercise every supported option
        batch_file.set_options(
            time=Duration(hours=2),
            exclusive=True,
            nodes=4,
            mem_per_cpu=DataSize(gigabyte=1),
            partition="sea",
            memory=DataSize(gigabyte=2),
        )

        # exercise each flavor of command the API offers
        batch_file.assign_variable("FOO", "BAR")
        batch_file.add_command("single_command")
        batch_file.add_commands("command1", "command2")
        batch_file.purge_modules()
        batch_file.load_modules(["mod1", "mod2"])
        batch_file.load_python_env()
        batch_file.save()

        # the file written to disk must match the reference verbatim
        self._assert_file(script_path, EXPECTED_BATCH)
def run_dials(proj, filters, options):
    """
    Create and submit one xia2/DIALS HPC batch job per matching dataset.

    :param proj: the project whose datasets are processed
    :param filters: dataset selection filters, passed to get_proc_datasets
    :param options: dict; keys read here: "spacegroup", "cellparam",
        "customdials" and "friedel_law"
    """
    # Modules list for HPC env
    softwares = ["gopresto", versions.DIALS_MOD]

    jobs = JobsSet("XIA2/DIALS")
    hpc = SITE.get_hpc_runner()
    # epoch is embedded in log file names to keep runs distinguishable
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "dials")):
        outdir, image_file = _get_dataset_params(proj, dset)

        spg = get_space_group_argument(Tools.DIALS, options["spacegroup"])

        if options["cellparam"] != "":
            # strip parentheses; DIALS wants a plain comma-separated list
            # (the previous ','.join(….split(',')) round-trip was a no-op
            # and has been removed)
            cellpar = options["cellparam"].replace("(", "").replace(")", "")
            unit_cell = f"unit_cell={cellpar}"
        else:
            unit_cell = ""

        customdials = options["customdials"]

        # DIALS uses 'atom=X' to request anomalous handling
        if options["friedel_law"] == "true":
            friedel = "atom=X"
        else:
            friedel = ""

        dials_commands, cpus = get_xia_dials_commands(
            spg, unit_cell, customdials, friedel, image_file, dset.images
        )

        batch = hpc.new_batch_file(
            "DIALS",
            project_script(proj, f"dials_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_err.txt"),
            cpus,
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            f"mkdir -p {outdir}/dials",
            f"cd {outdir}/dials",
            *dials_commands,
        )
        batch.save()

        jobs.add_job(batch)
        add_update_job(jobs, hpc, proj, "dials", dset, batch)

    jobs.submit()
def run_xdsapp(project, filters, options):
    """
    Create and submit one XDSAPP HPC batch job per matching dataset.

    :param project: the project whose datasets are processed
    :param filters: dataset selection filters, passed to get_proc_datasets
    :param options: dict; keys read here: "spacegroup", "cellparam",
        "customxdsapp" and "friedel_law"
    """
    # Modules list for HPC env
    softwares = ["gopresto", versions.XDSAPP_MOD]

    jobs = JobsSet("XDSAPP")
    hpc = SITE.get_hpc_runner()
    # epoch is embedded in log file names to keep runs distinguishable
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(project, filters, "xdsapp")):
        outdir, image_file = _get_dataset_params(project, dset)

        # BUGFIX: the sibling functions (run_dials, run_autoproc) use "" as
        # the 'no space group chosen' sentinel, but this check was
        # 'is not None', so an empty string produced a bogus
        # --spacegroup='…' argument. A truthiness check covers both
        # None and "".
        if options["spacegroup"]:
            # XDSAPP takes space group and cell parameters in one flag,
            # space separated
            cellpar = " ".join(options["cellparam"].split(","))
            spacegroup = get_space_group_argument(Tools.XDSAPP, options["spacegroup"])
            spg = f"--spacegroup='{spacegroup} {cellpar}'"
        else:
            spg = ""

        customxdsapp = options["customxdsapp"]

        if options["friedel_law"] == "true":
            friedel = "--fried=True"
        else:
            friedel = "--fried=False"

        xdsapp_command, cpus = get_xdsapp_command(
            outdir, spg, customxdsapp, friedel, image_file, dset.images
        )

        batch = hpc.new_batch_file(
            "XDSAPP",
            project_script(project, f"xdsapp_fragmax_part{num}.sh"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_out.txt"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_err.txt"),
            cpus,
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            f"mkdir -p {outdir}/xdsapp",
            f"cd {outdir}/xdsapp",
            xdsapp_command,
        )
        batch.save()

        jobs.add_job(batch)
        add_update_job(jobs, hpc, project, "xdsapp", dset, batch)

    jobs.submit()
def test_hours(self):
    """
    Duration given as hours only should render as HH:00:00.
    """
    self.batch.set_options(time=Duration(hours=6))

    expected = f"{self.SBATCH_HEADER}#SBATCH --time=06:00:00\n"
    self.assertEqual(self.batch._body, expected)
def test_hms(self):
    """
    Duration given as hours, minutes and seconds should render all three
    fields, with hours allowed to exceed two digits.
    """
    self.batch.set_options(time=Duration(hours=128, minutes=23, seconds=7))

    expected = f"{self.SBATCH_HEADER}#SBATCH --time=128:23:07\n"
    self.assertEqual(self.batch._body, expected)
def test_seconds(self):
    """
    Duration given as seconds only should render as 00:00:SS.
    """
    self.batch.set_options(time=Duration(seconds=5))

    expected = f"{self.SBATCH_HEADER}#SBATCH --time=00:00:05\n"
    self.assertEqual(self.batch._body, expected)
def test_minutes(self):
    """
    Duration given as minutes only should render as 00:MM:00.
    """
    self.batch.set_options(time=Duration(minutes=9))

    expected = f"{self.SBATCH_HEADER}#SBATCH --time=00:09:00\n"
    self.assertEqual(self.batch._body, expected)
def run_autoproc(proj, filters, options):
    """
    Create and submit one autoPROC HPC batch job per matching dataset.

    Reads the "spacegroup", "cellparam", "customautoproc" and "friedel_law"
    entries of the options dict to build the autoPROC command line.
    """
    # HPC environment modules needed by autoPROC
    softwares = [
        "gopresto",
        versions.CCP4_MOD,
        versions.AUTOPROC_MOD,
        versions.DURIN_MOD,
    ]

    jobs = JobsSet("autoPROC")
    hpc = SITE.get_hpc_runner()
    # timestamp baked into log names so separate runs don't collide
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "autoproc")):
        batch = hpc.new_batch_file(
            "autoPROC",
            project_script(proj, f"autoproc_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_err.txt"),
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
            cpus_per_task=64,
            memory=DataSize(gigabyte=300),
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        outdir, h5master, sample, num_images = _get_dataset_params(proj, dset)

        # optional symmetry argument
        if options["spacegroup"] != "":
            spacegroup = options["spacegroup"]
            spg = f"symm='{spacegroup}'"
        else:
            spg = ""

        # optional unit cell argument: space-separated values, parentheses
        # stripped
        if options["cellparam"] != "":
            cellpar = " ".join(options["cellparam"].split(","))
            cellpar = cellpar.replace("(", "").replace(")", "")
            unit_cell = f"cell='{cellpar}'"
        else:
            unit_cell = ""

        customautoproc = options["customautoproc"]

        friedel = "-ANO" if options["friedel_law"] == "true" else "-noANO"

        batch.add_commands(
            f"rm -rf {outdir}/autoproc",
            f"mkdir -p {outdir}",
            f"cd {outdir}",
            get_autoproc_command(
                outdir, spg, unit_cell, customautoproc, friedel, h5master, num_images
            ),
        )
        batch.save()

        jobs.add_job(batch)
        add_update_job(jobs, hpc, proj, "autoproc", dset, batch)

    jobs.submit()
def launch_refine_jobs(
    project: Project,
    filters,
    pdb_file,
    space_group,
    run_aimless,
    refine_tool,
    refine_tool_commands,
    cpus,
):
    """
    Create and submit one refinement HPC job per (dataset, input MTZ) pair.

    Each generated script fetches the model and data into a scratch
    directory, optionally runs aimless, runs the tool-specific refinement
    commands and uploads the results before cleaning up.
    """
    epoch = round(time.time())
    jobs = JobsSet("Refine")
    hpc = SITE.get_hpc_runner()

    for dset in get_refine_datasets(project, filters, refine_tool):
        for tool, input_mtz in _find_input_mtzs(project, dset):
            job_script = hpc.new_batch_file(
                f"refine {tool} {dset.name}",
                project_script(project, f"refine_{tool}_{refine_tool}_{dset.name}.sh"),
                project_log_path(
                    project, f"refine_{tool}_{dset.name}_{epoch}_%j_out.txt"
                ),
                project_log_path(
                    project, f"refine_{tool}_{dset.name}_{epoch}_%j_err.txt"
                ),
                cpus,
            )
            job_script.set_options(
                time=Duration(hours=12),
                nodes=1,
                mem_per_cpu=DataSize(gigabyte=5),
            )

            # set up crypt access and a fresh scratch directory, then pull
            # in the model and the reflection data
            job_script.add_commands(crypt_shell.crypt_cmd(project))
            job_script.assign_variable("WORK_DIR", "`mktemp -d`")
            job_script.add_commands(
                "cd $WORK_DIR",
                crypt_shell.fetch_file(project, pdb_file, "model.pdb"),
                crypt_shell.fetch_file(project, input_mtz, "input.mtz"),
            )

            # TODO: load tool specific modules?
            job_script.load_modules(HPC_MODULES)

            if run_aimless:
                job_script.add_commands(
                    _aimless_cmd(space_group.short_name, "input.mtz")
                )

            upload_dir = Path(project.get_dataset_results_dir(dset), tool)
            job_script.add_commands(
                *refine_tool_commands,
                _upload_result_cmd(project, upload_dir),
                "cd",
                "rm -rf $WORK_DIR",
            )

            job_script.save()
            jobs.add_job(job_script)
            add_update_job(jobs, hpc, project, refine_tool, dset, job_script)

    jobs.submit()
def _write_main_script(
    project: Project, pandda_dir: Path, method, methodshort, options
):
    """
    Write the HPC batch script that runs the main PanDDA analysis step.

    :param project: the project being analysed
    :param pandda_dir: directory where the PanDDA run is performed
    :param method: processing method name, used in script/log file names
    :param methodshort: short method tag used in the job name
    :param options: options dict; "reprocessZmap" controls whether the
        dataset clustering step is skipped
    :return: the saved batch file object
    :raises NotImplementedError: for encrypted projects (not supported yet)
    """
    epoch = round(time.time())
    log_prefix = project_log_path(project, f"PanDDA_{method}_{epoch}_%j_")
    pandda_script = project_script(project, PANDDA_WORKER)
    giant_cluster = "/mxn/groups/biomax/wmxsoft/pandda/bin/giant.datasets.cluster"

    # when re-processing Z-maps, the clustering step is skipped
    if options["reprocessZmap"]:
        pandda_cluster = ""
    else:
        pandda_cluster = f"{giant_cluster} ./*/final.pdb pdb_label=foldername"

    hpc = SITE.get_hpc_runner()
    batch = hpc.new_batch_file(
        f"PDD{methodshort}",
        project_script(project, f"pandda_{method}.sh"),
        f"{log_prefix}out.txt",
        f"{log_prefix}err.txt",
        cpus=40,
    )
    batch.set_options(
        time=Duration(hours=99),
        exclusive=True,
        nodes=1,
    )

    if project.encrypted:
        # TODO: implement this?
        raise NotImplementedError("pandda for encrypted projects")
        # batch.add_command(crypt_shell.crypt_cmd(project))
        # batch.assign_variable("WORK_DIR", "`mktemp -d`")
        # batch.add_commands(
        #     "cd $WORK_DIR", crypt_shell.fetch_dir(project, data_dir, ".")
        # )
        #
        # batch.load_modules(["gopresto", versions.CCP4_MOD, versions.PYMOL_MOD])
        # batch.add_commands(
        #     pandda_cluster,
        #     f'python {pandda_script} . {project.protein} "{options}"',
        #     crypt_shell.upload_dir(
        #         project, "$WORK_DIR/pandda", path.join(data_dir, "pandda")
        #     ),
        #     crypt_shell.upload_dir(
        #         project,
        #         "$WORK_DIR/clustered-datasets",
        #         path.join(data_dir, "clustered-datasets"),
        #     ),
        # )
    else:
        batch.add_command(f"cd {pandda_dir}")
        add_pandda_init_commands(batch)
        batch.add_commands(
            pandda_cluster,
            f'python {pandda_script} {pandda_dir} {project.protein} "{options}"',
            f"chmod -R 777 {project.pandda_dir}",
        )

    # add commands to fix symlinks
    #
    # BUGFIX: the -iname patterns are now single-quoted in the generated
    # script; unquoted, the shell running the script would glob-expand them
    # against the current directory before 'find' ever saw them, breaking
    # the command whenever matching files exist there.
    ln_command = '\'ln -f "$(readlink -m "$0")" "$0"\' {} \\;'
    batch.add_commands(
        f"cd {project.pandda_dir}; find -type l -iname '*-pandda-input.*' -exec bash -c {ln_command}",
        f"cd {project.pandda_dir}; find -type l -iname '*pandda-model.pdb' -exec bash -c {ln_command}",
    )

    batch.save()

    return batch
def _write_prepare_script(
    project: Project,
    rn,
    method,
    dataset,
    pdb,
    mtz,
    resHigh,
    free_r_flag,
    native_f,
    sigma_fp,
    cif_method,
):
    """
    Write an HPC batch script that prepares one dataset for a PanDDA run.

    The generated script fetches the refined model and reflection data into
    a scratch directory, generates ligand restraints for non-apo crystals
    (with elbow or grade, per cif_method), re-cuts the data to resHigh,
    regenerates free-R flags, computes map coefficients and uploads the
    result to the dataset's PanDDA method directory.

    Returns the saved batch file object.
    """
    epoch = round(time.time())
    dest_dir = Path(project.pandda_method_dir(method), dataset.name)

    hpc = SITE.get_hpc_runner()
    batch = hpc.new_batch_file(
        f"PnD{rn}",
        project_script(project, f"pandda_prepare_{method}_{dataset.name}.sh"),
        project_log_path(project, f"{dataset.name}_PanDDA_{epoch}_%j_out.txt"),
        project_log_path(project, f"{dataset.name}_PanDDA_{epoch}_%j_err.txt"),
        cpus=1,
    )
    batch.set_options(time=Duration(minutes=15), memory=DataSize(gigabyte=5))

    # crypt access, scratch dir, and input files
    batch.add_command(crypt_shell.crypt_cmd(project))
    batch.assign_variable("DEST_DIR", dest_dir)
    batch.assign_variable("WORK_DIR", "`mktemp -d`")
    batch.add_commands(
        "cd $WORK_DIR",
        crypt_shell.fetch_file(project, pdb, "final.pdb"),
        crypt_shell.fetch_file(project, mtz, "final.mtz"),
    )

    batch.purge_modules()
    batch.load_modules(
        ["gopresto", versions.PHENIX_MOD, versions.CCP4_MOD, versions.BUSTER_MOD]
    )

    if not dataset.crystal.is_apo():
        fragment = get_crystals_fragment(dataset.crystal)
        # non-apo crystal should have a fragment
        assert fragment

        if cif_method == "elbow":
            restraints_cmd = f"phenix.elbow --smiles='{fragment.smiles}' --output=$WORK_DIR/{fragment.code} --opt\n"
        else:
            assert cif_method == "grade"
            restraints_cmd = (
                f"grade '{fragment.smiles}' -ocif $WORK_DIR/{fragment.code}.cif "
                f"-opdb $WORK_DIR/{fragment.code}.pdb -nomogul\n"
            )

        batch.add_command(restraints_cmd)

    batch.add_commands(
        # cut the data at the requested high-resolution limit
        f'printf "monitor BRIEF\\n labin file 1 -\\n ALL\\n resolution file 1 999.0 {resHigh}\\n" | \\\n'
        " cad hklin1 $WORK_DIR/final.mtz hklout $WORK_DIR/final.mtz",
        "uniqueify -f FreeR_flag $WORK_DIR/final.mtz $WORK_DIR/final.mtz",
        # regenerate a complete free-R flag set
        f'printf "COMPLETE FREE={free_r_flag} \\nEND\\n" | \\\n'
        " freerflag hklin $WORK_DIR/final.mtz hklout $WORK_DIR/final_rfill.mtz",
        f"phenix.maps final_rfill.mtz final.pdb maps.input.reflection_data.labels='{native_f},{sigma_fp}'",
        "mv final.mtz final_original.mtz",
        "mv final_map_coeffs.mtz final.mtz",
        # replace any previous prepared data with the fresh result
        "rm -rf $DEST_DIR",
        crypt_shell.upload_dir(project, "$WORK_DIR", "$DEST_DIR"),
        "rm -rf $WORK_DIR",
    )

    batch.save()

    return batch
def auto_ligand_fit(project, useLigFit, useRhoFit, filters, cifMethod, custom_ligfit, custom_rhofit):
    """
    Create and submit ligand fitting HPC jobs for matching refine results.

    For each non-apo refine result, builds a script that generates ligand
    restraints (elbow, acedrg or grade, per cifMethod) and runs rhofit
    and/or phenix.ligandfit as requested.
    """
    # HPC environment modules for restraint generation and fitting
    softwares = ["gopresto", versions.BUSTER_MOD, versions.PHENIX_MOD]

    jobs = JobsSet("Ligand Fit")
    hpc = SITE.get_hpc_runner()

    refine_results = _get_refine_results(project, filters, useLigFit, useRhoFit)

    for num, result in enumerate(refine_results):
        dataset = result.dataset
        if dataset.crystal.is_apo():
            # don't try to fit ligand to an apo crystal
            continue

        fragment = get_crystals_fragment(dataset.crystal)
        result_dir = project.get_refine_result_dir(result)
        pdb = Path(result_dir, "final.pdb")

        cleanup_command = ""
        ligand_stem = Path(result_dir, fragment.code)

        # pick the restraint generation command for the chosen method
        if cifMethod == "elbow":
            cif_command = f"phenix.elbow --smiles='{fragment.smiles}' --output={ligand_stem}\n"
        elif cifMethod == "acedrg":
            cif_command = f"acedrg -i '{fragment.smiles}' -o {ligand_stem}\n"
            # acedrg leaves a temporary directory behind; remove it afterwards
            cleanup_command = f"rm -rf {ligand_stem}_TMP/\n"
        elif cifMethod == "grade":
            cif_command = (
                f"rm -f {ligand_stem}.cif {ligand_stem}.pdb\n"
                f"grade '{fragment.smiles}' -ocif {ligand_stem}.cif -opdb {ligand_stem}.pdb -nomogul\n"
            )
        else:
            cif_command = ""

        ligand_cif = f"{ligand_stem}.cif"
        ligand_pdb = f"{ligand_stem}.pdb"
        rhofit_outdir = Path(result_dir, "rhofit")
        ligfit_outdir = Path(result_dir, "ligfit")
        mtz_input = Path(result_dir, "final.mtz")

        rhofit_command = ""
        if useRhoFit:
            if rhofit_outdir.exists():
                rhofit_command += f"rm -rf {rhofit_outdir}\n"
            rhofit_command += f"rhofit -l {ligand_cif} -m {mtz_input} -p {pdb} -d {rhofit_outdir} {custom_rhofit}\n"

        ligfit_command = ""
        if useLigFit:
            if ligfit_outdir.exists():
                ligfit_command += f"rm -rf {ligfit_outdir}\n"
            ligfit_command += f"mkdir -p {ligfit_outdir}\n"
            ligfit_command += f"cd {ligfit_outdir} \n"
            ligfit_command += (
                f"phenix.ligandfit data={mtz_input} model={pdb} ligand={ligand_pdb} "
                f"fill=True clean_up=True {custom_ligfit}\n"
            )

        batch = hpc.new_batch_file(
            "autoLigfit",
            project_script(project, f"autoligand_{dataset.name}_{num}.sh"),
            project_log_path(project, "auto_ligfit_%j_out.txt"),
            project_log_path(project, "auto_ligfit_%j_err.txt"),
            cpus=1,
        )
        batch.set_options(time=Duration(hours=1))

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            cif_command,
            rhofit_command,
            ligfit_command,
        )
        batch.add_commands(cleanup_command)

        batch.save()
        jobs.add_job(batch)

        # NOTE: all the update commands needs to be chained to run after each other,
        # due to limitations (bugs!) in jobsd handling of 'run_after' dependencies.
        # Currently it does not work to specify that multiple jobs should be run after
        # a job is finished.
        #
        if useRhoFit:
            batch = add_update_job(jobs, hpc, project, "rhofit", dataset, batch)
        if useLigFit:
            add_update_job(jobs, hpc, project, "ligandfit", dataset, batch)

    jobs.submit()