def crypt_cmd(proj):
    """Return a shell line that defines CRYPT_CMD for an encrypted project.

    For unencrypted projects an empty string is returned, so the result can
    always be prepended to a batch script unconditionally.
    """
    if not proj.encrypted:
        return ""

    # Assemble the command from the project's crypt script, the configured
    # crypt service URL and a freshly validated access token.
    script = project_script(proj, "crypt_files.sh")
    access_token = tokens.get_valid_token(proj)
    parts = (script, settings.CRYPT_URL, access_token.as_base64())
    return "CRYPT_CMD='{} {} {}'\n".format(*parts)
def run_dials(proj, filters, options):
    """Create and submit one XIA2/DIALS HPC batch job per selected dataset.

    options keys used: 'spacegroup', 'cellparam', 'customdials', 'friedel_law'.
    """
    # Modules list for HPC env
    softwares = ["gopresto", versions.DIALS_MOD]

    jobs = JobsSet("XIA2/DIALS")
    hpc = SITE.get_hpc_runner()
    # epoch stamp keeps the log file names unique per submission
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "dials")):
        outdir, image_file = _get_dataset_params(proj, dset)
        spg = get_space_group_argument(Tools.DIALS, options["spacegroup"])

        if options["cellparam"] != "":
            # normalize "(a, b, c, ...)" style form input to a bare comma list
            cellpar = ",".join(options["cellparam"].split(","))
            cellpar = cellpar.replace("(", "").replace(")", "")
            unit_cell = f"unit_cell={cellpar}"
        else:
            unit_cell = ""

        customdials = options["customdials"]

        if options["friedel_law"] == "true":
            friedel = "atom=X"
        else:
            friedel = ""

        dials_commands, cpus = get_xia_dials_commands(
            spg, unit_cell, customdials, friedel, image_file, dset.images
        )

        batch = hpc.new_batch_file(
            "DIALS",
            project_script(proj, f"dials_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_err.txt"),
            cpus,
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)
        # run each dataset's processing inside its own <outdir>/dials folder
        batch.add_commands(
            f"mkdir -p {outdir}/dials",
            f"cd {outdir}/dials",
            *dials_commands,
        )
        batch.save()

        jobs.add_job(batch)
        # chain the status-update job after this batch
        add_update_job(jobs, hpc, proj, "dials", dset, batch)

    jobs.submit()
def run_xdsapp(project, filters, options):
    """Create and submit one XDSAPP HPC batch job per selected dataset.

    options keys used: 'spacegroup', 'cellparam', 'customxdsapp', 'friedel_law'.
    """
    # Modules list for HPC env
    softwares = ["gopresto", versions.XDSAPP_MOD]

    jobs = JobsSet("XDSAPP")
    hpc = SITE.get_hpc_runner()
    # epoch stamp keeps the log file names unique per submission
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(project, filters, "xdsapp")):
        outdir, image_file = _get_dataset_params(project, dset)

        # NOTE(review): run_dials/run_autoproc compare options["spacegroup"]
        # against "", here it is compared against None — confirm which sentinel
        # the submitting form actually sends for "no space group".
        if options["spacegroup"] is not None:
            cellpar = " ".join(options["cellparam"].split(","))
            spacegroup = get_space_group_argument(Tools.XDSAPP, options["spacegroup"])
            spg = f"--spacegroup='{spacegroup} {cellpar}'"
        else:
            spg = ""

        customxdsapp = options["customxdsapp"]

        if options["friedel_law"] == "true":
            friedel = "--fried=True"
        else:
            friedel = "--fried=False"

        xdsapp_command, cpus = get_xdsapp_command(
            outdir, spg, customxdsapp, friedel, image_file, dset.images
        )

        batch = hpc.new_batch_file(
            "XDSAPP",
            project_script(project, f"xdsapp_fragmax_part{num}.sh"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_out.txt"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_err.txt"),
            cpus,
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)
        # run each dataset's processing inside its own <outdir>/xdsapp folder
        batch.add_commands(
            f"mkdir -p {outdir}/xdsapp",
            f"cd {outdir}/xdsapp",
            xdsapp_command,
        )
        batch.save()

        jobs.add_job(batch)
        # chain the status-update job after this batch
        add_update_job(jobs, hpc, project, "xdsapp", dset, batch)

    jobs.submit()
def _save_pdb(project: Project, pdb_id, filename, pdb_data):
    """Save an uploaded PDB (bytes) to the project's models folder in variants.

    Creates database entries and writes four files: the original, one with
    HETATM records dropped, one with ANISOU records dropped and one with
    both dropped. For multi-chain models an additional phenix.ensembler HPC
    job is submitted which produces a trimmed '<name>_txc.pdb' ensemble.
    """
    name = path.splitext(filename)[0]
    nohet_filename = f"{name}_noHETATM.pdb"
    noanisou_filename = f"{name}_noANISOU.pdb"
    nohetanisou_filename = f"{name}_noANISOU_noHETATM.pdb"
    txc_filename = f"{name}_txc.pdb"

    orig_pdb = _add_pdb_entry(project, filename, pdb_id)
    nohet_pdb = _add_pdb_entry(project, nohet_filename, pdb_id)
    noanisou_pdb = _add_pdb_entry(project, noanisou_filename, pdb_id)
    nohetnoanisou_pdb = _add_pdb_entry(project, nohetanisou_filename, pdb_id)

    # write original pdb file 'as-is' to models folder
    with open_proj_file(project, project.get_pdb_file(orig_pdb)) as dest:
        dest.write(pdb_data)

    # write *_noANISOU_noHETATM.pdb, keeping only lines that are neither
    # HETATM nor ANISOU records.
    # BUG FIX: the original condition used 'or' between the two negated
    # startswith() tests, which is true for every line (no line starts with
    # both prefixes), so nothing was ever filtered out of this file.
    with open_proj_file(project, project.get_pdb_file(nohetnoanisou_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not (line.startswith(b"HETATM") or line.startswith(b"ANISOU")):
                dest.write(line)

    # write *_noHETATM.pdb: drop HETATM records only
    with open_proj_file(project, project.get_pdb_file(nohet_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith(b"HETATM"):
                dest.write(line)

    # write *_noANISOU.pdb: drop ANISOU records only
    with open_proj_file(project, project.get_pdb_file(noanisou_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith(b"ANISOU"):
                dest.write(line)

    n_chains = pdb_chains(pdb_data.splitlines(keepends=True))
    if n_chains > 1:
        # multi-chain model: queue a phenix.ensembler job that merges the
        # chains into a trimmed ensemble, stored under the *_txc.pdb entry
        txc_pdb = _add_pdb_entry(project, txc_filename, pdb_id)
        input_pdb_name = path.join(project.models_dir, f"{name}.pdb")

        jobs = JobsSet("phenix ensembler")
        batch = SITE.get_hpc_runner().new_batch_file(
            "phenix ensembler",
            project_script(project, "phenix_ensembler.sh"),
            project_syslog_path(project, "phenix_ensembler_%j.out"),
            project_syslog_path(project, "phenix_ensembler_%j.err"),
        )
        batch.load_modules(["gopresto", PHENIX_MOD])
        batch.add_commands(
            f"cd {project.models_dir}",
            f"phenix.ensembler {input_pdb_name} trim=TRUE output.location='{project.models_dir}'",
            f"mv {project.models_dir}/ensemble_merged.pdb {project.get_pdb_file(txc_pdb)}",
        )
        batch.save()

        jobs.add_job(batch)
        jobs.submit()
def run_autoproc(proj, filters, options):
    """Create and submit one autoPROC HPC batch job per selected dataset.

    options keys used: 'spacegroup', 'cellparam', 'customautoproc',
    'friedel_law'.
    """
    # Modules list for HPC env
    softwares = [
        "gopresto",
        versions.CCP4_MOD,
        versions.AUTOPROC_MOD,
        versions.DURIN_MOD,
    ]

    jobs = JobsSet("autoPROC")
    hpc = SITE.get_hpc_runner()
    # epoch stamp keeps the log file names unique per submission
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "autoproc")):
        batch = hpc.new_batch_file(
            "autoPROC",
            project_script(proj, f"autoproc_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_err.txt"),
        )
        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
            cpus_per_task=64,
            memory=DataSize(gigabyte=300),
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        # NOTE(review): here _get_dataset_params unpacks four values, while in
        # run_dials it unpacks two — presumably a different helper with the
        # same name lives in another module; confirm.
        outdir, h5master, sample, num_images = _get_dataset_params(proj, dset)

        if options["spacegroup"] != "":
            spacegroup = options["spacegroup"]
            spg = f"symm='{spacegroup}'"
        else:
            spg = ""

        if options["cellparam"] != "":
            # normalize "(a, b, c, ...)" style form input to a space-separated list
            cellpar = " ".join(options["cellparam"].split(","))
            cellpar = cellpar.replace("(", "").replace(")", "")
            unit_cell = f"cell='{cellpar}'"
        else:
            unit_cell = ""

        customautoproc = options["customautoproc"]

        if options["friedel_law"] == "true":
            friedel = "-ANO"
        else:
            friedel = "-noANO"

        # wipe any previous autoproc output before re-processing
        batch.add_commands(
            f"rm -rf {outdir}/autoproc",
            f"mkdir -p {outdir}",
            f"cd {outdir}",
            get_autoproc_command(
                outdir, spg, unit_cell, customautoproc, friedel, h5master, num_images
            ),
        )
        batch.save()

        jobs.add_job(batch)
        # chain the status-update job after this batch
        add_update_job(jobs, hpc, proj, "autoproc", dset, batch)

    jobs.submit()
def launch_refine_jobs(
    project: Project,
    filters,
    pdb_file,
    space_group,
    run_aimless,
    refine_tool,
    refine_tool_commands,
    cpus,
):
    """Submit one refinement HPC job per (dataset, input MTZ) pair.

    For each dataset matching *filters* and each processing-tool MTZ found
    for it, a batch job is created that fetches the model and MTZ into a
    scratch directory, optionally reruns aimless, runs the supplied
    refine-tool commands and uploads the results back to the project.
    """
    epoch = round(time.time())
    jobs = JobsSet("Refine")
    hpc = SITE.get_hpc_runner()

    for dset in get_refine_datasets(project, filters, refine_tool):
        for tool, input_mtz in _find_input_mtzs(project, dset):
            batch = hpc.new_batch_file(
                f"refine {tool} {dset.name}",
                project_script(project, f"refine_{tool}_{refine_tool}_{dset.name}.sh"),
                project_log_path(
                    project, f"refine_{tool}_{dset.name}_{epoch}_%j_out.txt"),
                project_log_path(
                    project, f"refine_{tool}_{dset.name}_{epoch}_%j_err.txt"),
                cpus,
            )
            batch.set_options(
                time=Duration(hours=12),
                nodes=1,
                mem_per_cpu=DataSize(gigabyte=5),
            )

            # define CRYPT_CMD (empty string for unencrypted projects)
            batch.add_commands(crypt_shell.crypt_cmd(project))
            # all work happens inside a throw-away scratch directory
            batch.assign_variable("WORK_DIR", "`mktemp -d`")
            batch.add_commands(
                "cd $WORK_DIR",
                crypt_shell.fetch_file(project, pdb_file, "model.pdb"),
                crypt_shell.fetch_file(project, input_mtz, "input.mtz"),
            )

            # TODO: load tool specific modules?
            batch.load_modules(HPC_MODULES)

            if run_aimless:
                batch.add_commands(
                    _aimless_cmd(space_group.short_name, "input.mtz"))

            results_dir = Path(project.get_dataset_results_dir(dset), tool)

            # run refinement, upload results, then clean up the scratch dir
            batch.add_commands(
                *refine_tool_commands,
                _upload_result_cmd(project, results_dir),
                "cd",
                "rm -rf $WORK_DIR",
            )

            batch.save()
            jobs.add_job(batch)
            # chain the status-update job after this batch
            add_update_job(jobs, hpc, project, refine_tool, dset, batch)

    jobs.submit()
def _write_main_script(
    project: Project, pandda_dir: Path, method, methodshort, options
):
    """Write the batch script that runs the main PanDDA analysis job.

    Returns the saved batch file; the caller is responsible for submitting it.
    Raises NotImplementedError for encrypted projects.
    """
    epoch = round(time.time())
    log_prefix = project_log_path(project, f"PanDDA_{method}_{epoch}_%j_")
    pandda_script = project_script(project, PANDDA_WORKER)
    giant_cluster = "/mxn/groups/biomax/wmxsoft/pandda/bin/giant.datasets.cluster"

    # when re-processing Z-maps the clustering step is skipped
    if options["reprocessZmap"]:
        pandda_cluster = ""
    else:
        pandda_cluster = f"{giant_cluster} ./*/final.pdb pdb_label=foldername"

    hpc = SITE.get_hpc_runner()
    batch = hpc.new_batch_file(
        f"PDD{methodshort}",
        project_script(project, f"pandda_{method}.sh"),
        f"{log_prefix}out.txt",
        f"{log_prefix}err.txt",
        cpus=40,
    )
    batch.set_options(
        time=Duration(hours=99),
        exclusive=True,
        nodes=1,
    )

    if project.encrypted:
        # TODO: implement this?
        raise NotImplementedError("pandda for encrypted projects")
        # Sketch of the intended encrypted-project flow (kept for reference):
        # batch.add_command(crypt_shell.crypt_cmd(project))
        # batch.assign_variable("WORK_DIR", "`mktemp -d`")
        # batch.add_commands(
        #     "cd $WORK_DIR", crypt_shell.fetch_dir(project, data_dir, ".")
        # )
        #
        # batch.load_modules(["gopresto", versions.CCP4_MOD, versions.PYMOL_MOD])
        # batch.add_commands(
        #     pandda_cluster,
        #     f'python {pandda_script} . {project.protein} "{options}"',
        #     crypt_shell.upload_dir(
        #         project, "$WORK_DIR/pandda", path.join(data_dir, "pandda")
        #     ),
        #     crypt_shell.upload_dir(
        #         project,
        #         "$WORK_DIR/clustered-datasets",
        #         path.join(data_dir, "clustered-datasets"),
        #     ),
        # )
    else:
        batch.add_command(f"cd {pandda_dir}")
        add_pandda_init_commands(batch)
        batch.add_commands(
            pandda_cluster,
            f'python {pandda_script} {pandda_dir} {project.protein} "{options}"',
            f"chmod -R 777 {project.pandda_dir}",
        )

    # add commands to fix symlinks: replace each symlink with a hard link to
    # its resolved target ($0 is the matched file inside 'bash -c')
    ln_command = '\'ln -f "$(readlink -m "$0")" "$0"\' {} \\;'
    batch.add_commands(
        f"cd {project.pandda_dir}; find -type l -iname *-pandda-input.* -exec bash -c {ln_command}",
        f"cd {project.pandda_dir}; find -type l -iname *pandda-model.pdb -exec bash -c {ln_command}",
    )

    batch.save()

    return batch
def submit(request):
    """Handle a PanDDA submission posted via the 'panddaform' GET parameter.

    Dispatches either a background worker thread or (for Z-map
    re-processing) a previously written sbatch script, then renders the
    'jobs submitted' page.
    """
    project = current_project(request)

    panddaCMD = str(request.GET.get("panddaform"))
    if "analyse" in panddaCMD:
        # the form packs all fields into one ';'-separated string
        (
            function,
            proc,
            ref,
            use_apo,
            use_dmso,
            reproZmaps,
            use_CAD,
            ref_CAD,
            ign_errordts,
            keepup_last,
            ign_symlink,
            PanDDAfilter,
            min_dataset,
            customPanDDA,
            cifMethod,
            ncpus,
            blacklist,
        ) = panddaCMD.split(";")
        method = proc + "_" + ref

        if PanDDAfilter == "null":
            useSelected = False
        else:
            useSelected = True

        # fall back to 40 minimum datasets when the field is not numeric
        if min_dataset.isnumeric():
            min_dataset = int(min_dataset)
        else:
            min_dataset = 40

        options = {
            "method": method,
            "blacklist": blacklist,
            "useApos": str2bool(use_apo),
            "useSelected": useSelected,
            "reprocessZmap": str2bool(reproZmaps),
            "initpass": True,
            "min_datasets": min_dataset,
            "rerun_state": False,
            "dtsfilter": PanDDAfilter,
            "customPanDDA": customPanDDA,
            "reprocessing": False,
            "reprocessing_mode": "reprocess",
            "nproc": ncpus,
        }

        res_dir = Path(project.pandda_dir, method)
        # short label from the first two letters of each method part
        methodshort = proc[:2] + ref[:2]
        if methodshort == "bebe":
            methodshort = "best"

        if not options["reprocessZmap"] and res_dir.exists():
            t1 = threading.Thread(
                target=pandda_worker,
                args=(project, method, methodshort, options, cifMethod),
            )
            t1.daemon = True
            t1.start()
        elif options["reprocessZmap"]:
            # NOTE(review): 'hpc' is not defined in this function — presumably a
            # module-level runner; verify, otherwise this branch raises NameError
            script = project_script(project, f"panddaRUN_{project.protein}{method}.sh")
            hpc.run_sbatch(script)
        else:
            start_thread(
                pandda_worker, project, method, methodshort, options, cifMethod
            )

    return render(request, "jobs_submitted.html", {"command": panddaCMD})
def _write_prepare_script(
    project: Project,
    rn,
    method,
    dataset,
    pdb,
    mtz,
    resHigh,
    free_r_flag,
    native_f,
    sigma_fp,
    cif_method,
):
    """Write the batch script that prepares one dataset for PanDDA analysis.

    The script fetches the refined model and MTZ into a scratch directory,
    generates fragment restraints (elbow or grade) for non-apo crystals,
    re-cuts and re-flags the MTZ, computes map coefficients and uploads
    everything to the dataset's PanDDA directory. Returns the saved batch
    file; the caller submits it.
    """
    epoch = round(time.time())
    output_dir = Path(project.pandda_method_dir(method), dataset.name)

    hpc = SITE.get_hpc_runner()
    batch = hpc.new_batch_file(
        f"PnD{rn}",
        project_script(project, f"pandda_prepare_{method}_{dataset.name}.sh"),
        project_log_path(project, f"{dataset.name}_PanDDA_{epoch}_%j_out.txt"),
        project_log_path(project, f"{dataset.name}_PanDDA_{epoch}_%j_err.txt"),
        cpus=1,
    )
    batch.set_options(time=Duration(minutes=15), memory=DataSize(gigabyte=5))

    # define CRYPT_CMD (empty string for unencrypted projects)
    batch.add_command(crypt_shell.crypt_cmd(project))
    batch.assign_variable("DEST_DIR", output_dir)
    # all work happens inside a throw-away scratch directory
    batch.assign_variable("WORK_DIR", "`mktemp -d`")
    batch.add_commands(
        "cd $WORK_DIR",
        crypt_shell.fetch_file(project, pdb, "final.pdb"),
        crypt_shell.fetch_file(project, mtz, "final.mtz"),
    )

    batch.purge_modules()
    batch.load_modules(
        ["gopresto", versions.PHENIX_MOD, versions.CCP4_MOD, versions.BUSTER_MOD]
    )

    if not dataset.crystal.is_apo():
        fragment = get_crystals_fragment(dataset.crystal)
        # non-apo crystal should have a fragment
        assert fragment
        if cif_method == "elbow":
            cif_cmd = f"phenix.elbow --smiles='{fragment.smiles}' --output=$WORK_DIR/{fragment.code} --opt\n"
        else:
            assert cif_method == "grade"
            cif_cmd = (
                f"grade '{fragment.smiles}' -ocif $WORK_DIR/{fragment.code}.cif "
                f"-opdb $WORK_DIR/{fragment.code}.pdb -nomogul\n"
            )
        batch.add_command(cif_cmd)

    # cad: cut resolution; uniqueify/freerflag: rebuild free-R flags;
    # phenix.maps: compute map coefficients which become the new final.mtz
    batch.add_commands(
        f'printf "monitor BRIEF\\n labin file 1 -\\n ALL\\n resolution file 1 999.0 {resHigh}\\n" | \\\n'
        " cad hklin1 $WORK_DIR/final.mtz hklout $WORK_DIR/final.mtz",
        "uniqueify -f FreeR_flag $WORK_DIR/final.mtz $WORK_DIR/final.mtz",
        f'printf "COMPLETE FREE={free_r_flag} \\nEND\\n" | \\\n'
        " freerflag hklin $WORK_DIR/final.mtz hklout $WORK_DIR/final_rfill.mtz",
        f"phenix.maps final_rfill.mtz final.pdb maps.input.reflection_data.labels='{native_f},{sigma_fp}'",
        "mv final.mtz final_original.mtz",
        "mv final_map_coeffs.mtz final.mtz",
        "rm -rf $DEST_DIR",
        crypt_shell.upload_dir(project, "$WORK_DIR", "$DEST_DIR"),
        "rm -rf $WORK_DIR",
    )

    batch.save()
    return batch
def auto_ligand_fit(project, useLigFit, useRhoFit, filters, cifMethod, custom_ligfit, custom_rhofit):
    """Submit one ligand-fitting HPC job per selected refine result.

    For each non-apo refine result, builds the fragment restraints
    (elbow/acedrg/grade per *cifMethod*) and runs rhofit and/or
    phenix.ligandfit against the refined model and MTZ.
    """
    # Modules for HPC env
    softwares = ["gopresto", versions.BUSTER_MOD, versions.PHENIX_MOD]

    jobs = JobsSet("Ligand Fit")
    hpc = SITE.get_hpc_runner()

    refine_results = _get_refine_results(project, filters, useLigFit, useRhoFit)

    for num, result in enumerate(refine_results):
        dataset = result.dataset
        if dataset.crystal.is_apo():
            # don't try to fit ligand to an apo crystal
            continue

        fragment = get_crystals_fragment(dataset.crystal)
        result_dir = project.get_refine_result_dir(result)
        pdb = Path(result_dir, "final.pdb")

        clear_tmp_cmd = ""
        cif_out = Path(result_dir, fragment.code)
        # build the restraint-generation command for the chosen CIF tool
        if cifMethod == "elbow":
            cif_cmd = f"phenix.elbow --smiles='{fragment.smiles}' --output={cif_out}\n"
        elif cifMethod == "acedrg":
            cif_cmd = f"acedrg -i '{fragment.smiles}' -o {cif_out}\n"
            # acedrg leaves a *_TMP/ scratch directory behind
            clear_tmp_cmd = f"rm -rf {cif_out}_TMP/\n"
        elif cifMethod == "grade":
            cif_cmd = (
                f"rm -f {cif_out}.cif {cif_out}.pdb\n"
                f"grade '{fragment.smiles}' -ocif {cif_out}.cif -opdb {cif_out}.pdb -nomogul\n"
            )
        else:
            cif_cmd = ""

        rhofit_cmd = ""
        ligfit_cmd = ""
        ligCIF = f"{cif_out}.cif"
        ligPDB = f"{cif_out}.pdb"

        rhofit_outdir = Path(result_dir, "rhofit")
        ligfit_outdir = Path(result_dir, "ligfit")
        mtz_input = Path(result_dir, "final.mtz")

        if useRhoFit:
            # rhofit refuses to run into an existing output dir
            if rhofit_outdir.exists():
                rhofit_cmd += f"rm -rf {rhofit_outdir}\n"
            rhofit_cmd += f"rhofit -l {ligCIF} -m {mtz_input} -p {pdb} -d {rhofit_outdir} {custom_rhofit}\n"

        if useLigFit:
            if ligfit_outdir.exists():
                ligfit_cmd += f"rm -rf {ligfit_outdir}\n"
            ligfit_cmd += f"mkdir -p {ligfit_outdir}\n"
            ligfit_cmd += f"cd {ligfit_outdir} \n"
            ligfit_cmd += (
                f"phenix.ligandfit data={mtz_input} model={pdb} ligand={ligPDB} "
                f"fill=True clean_up=True {custom_ligfit}\n")

        batch = hpc.new_batch_file(
            "autoLigfit",
            project_script(project, f"autoligand_{dataset.name}_{num}.sh"),
            project_log_path(project, "auto_ligfit_%j_out.txt"),
            project_log_path(project, "auto_ligfit_%j_err.txt"),
            cpus=1,
        )
        batch.set_options(time=Duration(hours=1))

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            cif_cmd,
            rhofit_cmd,
            ligfit_cmd,
        )

        batch.add_commands(clear_tmp_cmd)

        batch.save()
        jobs.add_job(batch)

        # NOTE: all the update commands needs to be chained to run after each other,
        # due to limitations (bugs!) in jobsd handling of 'run_after' dependencies.
        # Currently it does not work to specify that multiple jobs should be run after
        # a job is finished.
        #
        if useRhoFit:
            batch = add_update_job(jobs, hpc, project, "rhofit", dataset, batch)
        if useLigFit:
            add_update_job(jobs, hpc, project, "ligandfit", dataset, batch)

    jobs.submit()