Example 1
def run_dials(proj, filters, options):
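    """Build and submit one XIA2/DIALS HPC batch job per matching dataset."""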
    # Modules list for HPC env
    softwares = ["gopresto", versions.DIALS_MOD]

    jobs = JobsSet("XIA2/DIALS")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "dials")):
        outdir, image_file = _get_dataset_params(proj, dset)

        spg = get_space_group_argument(Tools.DIALS, options["spacegroup"])

        if options["cellparam"] != "":
            cellpar = ",".join(options["cellparam"].split(","))
            cellpar = cellpar.replace("(", "").replace(")", "")
            unit_cell = f"unit_cell={cellpar}"
        else:
            unit_cell = ""
        customdials = options["customdials"]

        if options["friedel_law"] == "true":
            friedel = "atom=X"
        else:
            friedel = ""

        dials_commands, cpus = get_xia_dials_commands(spg, unit_cell,
                                                      customdials, friedel,
                                                      image_file, dset.images)

        batch = hpc.new_batch_file(
            "DIALS",
            project_script(proj, f"dials_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_err.txt"),
            cpus,
        )

        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            f"mkdir -p {outdir}/dials",
            f"cd {outdir}/dials",
            *dials_commands,
        )

        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, proj, "dials", dset, batch)

    jobs.submit()
Example 2
def run_xdsapp(project, filters, options):
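    """Build and submit one XDSAPP HPC batch job per matching dataset."""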
    # Modules list for HPC env
    softwares = ["gopresto", versions.XDSAPP_MOD]

    jobs = JobsSet("XDSAPP")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(project, filters, "xdsapp")):
        outdir, image_file = _get_dataset_params(project, dset)

        if options["spacegroup"] is not None:
            cellpar = " ".join(options["cellparam"].split(","))
            spacegroup = get_space_group_argument(Tools.XDSAPP,
                                                  options["spacegroup"])
            spg = f"--spacegroup='{spacegroup} {cellpar}'"
        else:
            spg = ""

        customxdsapp = options["customxdsapp"]
        if options["friedel_law"] == "true":
            friedel = "--fried=True"
        else:
            friedel = "--fried=False"

        xdsapp_command, cpus = get_xdsapp_command(outdir, spg, customxdsapp,
                                                  friedel, image_file,
                                                  dset.images)

        batch = hpc.new_batch_file(
            "XDSAPP",
            project_script(project, f"xdsapp_fragmax_part{num}.sh"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_out.txt"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_err.txt"),
            cpus,
        )

        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            f"mkdir -p {outdir}/xdsapp",
            f"cd {outdir}/xdsapp",
            xdsapp_command,
        )

        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, project, "xdsapp", dset, batch)

    jobs.submit()
Example 3
def run_autoproc(proj, filters, options):
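    """Build and submit one autoPROC HPC batch job per matching dataset."""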
    # Modules list for HPC env
    softwares = [
        "gopresto",
        versions.CCP4_MOD,
        versions.AUTOPROC_MOD,
        versions.DURIN_MOD,
    ]

    jobs = JobsSet("autoPROC")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "autoproc")):
        batch = hpc.new_batch_file(
            "autoPROC",
            project_script(proj, f"autoproc_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_err.txt"),
        )

        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
            cpus_per_task=64,
            memory=DataSize(gigabyte=300),
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        outdir, h5master, sample, num_images = _get_dataset_params(proj, dset)

        if options["spacegroup"] != "":
            spacegroup = options["spacegroup"]
            spg = f"symm='{spacegroup}'"
        else:
            spg = ""
        if options["cellparam"] != "":
            cellpar = " ".join(options["cellparam"].split(","))
            cellpar = cellpar.replace("(", "").replace(")", "")
            unit_cell = f"cell='{cellpar}'"
        else:
            unit_cell = ""

        customautoproc = options["customautoproc"]
        if options["friedel_law"] == "true":
            friedel = "-ANO"
        else:
            friedel = "-noANO"

        batch.add_commands(
            # autoPROC refuses to run if its output directory already exists
            f"rm -rf {outdir}/autoproc",
            f"mkdir -p {outdir}",
            f"cd {outdir}",
            get_autoproc_command(outdir, spg, unit_cell, customautoproc,
                                 friedel, h5master, num_images),
        )

        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, proj, "autoproc", dset, batch)

    jobs.submit()
Example 4
def launch_refine_jobs(
    project: Project,
    filters,
    pdb_file,
    space_group,
    run_aimless,
    refine_tool,
    refine_tool_commands,
    cpus,
):
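    """Submit refinement jobs: one batch per (dataset, input MTZ) pair.

    Each job fetches the model and input MTZ into a temporary work
    directory, optionally re-scales the data with aimless, runs the
    refine tool commands and uploads the results.
    """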
    epoch = round(time.time())
    jobs = JobsSet("Refine")
    hpc = SITE.get_hpc_runner()

    for dset in get_refine_datasets(project, filters, refine_tool):
        for tool, input_mtz in _find_input_mtzs(project, dset):
            batch = hpc.new_batch_file(
                f"refine {tool} {dset.name}",
                project_script(project,
                               f"refine_{tool}_{refine_tool}_{dset.name}.sh"),
                project_log_path(
                    project, f"refine_{tool}_{dset.name}_{epoch}_%j_out.txt"),
                project_log_path(
                    project, f"refine_{tool}_{dset.name}_{epoch}_%j_err.txt"),
                cpus,
            )
            batch.set_options(
                time=Duration(hours=12),
                nodes=1,
                mem_per_cpu=DataSize(gigabyte=5),
            )

            batch.add_commands(crypt_shell.crypt_cmd(project))

            batch.assign_variable("WORK_DIR", "`mktemp -d`")
            batch.add_commands(
                "cd $WORK_DIR",
                crypt_shell.fetch_file(project, pdb_file, "model.pdb"),
                crypt_shell.fetch_file(project, input_mtz, "input.mtz"),
            )

            # TODO: load tool specific modules?
            batch.load_modules(HPC_MODULES)

            if run_aimless:
                batch.add_commands(
                    _aimless_cmd(space_group.short_name, "input.mtz"))

            results_dir = Path(project.get_dataset_results_dir(dset), tool)

            batch.add_commands(
                *refine_tool_commands,
                _upload_result_cmd(project, results_dir),
                "cd",
                "rm -rf $WORK_DIR",
            )

            batch.save()
            jobs.add_job(batch)

            add_update_job(jobs, hpc, project, refine_tool, dset, batch)

    jobs.submit()
Example 5
def _write_main_script(
    project: Project, pandda_dir: Path, method, methodshort, options
):
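    """Write the batch script that runs the main PanDDA analysis.

    Returns the saved batch file; it is not submitted here.
    """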
    epoch = round(time.time())

    log_prefix = project_log_path(project, f"PanDDA_{method}_{epoch}_%j_")

    pandda_script = project_script(project, PANDDA_WORKER)

    giant_cluster = "/mxn/groups/biomax/wmxsoft/pandda/bin/giant.datasets.cluster"
    if options["reprocessZmap"]:
        pandda_cluster = ""
    else:
        pandda_cluster = f"{giant_cluster} ./*/final.pdb pdb_label=foldername"

    hpc = SITE.get_hpc_runner()
    batch = hpc.new_batch_file(
        f"PDD{methodshort}",
        project_script(project, f"pandda_{method}.sh"),
        f"{log_prefix}out.txt",
        f"{log_prefix}err.txt",
        cpus=40,
    )
    batch.set_options(
        time=Duration(hours=99),
        exclusive=True,
        nodes=1,
    )

    if project.encrypted:
        # TODO: implement this?
        raise NotImplementedError("pandda for encrypted projects")
        # batch.add_command(crypt_shell.crypt_cmd(project))
        # batch.assign_variable("WORK_DIR", "`mktemp -d`")
        # batch.add_commands(
        #     "cd $WORK_DIR", crypt_shell.fetch_dir(project, data_dir, ".")
        # )
        #
        # batch.load_modules(["gopresto", versions.CCP4_MOD, versions.PYMOL_MOD])
        # batch.add_commands(
        #     pandda_cluster,
        #     f'python {pandda_script} . {project.protein} "{options}"',
        #     crypt_shell.upload_dir(
        #         project, "$WORK_DIR/pandda", path.join(data_dir, "pandda")
        #     ),
        #     crypt_shell.upload_dir(
        #         project,
        #         "$WORK_DIR/clustered-datasets",
        #         path.join(data_dir, "clustered-datasets"),
        #     ),
        # )
    else:
        batch.add_command(f"cd {pandda_dir}")

        add_pandda_init_commands(batch)

        batch.add_commands(
            pandda_cluster,
            f'python {pandda_script} {pandda_dir} {project.protein} "{options}"',
            f"chmod -R 777 {project.pandda_dir}",
        )

        # fix symlinks: replace symlinked input files with hard links to their targets
        ln_command = '\'ln -f "$(readlink -m "$0")" "$0"\' {} \\;'
        batch.add_commands(
            f"cd {project.pandda_dir}; find -type l -iname '*-pandda-input.*' -exec bash -c {ln_command}",
            f"cd {project.pandda_dir}; find -type l -iname '*pandda-model.pdb' -exec bash -c {ln_command}",
        )

    batch.save()
    return batch
Example 6
def _write_prepare_script(
    project: Project,
    rn,
    method,
    dataset,
    pdb,
    mtz,
    resHigh,
    free_r_flag,
    native_f,
    sigma_fp,
    cif_method,
):
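    """Write the batch script that prepares one dataset for PanDDA analysis:
    fetch model and data, generate a ligand CIF if needed, regenerate free-R
    flags, compute map coefficients, then upload the results to $DEST_DIR.

    Returns the saved batch file; it is not submitted here.
    """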
    epoch = round(time.time())
    output_dir = Path(project.pandda_method_dir(method), dataset.name)

    hpc = SITE.get_hpc_runner()
    batch = hpc.new_batch_file(
        f"PnD{rn}",
        project_script(project, f"pandda_prepare_{method}_{dataset.name}.sh"),
        project_log_path(project, f"{dataset.name}_PanDDA_{epoch}_%j_out.txt"),
        project_log_path(project, f"{dataset.name}_PanDDA_{epoch}_%j_err.txt"),
        cpus=1,
    )
    batch.set_options(time=Duration(minutes=15), memory=DataSize(gigabyte=5))

    batch.add_command(crypt_shell.crypt_cmd(project))
    batch.assign_variable("DEST_DIR", output_dir)
    batch.assign_variable("WORK_DIR", "`mktemp -d`")
    batch.add_commands(
        "cd $WORK_DIR",
        crypt_shell.fetch_file(project, pdb, "final.pdb"),
        crypt_shell.fetch_file(project, mtz, "final.mtz"),
    )

    batch.purge_modules()
    batch.load_modules(
        ["gopresto", versions.PHENIX_MOD, versions.CCP4_MOD, versions.BUSTER_MOD]
    )

    if not dataset.crystal.is_apo():
        fragment = get_crystals_fragment(dataset.crystal)
        # non-apo crystal should have a fragment
        assert fragment
        if cif_method == "elbow":
            cif_cmd = f"phenix.elbow --smiles='{fragment.smiles}' --output=$WORK_DIR/{fragment.code} --opt\n"
        else:
            assert cif_method == "grade"
            cif_cmd = (
                f"grade '{fragment.smiles}' -ocif $WORK_DIR/{fragment.code}.cif "
                f"-opdb $WORK_DIR/{fragment.code}.pdb -nomogul\n"
            )

        batch.add_command(cif_cmd)

    batch.add_commands(
        # cut the data at the requested high-resolution limit with CCP4 'cad'
        f'printf "monitor BRIEF\\n labin file 1 -\\n  ALL\\n resolution file 1 999.0 {resHigh}\\n" | \\\n'
        "    cad hklin1 $WORK_DIR/final.mtz hklout $WORK_DIR/final.mtz",
        # ensure unique reflections and a FreeR_flag column
        "uniqueify -f FreeR_flag $WORK_DIR/final.mtz $WORK_DIR/final.mtz",
        # fill in any missing free-R flags
        f'printf "COMPLETE FREE={free_r_flag} \\nEND\\n" | \\\n'
        "    freerflag hklin $WORK_DIR/final.mtz hklout $WORK_DIR/final_rfill.mtz",
        # compute map coefficients for PanDDA
        f"phenix.maps final_rfill.mtz final.pdb maps.input.reflection_data.labels='{native_f},{sigma_fp}'",
        "mv final.mtz final_original.mtz",
        "mv final_map_coeffs.mtz final.mtz",
        "rm -rf $DEST_DIR",
        crypt_shell.upload_dir(project, "$WORK_DIR", "$DEST_DIR"),
        "rm -rf $WORK_DIR",
    )

    batch.save()
    return batch
Example 7
def auto_ligand_fit(project, useLigFit, useRhoFit, filters, cifMethod,
                    custom_ligfit, custom_rhofit):
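    """Build and submit ligand fitting jobs (rhofit and/or phenix.ligandfit)
    for each selected refine result, skipping apo crystals."""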
    # Modules for HPC env
    softwares = ["gopresto", versions.BUSTER_MOD, versions.PHENIX_MOD]

    jobs = JobsSet("Ligand Fit")
    hpc = SITE.get_hpc_runner()

    refine_results = _get_refine_results(project, filters, useLigFit,
                                         useRhoFit)

    for num, result in enumerate(refine_results):
        dataset = result.dataset
        if dataset.crystal.is_apo():
            # don't try to fit ligand to an apo crystal
            continue

        fragment = get_crystals_fragment(dataset.crystal)
        result_dir = project.get_refine_result_dir(result)

        pdb = Path(result_dir, "final.pdb")

        clear_tmp_cmd = ""
        cif_out = Path(result_dir, fragment.code)

        if cifMethod == "elbow":
            cif_cmd = f"phenix.elbow --smiles='{fragment.smiles}' --output={cif_out}\n"
        elif cifMethod == "acedrg":
            cif_cmd = f"acedrg -i '{fragment.smiles}' -o {cif_out}\n"
            clear_tmp_cmd = f"rm -rf {cif_out}_TMP/\n"
        elif cifMethod == "grade":
            cif_cmd = (
                f"rm -f {cif_out}.cif {cif_out}.pdb\n"
                f"grade '{fragment.smiles}' -ocif {cif_out}.cif -opdb {cif_out}.pdb -nomogul\n"
            )
        else:
            cif_cmd = ""
        rhofit_cmd = ""
        ligfit_cmd = ""

        ligCIF = f"{cif_out}.cif"
        ligPDB = f"{cif_out}.pdb"

        rhofit_outdir = Path(result_dir, "rhofit")
        ligfit_outdir = Path(result_dir, "ligfit")
        mtz_input = Path(result_dir, "final.mtz")

        if useRhoFit:
            if rhofit_outdir.exists():
                rhofit_cmd += f"rm -rf {rhofit_outdir}\n"
            rhofit_cmd += f"rhofit -l {ligCIF} -m {mtz_input} -p {pdb} -d {rhofit_outdir} {custom_rhofit}\n"

        if useLigFit:
            if ligfit_outdir.exists():
                ligfit_cmd += f"rm -rf {ligfit_outdir}\n"
            ligfit_cmd += f"mkdir -p {ligfit_outdir}\n"
            ligfit_cmd += f"cd {ligfit_outdir} \n"
            ligfit_cmd += (
                f"phenix.ligandfit data={mtz_input} model={pdb} ligand={ligPDB} "
                f"fill=True clean_up=True {custom_ligfit}\n")

        batch = hpc.new_batch_file(
            "autoLigfit",
            project_script(project, f"autoligand_{dataset.name}_{num}.sh"),
            project_log_path(project, "auto_ligfit_%j_out.txt"),
            project_log_path(project, "auto_ligfit_%j_err.txt"),
            cpus=1,
        )

        batch.set_options(time=Duration(hours=1))

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            cif_cmd,
            rhofit_cmd,
            ligfit_cmd,
        )

        batch.add_commands(clear_tmp_cmd)

        batch.save()
        jobs.add_job(batch)

        # NOTE: all the update commands need to be chained to run one after
        # another, due to limitations (bugs!) in jobsd's handling of
        # 'run_after' dependencies. It is currently not possible to specify
        # that multiple jobs should run after a single job finishes.

        if useRhoFit:
            batch = add_update_job(jobs, hpc, project, "rhofit", dataset,
                                   batch)

        if useLigFit:
            add_update_job(jobs, hpc, project, "ligandfit", dataset, batch)

    jobs.submit()
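
All seven launchers share the same shape: build a per-dataset batch script (scheduler options, environment modules, shell commands), collect the scripts in a JobsSet, and submit the whole set at once. Below is a minimal, self-contained sketch of that pattern; BatchFile and JobsSet here are simplified hypothetical stand-ins for illustration, not the real SITE/hpc API.

from pathlib import Path
import tempfile

class BatchFile:
    """Hypothetical stand-in for the object returned by hpc.new_batch_file()."""

    def __init__(self, name, script_path):
        self.name = name
        self.script_path = Path(script_path)
        self._lines = ["#!/bin/bash"]

    def set_options(self, **opts):
        # the real runner would translate these into scheduler directives
        for key, val in opts.items():
            self._lines.append(f"# option: {key}={val}")

    def load_modules(self, modules):
        self._lines.append("module load " + " ".join(modules))

    def add_commands(self, *commands):
        # skip empty strings, as several launchers pass conditionally-built commands
        self._lines.extend(cmd for cmd in commands if cmd)

    def save(self):
        self.script_path.write_text("\n".join(self._lines) + "\n")

class JobsSet:
    """Hypothetical stand-in that collects batches and submits them together."""

    def __init__(self, name):
        self.name = name
        self.batches = []

    def add_job(self, batch):
        self.batches.append(batch)

    def submit(self):
        for batch in self.batches:
            print(f"[{self.name}] would submit {batch.script_path}")

# usage: build one script per dataset, then submit the whole set
jobs = JobsSet("demo")
for num, dataset in enumerate(["dset-a", "dset-b"]):
    batch = BatchFile("demo", Path(tempfile.gettempdir(), f"demo_part{num}.sh"))
    batch.set_options(time="01:00:00", nodes=1)
    batch.load_modules(["gopresto"])
    batch.add_commands(f"mkdir -p /tmp/{dataset}", f"cd /tmp/{dataset}")
    batch.save()
    jobs.add_job(batch)
jobs.submit()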