Example #1
def add_update_job(
    jobs_set: JobsSet, hpc, project: Project, tool, dataset, dataset_batch
):
    batch = hpc.new_batch_file(
        f"update results for {dataset.name}",
        "./manage.py",
        project_syslog_path(project, "update_dataset_results-%j.stdout"),
        project_syslog_path(project, "update_dataset_results-%j.stderr"),
    )

    jobs_set.add_job(
        batch,
        ["update", f"{project.id}", tool, f"{dataset.id}"],
        run_after=[dataset_batch],
        run_on=JobsSet.Destination.LOCAL,
    )

    return batch
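
add_update_job returns the batch it queues, so callers can chain further work behind it via run_after. As a rough mental model (a toy sketch using the standard library, not the real jobsd scheduler), run_after edges form a dependency graph that is resolved before anything launches; the job names below are illustrative:

from graphlib import TopologicalSorter  # stdlib, Python 3.9+

# Toy model of the run_after relationship built above: the update
# job lists the dataset's processing batch as its predecessor.
graph = {"update_dataset_results": {"process_dataset"}}
print(list(TopologicalSorter(graph).static_order()))
# -> ['process_dataset', 'update_dataset_results']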
Example #2
def pandda_worker(project: Project, method, methodshort, options, cif_method):
    rn = str(randint(10000, 99999))
    prepare_scripts = []

    proc_tool, refine_tool = method.split("_")
    refine_results = _get_best_results(project, proc_tool, refine_tool)

    selection = PanddaSelectedDatasets()

    for refine_result in refine_results:
        res_dir = project.get_refine_result_dir(refine_result)
        final_pdb = Path(res_dir, "final.pdb")
        final_mtz = Path(res_dir, "final.mtz")

        selection.add(refine_result.dataset.name, final_pdb)

        res_high, free_r_flag, native_f, sigma_fp = read_info(project, str(final_mtz))

        script = _write_prepare_script(
            project,
            rn,
            method,
            refine_result.dataset,
            final_pdb,
            final_mtz,
            res_high,
            free_r_flag,
            native_f,
            sigma_fp,
            cif_method,
        )

        prepare_scripts.append(script)

    pandda_dir = Path(project.pandda_dir, method)
    pandda_dir.mkdir(parents=True, exist_ok=True)

    selection.save(pandda_dir)

    main_script = _write_main_script(project, pandda_dir, method, methodshort, options)

    #
    # submit all PanDDA scripts to the HPC
    #
    jobs = JobsSet("PanDDa")

    for prep_script in prepare_scripts:
        jobs.add_job(prep_script)

    jobs.add_job(main_script, run_after=prepare_scripts)
    jobs.submit()
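
The loop queues every prepare script as an independent job and then makes the main PanDDA job wait for all of them. A minimal sketch of that fan-in, again using the standard library's topological sorter as a stand-in for the real scheduler (job names are illustrative):

from graphlib import TopologicalSorter  # stdlib, Python 3.9+

# The main job depends on every prepare job, matching
# jobs.add_job(main_script, run_after=prepare_scripts) above.
prepare = {f"prepare_{i}" for i in range(3)}
order = TopologicalSorter({"pandda_main": prepare}).static_order()
print(list(order))  # the three prepare jobs first, then 'pandda_main'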
Example #3
def run_dials(proj, filters, options):
    # Modules list for HPC env
    softwares = ["gopresto", versions.DIALS_MOD]

    jobs = JobsSet("XIA2/DIALS")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "dials")):
        outdir, image_file = _get_dataset_params(proj, dset)

        spg = get_space_group_argument(Tools.DIALS, options["spacegroup"])

        if options["cellparam"] != "":
            cellpar = ",".join(options["cellparam"].split(","))
            cellpar = cellpar.replace("(", "").replace(")", "")
            unit_cell = f"unit_cell={cellpar}"
        else:
            unit_cell = ""
        customdials = options["customdials"]

        if options["friedel_law"] == "true":
            friedel = "atom=X"
        else:
            friedel = ""

        dials_commands, cpus = get_xia_dials_commands(spg, unit_cell,
                                                      customdials, friedel,
                                                      image_file, dset.images)

        batch = hpc.new_batch_file(
            "DIALS",
            project_script(proj, f"dials_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_xia2DIALS_{epoch}_%j_err.txt"),
            cpus,
        )

        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            f"mkdir -p {outdir}/dials",
            f"cd {outdir}/dials",
            *dials_commands,
        )

        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, proj, "dials", dset, batch)

    jobs.submit()
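
The cellparam handling above just strips parentheses from the web-form value and wraps it in the unit_cell= argument that xia2/DIALS expects. A standalone sketch of that normalization (the helper name is mine, not part of the codebase):

def unit_cell_argument(cellparam: str) -> str:
    """Turn a form value like '(77.1,77.1,37.2,90,90,90)' into a
    xia2/DIALS unit_cell= argument; empty input yields no argument."""
    cellpar = cellparam.replace("(", "").replace(")", "")
    return f"unit_cell={cellpar}" if cellpar else ""

assert unit_cell_argument("(77.1,77.1,37.2,90,90,90)") == \
    "unit_cell=77.1,77.1,37.2,90,90,90"
assert unit_cell_argument("") == ""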
Example #4
def run_xdsapp(project, filters, options):
    # Modules list for HPC env
    softwares = ["gopresto", versions.XDSAPP_MOD]

    jobs = JobsSet("XDSAPP")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(project, filters, "xdsapp")):
        outdir, image_file = _get_dataset_params(project, dset)

        if options["spacegroup"] is not None:
            cellpar = " ".join(options["cellparam"].split(","))
            spacegroup = get_space_group_argument(Tools.XDSAPP,
                                                  options["spacegroup"])
            spg = f"--spacegroup='{spacegroup} {cellpar}'"
        else:
            spg = ""

        customxdsapp = options["customxdsapp"]
        if options["friedel_law"] == "true":
            friedel = "--fried=True"
        else:
            friedel = "--fried=False"

        xdsapp_command, cpus = get_xdsapp_command(outdir, spg, customxdsapp,
                                                  friedel, image_file,
                                                  dset.images)

        batch = hpc.new_batch_file(
            "XDSAPP",
            project_script(project, f"xdsapp_fragmax_part{num}.sh"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_out.txt"),
            project_log_path(project, f"multi_xdsapp_{epoch}_%j_err.txt"),
            cpus,
        )

        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            f"mkdir -p {outdir}/xdsapp",
            f"cd {outdir}/xdsapp",
            xdsapp_command,
        )

        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, project, "xdsapp", dset, batch)

    jobs.submit()
Example #5
def _save_pdb(project: Project, pdb_id, filename, pdb_data):
    name = path.splitext(filename)[0]
    nohet_filename = f"{name}_noHETATM.pdb"
    noanisou_filename = f"{name}_noANISOU.pdb"
    nohetanisou_filename = f"{name}_noANISOU_noHETATM.pdb"
    txc_filename = f"{name}_txc.pdb"

    orig_pdb = _add_pdb_entry(project, filename, pdb_id)
    nohet_pdb = _add_pdb_entry(project, nohet_filename, pdb_id)
    noanisou_pdb = _add_pdb_entry(project, noanisou_filename, pdb_id)
    nohetnoanisou_pdb = _add_pdb_entry(project, nohetanisou_filename, pdb_id)

    # write original pdb file 'as-is' to models folder
    with open_proj_file(project, project.get_pdb_file(orig_pdb)) as dest:
        dest.write(pdb_data)

    # filter out HETATM and ANISOU records and write as *_noANISOU_noHETATM.pdb
    with open_proj_file(project, project.get_pdb_file(nohetnoanisou_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith((b"HETATM", b"ANISOU")):
                dest.write(line)

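    # write *_noHETATM.pdb with all HETATM records removed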
    with open_proj_file(project, project.get_pdb_file(nohet_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith(b"HETATM"):
                dest.write(line)

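    # write *_noANISOU.pdb with all ANISOU records removed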
    with open_proj_file(project, project.get_pdb_file(noanisou_pdb)) as dest:
        for line in pdb_data.splitlines(keepends=True):
            if not line.startswith(b"ANISOU"):
                dest.write(line)

    n_chains = pdb_chains(pdb_data.splitlines(keepends=True))

    if n_chains > 1:
        txc_pdb = _add_pdb_entry(project, txc_filename, pdb_id)

        input_pdb_name = path.join(project.models_dir, f"{name}.pdb")

        jobs = JobsSet("phenix ensembler")
        batch = SITE.get_hpc_runner().new_batch_file(
            "phenix ensembler",
            project_script(project, "phenix_ensembler.sh"),
            project_syslog_path(project, "phenix_ensembler_%j.out"),
            project_syslog_path(project, "phenix_ensembler_%j.err"),
        )
        batch.load_modules(["gopresto", PHENIX_MOD])
        batch.add_commands(
            f"cd {project.models_dir}",
            f"phenix.ensembler {input_pdb_name} trim=TRUE output.location='{project.models_dir}'",
            f"mv {project.models_dir}/ensemble_merged.pdb {project.get_pdb_file(txc_pdb)}",
        )
        batch.save()
        jobs.add_job(batch)
        jobs.submit()
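
The three filtered copies above differ only in which record types they drop. A standalone sketch of that filtering (a hypothetical helper, not part of the codebase); PDB record names occupy the first six columns, so a startswith() check on the raw bytes is sufficient:

def strip_records(pdb_data: bytes, drop: tuple) -> bytes:
    """Return pdb_data with every line whose record name is in `drop`
    removed, preserving line endings on the kept lines."""
    return b"".join(
        line for line in pdb_data.splitlines(keepends=True)
        if not line.startswith(drop)
    )

pdb = b"ATOM      1\nANISOU    1\nHETATM    2\n"
assert strip_records(pdb, (b"HETATM", b"ANISOU")) == b"ATOM      1\n"
assert strip_records(pdb, (b"ANISOU",)) == b"ATOM      1\nHETATM    2\n"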
Example #6
def run_autoproc(proj, filters, options):
    # Modules list for HPC env
    softwares = [
        "gopresto",
        versions.CCP4_MOD,
        versions.AUTOPROC_MOD,
        versions.DURIN_MOD,
    ]

    jobs = JobsSet("autoPROC")
    hpc = SITE.get_hpc_runner()
    epoch = str(round(time.time()))

    for num, dset in enumerate(get_proc_datasets(proj, filters, "autoproc")):
        batch = hpc.new_batch_file(
            "autoPROC",
            project_script(proj, f"autoproc_fragmax_part{num}.sh"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_out.txt"),
            project_log_path(proj, f"multi_autoproc_{epoch}_%j_err.txt"),
        )

        batch.set_options(
            time=Duration(hours=168),
            exclusive=True,
            nodes=1,
            cpus_per_task=64,
            memory=DataSize(gigabyte=300),
        )

        batch.purge_modules()
        batch.load_modules(softwares)

        outdir, h5master, sample, num_images = _get_dataset_params(proj, dset)

        if options["spacegroup"] != "":
            spacegroup = options["spacegroup"]
            spg = f"symm='{spacegroup}'"
        else:
            spg = ""
        if options["cellparam"] != "":
            cellpar = " ".join(options["cellparam"].split(","))
            cellpar = cellpar.replace("(", "").replace(")", "")
            unit_cell = f"cell='{cellpar}'"
        else:
            unit_cell = ""

        customautoproc = options["customautoproc"]
        if options["friedel_law"] == "true":
            friedel = "-ANO"
        else:
            friedel = "-noANO"

        batch.add_commands(
            f"rm -rf {outdir}/autoproc",
            f"mkdir -p {outdir}",
            f"cd {outdir}",
            get_autoproc_command(outdir, spg, unit_cell, customautoproc,
                                 friedel, h5master, num_images),
        )

        batch.save()
        jobs.add_job(batch)

        add_update_job(jobs, hpc, proj, "autoproc", dset, batch)

    jobs.submit()
Example #7
def launch_refine_jobs(
    project: Project,
    filters,
    pdb_file,
    space_group,
    run_aimless,
    refine_tool,
    refine_tool_commands,
    cpus,
):
    epoch = round(time.time())
    jobs = JobsSet("Refine")
    hpc = SITE.get_hpc_runner()

    for dset in get_refine_datasets(project, filters, refine_tool):
        for tool, input_mtz in _find_input_mtzs(project, dset):
            batch = hpc.new_batch_file(
                f"refine {tool} {dset.name}",
                project_script(project,
                               f"refine_{tool}_{refine_tool}_{dset.name}.sh"),
                project_log_path(
                    project, f"refine_{tool}_{dset.name}_{epoch}_%j_out.txt"),
                project_log_path(
                    project, f"refine_{tool}_{dset.name}_{epoch}_%j_err.txt"),
                cpus,
            )
            batch.set_options(
                time=Duration(hours=12),
                nodes=1,
                mem_per_cpu=DataSize(gigabyte=5),
            )

            batch.add_commands(crypt_shell.crypt_cmd(project))

            batch.assign_variable("WORK_DIR", "`mktemp -d`")
            batch.add_commands(
                "cd $WORK_DIR",
                crypt_shell.fetch_file(project, pdb_file, "model.pdb"),
                crypt_shell.fetch_file(project, input_mtz, "input.mtz"),
            )

            # TODO: load tool specific modules?
            batch.load_modules(HPC_MODULES)

            if run_aimless:
                batch.add_commands(
                    _aimless_cmd(space_group.short_name, "input.mtz"))

            results_dir = Path(project.get_dataset_results_dir(dset), tool)

            batch.add_commands(
                *refine_tool_commands,
                _upload_result_cmd(project, results_dir),
                "cd",
                "rm -rf $WORK_DIR",
            )

            batch.save()
            jobs.add_job(batch)

            add_update_job(jobs, hpc, project, refine_tool, dset, batch)

    jobs.submit()
Example #8
def auto_ligand_fit(project, useLigFit, useRhoFit, filters, cifMethod,
                    custom_ligfit, custom_rhofit):
    # Modules for HPC env
    softwares = ["gopresto", versions.BUSTER_MOD, versions.PHENIX_MOD]

    jobs = JobsSet("Ligand Fit")
    hpc = SITE.get_hpc_runner()

    refine_results = _get_refine_results(project, filters, useLigFit,
                                         useRhoFit)

    for num, result in enumerate(refine_results):
        dataset = result.dataset
        if dataset.crystal.is_apo():
            # don't try to fit ligand to an apo crystal
            continue

        fragment = get_crystals_fragment(dataset.crystal)
        result_dir = project.get_refine_result_dir(result)

        pdb = Path(result_dir, "final.pdb")

        clear_tmp_cmd = ""
        cif_out = Path(result_dir, fragment.code)

        if cifMethod == "elbow":
            cif_cmd = f"phenix.elbow --smiles='{fragment.smiles}' --output={cif_out}\n"
        elif cifMethod == "acedrg":
            cif_cmd = f"acedrg -i '{fragment.smiles}' -o {cif_out}\n"
            clear_tmp_cmd = f"rm -rf {cif_out}_TMP/\n"
        elif cifMethod == "grade":
            cif_cmd = (
                f"rm -f {cif_out}.cif {cif_out}.pdb\n"
                f"grade '{fragment.smiles}' -ocif {cif_out}.cif -opdb {cif_out}.pdb -nomogul\n"
            )
        else:
            cif_cmd = ""
        rhofit_cmd = ""
        ligfit_cmd = ""

        ligCIF = f"{cif_out}.cif"
        ligPDB = f"{cif_out}.pdb"

        rhofit_outdir = Path(result_dir, "rhofit")
        ligfit_outdir = Path(result_dir, "ligfit")
        mtz_input = Path(result_dir, "final.mtz")

        if useRhoFit:
            if rhofit_outdir.exists():
                rhofit_cmd += f"rm -rf {rhofit_outdir}\n"
            rhofit_cmd += f"rhofit -l {ligCIF} -m {mtz_input} -p {pdb} -d {rhofit_outdir} {custom_rhofit}\n"

        if useLigFit:
            if ligfit_outdir.exists():
                ligfit_cmd += f"rm -rf {ligfit_outdir}\n"
            ligfit_cmd += f"mkdir -p {ligfit_outdir}\n"
            ligfit_cmd += f"cd {ligfit_outdir} \n"
            ligfit_cmd += (
                f"phenix.ligandfit data={mtz_input} model={pdb} ligand={ligPDB} "
                f"fill=True clean_up=True {custom_ligfit}\n")

        batch = hpc.new_batch_file(
            "autoLigfit",
            project_script(project, f"autoligand_{dataset.name}_{num}.sh"),
            project_log_path(project, "auto_ligfit_%j_out.txt"),
            project_log_path(project, "auto_ligfit_%j_err.txt"),
            cpus=1,
        )

        batch.set_options(time=Duration(hours=1))

        batch.purge_modules()
        batch.load_modules(softwares)

        batch.add_commands(
            cif_cmd,
            rhofit_cmd,
            ligfit_cmd,
        )

        batch.add_commands(clear_tmp_cmd)

        batch.save()
        jobs.add_job(batch)

        # NOTE: all the update jobs need to be chained to run one after
        # another, due to limitations (bugs!) in jobsd's handling of
        # 'run_after' dependencies. Currently it is not possible to specify
        # that multiple jobs should run after a single job finishes.
        #

        if useRhoFit:
            batch = add_update_job(jobs, hpc, project, "rhofit", dataset,
                                   batch)

        if useLigFit:
            add_update_job(jobs, hpc, project, "ligandfit", dataset, batch)

    jobs.submit()
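
As the NOTE above explains, jobsd cannot reliably run several jobs after one predecessor, so the update jobs are chained linearly: the rhofit update waits on the autoLigfit batch, and the ligandfit update waits on the rhofit update. A toy illustration of the resulting dependency shape (names are illustrative, not the real API):

# Linear chain instead of a fan-out from the autoLigfit batch.
chain = ["autoligand", "update_rhofit", "update_ligandfit"]
deps = {job: [prev] for prev, job in zip(chain, chain[1:])}
print(deps)
# {'update_rhofit': ['autoligand'], 'update_ligandfit': ['update_rhofit']}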