def test_convex_hull_exhaustive_search(self):
     """Run exhaustive() and check the values read back from its csv.

     The csv is written to ``test.csv`` inside the configured output
     directory; the first two values returned by get_minimum_fofc are
     compared against the expected occupancy and u_iso.
     """
     csv_path = os.path.join(self.params.output.out_dir, "test.csv")
     self.params.exhaustive.output.csv_name = csv_path

     exhaustive(self.params)

     occupancy, u_iso, _ = get_minimum_fofc(
         self.params.exhaustive.output.csv_name)
     self.assertAlmostEqual(0.6, occupancy)
     self.assertAlmostEqual(0.35, u_iso)
    def test_multiple_exhaustive_search(self):
        """Run multiple_exhaustive() with only the csv output path changed.

        The first two values returned by get_minimum_fofc for the written
        csv are compared against the expected occupancy and u_iso.
        """
        csv_path = os.path.join(self.params.output.out_dir, "test.csv")
        self.params.exhaustive.output.csv_name = csv_path

        multiple_exhaustive(self.params)

        occupancy, u_iso, _ = get_minimum_fofc(
            self.params.exhaustive.output.csv_name)
        self.assertAlmostEqual(0.6, occupancy)
        self.assertAlmostEqual(0.33, u_iso)
    def test_exhaustive_search_multiprocessing(self):
        """Run exhaustive() with four processes and a step of 0.2.

        The first two values returned by get_minimum_fofc for the written
        csv are compared against the expected occupancy and u_iso.
        """
        settings = self.params.settings
        settings.processes = 4

        csv_path = os.path.join(self.params.output.out_dir, "test.csv")
        self.params.exhaustive.output.csv_name = csv_path
        self.params.exhaustive.options.step = 0.2

        exhaustive(self.params)

        occupancy, u_iso, _ = get_minimum_fofc(
            self.params.exhaustive.output.csv_name)
        self.assertAlmostEqual(0.6, occupancy)
        self.assertAlmostEqual(0.4, u_iso)
예제 #4
0
def write_minima_pdb(input_pdb, output_pdb, csv_name, params):
    """
    Write pdb from the minima in exhaustive search

    The occupancy and u_iso returned by ``get_minimum_fofc`` for
    ``csv_name`` are applied to every atom selected by one of the ground
    or bound states of ``input_pdb``. Atoms selected by neither are
    written out unchanged.

    Parameters
    ----------
    input_pdb: str
        path to input pdb to take structure from

    output_pdb: str
        path to write structure to

    csv_name: str
        path to exhaustive search csv

    params:
        parameter object, passed through to get_bound_ground_states

    Returns
    -------
    None
    """

    min_occ, min_u_iso, _ = get_minimum_fofc(csv_name)
    # The B factor is the same for every edited atom, so convert once.
    min_b_fac = u_iso_to_b_fac(min_u_iso)

    bound_states, ground_states = get_bound_ground_states(input_pdb, params)
    pdb_inp = iotbx.pdb.input(input_pdb)
    hier = pdb_inp.construct_hierarchy()

    for chain in hier.only_model().chains():
        for residue_group in chain.residue_groups():
            for atom_group in residue_group.atom_groups():
                for atom in atom_group.atoms():

                    # Each state is indexed as (selection, num_altlocs);
                    # the selection is looked up by the atom's i_seq.
                    for ground_state in ground_states:
                        if ground_state[0][atom.i_seq]:
                            # Ground state shares the occupancy left over
                            # from the bound state between its altlocs.
                            atom.set_occ((1 - min_occ) / ground_state[1])
                            atom.set_b(min_b_fac)

                    for bound_state in bound_states:
                        if bound_state[0][atom.i_seq]:
                            atom.set_occ(min_occ / bound_state[1])
                            atom.set_b(min_b_fac)

    # Reuse the already parsed input for the crystal symmetry rather than
    # reading the pdb file from disk a second time.
    with open(output_pdb, "w") as f:
        f.write(
            hier.as_pdb_string(
                crystal_symmetry=pdb_inp.crystal_symmetry()))
예제 #5
0
def plot_fofc_occ(start_occ, end_occ, step, dataset_prefix, set_b):
    """Plot mean |Fo-Fc| at each simulated occupancy against the minimum.

    For every occupancy from ``start_occ`` to ``end_occ`` (in increments
    of ``step``) the csv named ``occ_<occ>_b_<set_b>_u_iso`` is read twice:
    once through get_minimum_fofc and once through get_fofc_from_csv at
    the simulated occupancy. Both point sets are plotted, joined pairwise
    with connectpoints, and the figure is saved as
    ``<dataset_prefix>-delta_fofc_occ.png``.
    """

    occs, fofcs = [], []
    min_occs, min_fofcs = [], []

    # end_occ + step / 5 keeps the end point inside the half-open arange.
    occupancies = np.arange(start_occ, end_occ + (step / 5), step)
    for lig_occupancy in occupancies:
        occ_label = str(lig_occupancy).replace(".", "_")
        b_label = str(set_b).replace(".", "_")
        csv_name = "occ_{}_b_{}_u_iso".format(occ_label, b_label)

        min_occ, _, fo_fc_at_min = get_minimum_fofc(csv_name)
        simulated_fofc = get_fofc_from_csv(
            csv_name, lig_occupancy, round_step(b_to_u_iso(set_b)), step)

        fofcs.append(simulated_fofc)
        occs.append(lig_occupancy)
        min_fofcs.append(fo_fc_at_min)
        min_occs.append(min_occ)

    fig, ax = plt.subplots()
    (min_plot,) = ax.plot(min_occs, min_fofcs, "k+")
    (occ_plot,) = ax.plot(occs, fofcs, "ro")

    # Join each simulated point to the minimum found for the same csv.
    for point_index in np.arange(len(occs)):
        connectpoints(occs, fofcs, min_occs, min_fofcs, point_index)

    legend_handles = (min_plot, occ_plot)
    legend_labels = (
        "Minima of mean |Fo-Fc|",
        "Mean |Fo-Fc| at simulated occupancy",
    )
    ax.legend(
        legend_handles,
        legend_labels,
        prop={"size": 8},
        numpoints=1,
        bbox_to_anchor=(1, 1),
        bbox_transform=plt.gcf().transFigure,
    )

    ax.set_xlabel("Occupancy")
    ax.set_ylabel("Mean |Fo-Fc|")

    title = "{}: Delta mean|Fo-Fc| and Delta Occupancy".format(dataset_prefix)
    plt.title(title, fontsize=10)

    output_png = "{}-delta_fofc_occ.png".format(dataset_prefix)
    plt.savefig(output_png)
예제 #6
0
            # Build occ_df for this entry: folders without "exhaustive" in
            # their name are read from the pdb via read_ligand, the others
            # from the exhaustive search csv. Entries whose input file is
            # missing are skipped.
            if not "exhaustive" in program_folder:
                if os.path.exists(pdb):
                    occ_df = read_ligand(pdb_path=pdb)
                else:
                    continue
            else:
                csv = os.path.join(out_root,
                                    program_folder,
                                    xtal,
                                    "exhaustive_search.csv")

                if not os.path.exists(csv):
                    continue

                # Single-row frame from the values get_minimum_fofc returns;
                # u_iso is converted with u_iso_to_b_fac for the B_factor
                # column.
                occ, u_iso, fo_fc = get_minimum_fofc(csv_name=csv)
                exhaustive_data = {"Occupancy": occ,
                        "B_factor":u_iso_to_b_fac(u_iso),
                        "min_fo_fc": fo_fc}

                occ_df = pd.DataFrame(data=exhaustive_data, index=[0])


                # Dealing with two copies of ligand: for OX210 the single
                # row is duplicated and the copies are labelled resid 1 and 2.
                if compound == "OX210":
                    occ_df_2 = occ_df.copy(deep=True)
                    occ_df['resid'] = 1
                    occ_df_2['resid'] = 2
                    occ_df = pd.concat([occ_df, occ_df_2])
                else:
                    occ_df['resid'] = 1
예제 #7
0
def plot_DCP2B():
    """ Plotting code for edstats pairplots on DCP2B

    Merges the edstats and exhaustive search minima csv files, labels each
    crystal with its compound code from the soakDB database, then writes
    pairplots, an occupancy histogram and summary csv files into the
    hard-coded output directory.

    Notes
    -------------
    Place holder to be better functionalised when
    more generalised case is apparent

    All input and output paths are hard coded. The duplicates csv is only
    written when at least one compound has more than one refined hit.

    Returns
    -------
    None
    """

    out_dir = "/dls/science/groups/i04-1/elliot-dev/Work/exhaustive_search_data/DCP2B_18_09_20_exhaus"
    es_minima_csv = os.path.join(out_dir, "es_minima.csv")
    edstats_csv = os.path.join(out_dir, "edstats.csv")
    database_path = (
        "/dls/labxchem/data/2016/lb13385-64/processing/database/soakDBDataFile.sqlite"
    )
    compound = "FMOPL000435a"

    edstats_df = pd.read_csv(edstats_csv)
    es_minima_df = pd.read_csv(
        es_minima_csv, names=["Dataset", "ES_occ", "es_bfac", "min_fofc"])

    summary_df = pd.merge(edstats_df, es_minima_df, on="Dataset")

    refinement_outcomes = "'4 - CompChem ready', '5 - Deposition ready','6 - Deposited'"

    print(database_path)
    conn = sqlite3.connect(database_path)
    try:
        cur = conn.cursor()

        # refinement_outcomes is a constant defined above, not user input,
        # so formatting it into the statement is safe here.
        cur.execute(
            "SELECT CrystalName, CompoundCode, RefinementResolution "
            "FROM mainTable WHERE RefinementOutcome in ({})"
            " AND  (RefinementPDB_latest AND RefinementMTZ_latest) IS NOT NULL".
            format(refinement_outcomes))

        refinement_xtals = cur.fetchall()
        cur.close()
    finally:
        # Always release the database handle, even if the query fails.
        conn.close()

    summary_df = summary_df.rename(columns={"Dataset": "CrystalName"})

    compounds = {}
    # NOTE(review): es_occ_b is filled here but not used again in this
    # function; kept so the csv files are still read as before.
    es_occ_b = []
    for xtal_name, compound_code, resolution in refinement_xtals:
        compounds[xtal_name] = compound_code

        if compound_code == compound:
            csv = os.path.join(out_dir, xtal_name, "exhaustive_search.csv")
            occ, u_iso, _ = get_minimum_fofc(csv)
            es_occ_b.append([xtal_name, occ, u_iso_to_b_fac(u_iso)])

    comp_df = pd.DataFrame(list(compounds.items()),
                           columns=["CrystalName", "compound_code"])
    summary_df = pd.merge(summary_df, comp_df, on="CrystalName")

    compound_df = summary_df[summary_df["compound_code"] == compound]

    # Existing plots are left alone rather than regenerated.
    pairplot_png = os.path.join(out_dir, "pairplot.png")
    if not os.path.exists(pairplot_png):
        pairplot = labelled_pairplot(summary_df, hue_column="compound_code")
        pairplot.fig.savefig(pairplot_png, dpi=300)

    compound_pairplot_png = os.path.join(out_dir, "FMOPL000435a_pairplot.png")
    if not os.path.exists(compound_pairplot_png):
        compound_pairplot = labelled_pairplot(compound_df)
        compound_pairplot.fig.savefig(compound_pairplot_png, dpi=300)

    compound_df = compound_df.rename(index=str,
                                     columns={
                                         "ES_occ": "es_occupancy",
                                         "Occupancy": "occupancy"
                                     })

    params = master_phil.extract()
    params.output.out_dir = out_dir

    occupancy_histogram_with_exhaustive_search(compound_df,
                                               protein_name="DCP2B",
                                               compound=compound,
                                               params=params)

    summary_df.to_csv(os.path.join(out_dir, "DCP2B_edstats_summary.csv"))
    compound_df.to_csv(
        os.path.join(out_dir, "FMOPL000435a_edstats_summary.csv"))

    # Summarise every compound that has more than one refined hit.
    # Iterating the groups directly avoids pd.concat raising ValueError
    # when no compound is duplicated.
    summary_duplicate_df_list = []
    for duplicate_compound, duplicate_df in summary_df.groupby(
            "compound_code"):
        if len(duplicate_df) <= 1:
            continue

        summary = {
            "compound": [duplicate_compound],
            "number refined hits": [len(duplicate_df.index)],
            "RSCC min": [duplicate_df["RSCC"].min()],
            "RSCC max": [duplicate_df["RSCC"].max()],
            "Occ refined min": [duplicate_df["Occupancy"].min()],
            "Occ refined max": [duplicate_df["Occupancy"].max()],
            "Occ ES min": [duplicate_df["ES_occ"].min()],
            "Occ ES max": [duplicate_df["ES_occ"].max()],
        }
        summary_duplicate_df_list.append(pd.DataFrame(data=summary))

    if summary_duplicate_df_list:
        pd.concat(summary_duplicate_df_list).to_csv(
            os.path.join(out_dir, "DCP2B_edstats_duplicates.csv"))
    else:
        print("No compound has more than one refined hit; "
              "DCP2B_edstats_duplicates.csv not written")
예제 #8
0
import os

from exhaustive.utils.utils import get_minimum_fofc

# Print the values returned by get_minimum_fofc for every dataset folder
# under exh_b_fix that contains an exhaustive_search.csv.
exh_b_fix = "/dls/science/groups/i04-1/elliot-dev/Work/NUDT7A_mass_spec_refinements/copy_atoms/exhaustive_b_fix/2019-06-17/"

# Full paths of the sub-directories directly below exh_b_fix.
folders = [
    os.path.join(exh_b_fix, d) for d in os.listdir(exh_b_fix)
    if os.path.isdir(os.path.join(exh_b_fix, d))
]

# Iterate the collected folders, not the characters of the root path.
for folder in folders:

    exh_csv = os.path.join(folder, "exhaustive_search.csv")

    # Folders without a csv are skipped.
    if os.path.exists(exh_csv):

        print(get_minimum_fofc(csv_name=exh_csv))
예제 #9
0
def process_exhaustive_search(compound_codes,
                              initial_model_dir,
                              in_dir,
                              out_dir,
                              protein_name,
                              preferred_cif=None):
    """
    Post-process exhaustive search results for each compound.

    For every dataset of every compound this splits conformations (if not
    already split), writes a pdb at the exhaustive search minimum, refines
    it with giant.quick_refine, scores the refined model and collects the
    minimum occupancy and B factor. Per compound it then plots the
    occupancy histogram, the occupancy/B factor scatter and, when
    available, the collated edstats scores. Steps whose output already
    exists are not repeated.

    Parameters
    ----------
    compound_codes:
        iterable of compound names; each is used as a sub-folder name
        below in_dir and out_dir

    initial_model_dir: str
        directory holding <dataset>/refine.pdb and <dataset>/refine.mtz,
        also searched for the cif file

    in_dir: str
        directory holding <compound>/<dataset>/refine.pdb

    out_dir: str
        directory holding the exhaustive search csv files; outputs are
        written below <out_dir>/<compound>

    protein_name: str
        protein name; protein_name + "-x" is used as the dataset prefix

    preferred_cif:
        passed through to get_cif_file_from_dataset

    Returns
    -------
    None

    Notes
    -----
    NOTE(review): ``params`` is not an argument of this function, so it
    must exist at module level; its output.out_dir is overwritten for
    each compound. Confirm against the rest of the file.
    The working directory is changed with os.chdir before refinement and
    is not restored.
    """
    protein_prefix = protein_name + "-x"

    for compound in compound_codes:

        es_occ_b = []

        for dataset in datasets_from_compound(protein_prefix,
                                              compound_folder=os.path.join(
                                                  in_dir, compound)):
            # Define paths
            es_csv = os.path.join(out_dir, compound, dataset,
                                  dataset + "_exhaustive_search_occ_u_iso.csv")
            refine_pdb = os.path.join(in_dir, compound, dataset, "refine.pdb")
            es_pdb = os.path.join(out_dir, compound, dataset, "es_minima.pdb")
            es_refine_pdb = os.path.join(out_dir, compound, dataset,
                                         "exhaustive_search0001",
                                         "es_refine.pdb")
            es_refine_mtz = os.path.join(out_dir, compound, dataset,
                                         "exhaustive_search0001",
                                         "es_refine.mtz")
            # The mtz and pdb used for refinement and scoring come from
            # the initial model directory, not from in_dir.
            input_mtz = os.path.join(initial_model_dir, dataset, "refine.mtz")
            input_pdb = os.path.join(initial_model_dir, dataset, "refine.pdb")
            print(input_mtz)
            print(input_pdb)
            print("---------------------")

            dataset_dir = os.path.join(initial_model_dir, dataset)

            # Generate split conformations
            if not os.path.exists(
                    os.path.join(in_dir, compound, dataset,
                                 "refine.split.ground-state.pdb")):
                split_params = split_phil.extract()
                split_params.input.pdb = [
                    os.path.join(in_dir, compound, dataset, "refine.pdb")
                ]
                split_params.output.log = "cat.log"
                print(split_params.input.pdb)
                print(split_phil.format(python_object=split_params).as_str())
                try:
                    split_conformations(split_params)
                except IOError:
                    print("Split confs: Issue in parsing:{}".format(dataset))
                    continue

            # Write Minima pdb
            if not os.path.exists(es_pdb):
                try:
                    write_minima_pdb(input_pdb=refine_pdb,
                                     output_pdb=es_pdb,
                                     csv_name=es_csv,
                                     params=params)
                except IOError:
                    print("Issue in parsing:{}".format(dataset))
                    continue

            input_cif = get_cif_file_from_dataset(dataset_dir, preferred_cif)

            # Refinement of minima pdb
            # TODO remove explicit call to 0001
            if not os.path.exists(es_refine_pdb):
                try:
                    os.chdir(os.path.join(out_dir, compound, dataset))
                    exit_status = os.system(
                        "giant.quick_refine input.pdb={} "
                        "input.mtz={} input.cif={} "
                        "dir_prefix='exhaustive_search' "
                        "output.out_prefix='es_refine' ".format(
                            es_pdb, input_mtz, input_cif))
                except (IOError, OSError):
                    # os.chdir raises OSError, which is not an IOError on
                    # Python 2, so both are caught here.
                    print("Skipping crystal")
                    continue

                # os.system does not raise when the command fails; report
                # the failure but carry on as before.
                if exit_status != 0:
                    print("giant.quick_refine exited with status {} "
                          "for {}".format(exit_status, dataset))

            es_minima_plot_folder = os.path.join(out_dir, compound, dataset,
                                                 "Plots")
            # Plotting spider plots of minima pdb
            if not os.path.exists(es_minima_plot_folder):
                score_params = score_phil.extract()
                score_params.input.pdb1 = input_pdb
                score_params.input.mtz1 = input_mtz
                score_params.input.pdb2 = es_refine_pdb
                score_params.input.mtz2 = es_refine_mtz
                score_params.output.out_dir = es_minima_plot_folder
                try:
                    score_model(score_params)
                except Exception:
                    # Scoring is best effort, but KeyboardInterrupt and
                    # SystemExit must still propagate.
                    print("skipping edstats for {}".format(dataset))

            # Minima occupancy and B factor for histogram/ scatter summary
            try:
                occ, u_iso, _ = get_minimum_fofc(es_csv)
            except IOError:
                print("Issue in parsing:{}".format(dataset))
                continue

            es_occ_b.append([dataset, occ, u_iso_to_b_fac(u_iso)])

        print(es_occ_b)
        es_occ_df = pd.DataFrame(
            data=es_occ_b, columns=['dataset', 'es_occupancy', 'es_b_fac'])

        refine_occ_df = get_occ_b(refinement_dir=os.path.join(
            in_dir, compound),
                                  lig_chain="B",
                                  pdb_name="refine.split.bound-state.pdb")

        # Outer merge keeps datasets present in only one of the two frames.
        occ_df = pd.merge(es_occ_df, refine_occ_df, on='dataset', how='outer')

        params.output.out_dir = os.path.join(out_dir, compound)

        occupancy_histogram_with_exhaustive_search(occ_df,
                                                   protein_name=protein_name,
                                                   compound=compound,
                                                   params=params)
        occupancy_b_factor_scatter_plot(occ_df,
                                        protein_name=protein_name,
                                        compound=compound,
                                        params=params)

        edstats_df = collate_edstats_scores(protein_prefix=protein_prefix,
                                            compound_folder=os.path.join(
                                                out_dir, compound))
        if edstats_df is not None:
            plot_edstats_across_soaks(edstats_df=edstats_df,
                                      compound_folder=os.path.join(
                                          out_dir, compound),
                                      compound=compound,
                                      protein_name=protein_name,
                                      title_suffix="Exhaustive minima")
예제 #10
0
    # Refined occupancy / B factor values for ligand chain E, read from
    # the bound-state pdb files under covalent_dir.
    refine_occ_df = get_occ_b(refinement_dir=covalent_dir,
                              lig_chain="E",
                              pdb_name="refine.split.bound-state.pdb")

    # Candidate csv paths: one per sub-directory of covalent_dir whose name
    # ends with "LIG_CYS".
    # NOTE(review): str.rstrip('_LIG_CYS') strips any trailing run of the
    # characters _, L, I, G, C, Y, S rather than the literal suffix, so a
    # crystal name ending in one of those characters would be over-trimmed.
    # Confirm the crystal names make this safe.
    exhaustive_search_csvs = [
        os.path.join(covalent_dir, xtal_dir, xtal_dir.rstrip('_LIG_CYS'),
                     "exhaustive_search.csv")
        for xtal_dir in os.listdir(covalent_dir)
        if os.path.isdir(os.path.join(covalent_dir, xtal_dir))
        and xtal_dir.endswith("LIG_CYS")
    ]

    # Collect [dataset, occupancy, B factor] for every csv that exists;
    # the dataset name is the csv's parent directory name.
    es_occ_b = []
    for csv in exhaustive_search_csvs:
        if os.path.exists(csv):
            occ, u_iso, _ = get_minimum_fofc(csv)
            es_occ_b.append([
                os.path.basename(os.path.dirname(csv)).rstrip('_LIG_CYS'), occ,
                u_iso_to_b_fac(u_iso)
            ])

    es_occ_df = pd.DataFrame(data=es_occ_b,
                             columns=['dataset', 'es_occupancy', 'es_b_fac'])

    # Outer merge keeps datasets present in only one of the two frames.
    occ_df = pd.merge(es_occ_df, refine_occ_df, on='dataset', how='outer')

    occupancy_histogram_with_exhaustive_search(occ_df,
                                               protein_name="NUDT7A",
                                               compound="NUOOA000181a",
                                               params=params)