Beispiel #1
0
def topOccupancy(PDB):
    """Write PDB files holding the few highest-occupancy atoms of a structure.

    Starting from an occupancy threshold of 0.1, the threshold is raised in
    steps of 0.02 while more than two atoms lie above it.  After every step
    that leaves fewer than six atoms selected, those atoms are written to
    ``<count>_.pdb`` in the current working directory (a later step with the
    same count overwrites the earlier file).

    Parameters
    ----------
    PDB: str
        path to the input pdb file

    Returns
    -------
    None
        A message is printed instead when no atom has occupancy above 0.1.
    """
    import os
    import sys
    from iotbx import pdb
    from iotbx.pdb import hierarchy
    import itertools

    threshold = float(0.1)
    pdb_in = hierarchy.input(PDB)
    symm = pdb_in.crystal_symmetry()
    obj_pdb = pdb_in.construct_hierarchy()

    def _atoms_above(level):
        # Indices of all atoms whose occupancy exceeds ``level``.
        return obj_pdb.atom_selection_cache().iselection(
            "occupancy>" + str(level) + " ")

    n_selected = int(len(_atoms_above(threshold)))
    if not (n_selected > 0):
        print("occupancy is lower than 0.1")
        return

    while n_selected > 2:
        threshold = threshold + float(0.02)
        picked = _atoms_above(threshold)
        n_selected = int(len(picked))
        if n_selected < 6:
            top_hierarchy = obj_pdb.select(picked)
            top_hierarchy.write_pdb_file(
                file_name=os.path.join(str(n_selected) + "_.pdb"))
Beispiel #2
0
def get_lig(pdb):
    """
    List ligand atom groups found in a pdb file.

    The residue names LIG, UNL and DRG are searched in that order.

    Parameters
    ----------
    pdb: str
        path to pdb file

    Returns
    -------
    lig_pos: list
        one (chain id, resname, resseq) tuple per matching atom group,
        with resseq given as the string form of its integer value
    """
    # Parse the file once and reuse the selection cache for every resname
    pdb_in = hierarchy.input(file_name=pdb)
    sel_cache = pdb_in.hierarchy.atom_selection_cache()

    found = []
    for lig_name in ("LIG", "UNL", "DRG"):
        mask = sel_cache.selection("resname {}".format(lig_name))
        lig_hier = pdb_in.hierarchy.select(mask)

        # Nothing with this residue name in the structure
        if lig_hier.models_size() == 0:
            continue

        for chain in lig_hier.only_model().chains():
            for residue_group in chain.residue_groups():
                found.extend(
                    (chain.id, atom_group.resname,
                     str(int(residue_group.resseq)))
                    for atom_group in residue_group.atom_groups())

    return found
Beispiel #3
0
def map_sites_to_asu(spacegroup, pdb_in, pdb_out, invert=False):
    '''Map sites to asu of input spacegroup (as sites from shelxd claim
    P1 in CRYST1 record) inverting if necessary. N.B. if inverting sites
    also need to invert spacegroup.

    spacegroup: space group symbol used to build the target symmetry
    pdb_in: path of the pdb file holding the sites
    pdb_out: path the remapped sites are written to (overwritten)
    invert: when True the hand of the structure is changed before mapping

    Returns None; the result is the file written to pdb_out.'''

    from cctbx.crystal import symmetry, direct_space_asu
    from iotbx.pdb import hierarchy
    from scitbx.array_family import flex

    # NOTE(review): space_group / space_group_symbols are not imported in
    # this function; they are expected to be available at module scope.
    sg = space_group(space_group_symbols(spacegroup).hall())
    coords = hierarchy.input(file_name=pdb_in)
    cs = coords.input.crystal_symmetry()
    uc = cs.unit_cell()

    # Keep the unit cell from the file but replace the space group
    cs2 = symmetry(unit_cell=uc, space_group=sg)
    xs = coords.xray_structure_simple().customized_copy(crystal_symmetry=cs2)

    if invert:
        xs = xs.change_hand()

    am = xs.crystal_symmetry().asu_mappings(0.0)
    am.process_sites_cart(xs.sites_cart())

    # Take the first mapping of every site as its new position
    xyz = flex.vec3_double()
    for m in am.mappings():
        xyz.append(m[0].mapped_site())
    xs.set_sites_cart(xyz)

    # Context manager so the handle is flushed and closed deterministically
    with open(pdb_out, 'w') as fout:
        fout.write(xs.as_pdb_file())
def set_b_factor_pdb(row, rerun=False):
    """
    Write a copy of the refined pdb with per-site B factors applied.

    Parameters
    ----------
    row: mapping (e.g. pandas.Series)
        must provide "refine_pdb" (input path), "site_b_factor_path"
        (output path), "sites" (iterable of (allocated column name,
        b factor) entries), and for every site both the allocated column
        and the matching "lig" column (name with "allocated" replaced by
        "lig")
    rerun: bool
        force processing even if the output exists or the input is missing

    Returns
    -------
    None

    Raises
    ------
    ValueError
        if a site's ligand column holds more than one residue
    """
    pdb_in_path = row["refine_pdb"]
    pdb_out_path = row["site_b_factor_path"]

    # Skip when the output is already present or there is no input,
    # unless the caller explicitly asked for a rerun
    fresh_work = not os.path.exists(pdb_out_path) and os.path.exists(pdb_in_path)
    if not (fresh_work or rerun):
        return

    pdb_in = hierarchy.input(file_name=pdb_in_path)
    sites = row["sites"]

    for site in sites:
        allocated_col = site[0]
        sel = residue_list_to_selection(row[site[0]])
        lig_col = allocated_col.replace("allocated", "lig")

        if len(row[lig_col]) > 1:
            raise ValueError("More than one allocated residue")
        ligand = list(row[lig_col])[0]
        chain = ligand[0]
        lig_num = ligand[1]

        # The ligand itself gets the same B factor as the residues of its site
        lig_sel = "(chain " + chain + " and resid " + str(int(lig_num)) + ")"
        sel = sel + " or " + lig_sel

        pdb_in = set_b_factor(pdb_in=pdb_in, sel=sel, b_fac=site[1])

    # Nothing to write when the row lists no sites
    if len(sites) != 0:
        with open(pdb_out_path, "w") as out_pdb_file:
            symmetry = pdb_in.input.crystal_symmetry()
            out_pdb_file.write(
                pdb_in.hierarchy.as_pdb_string(crystal_symmetry=symmetry))
def get_occ_b(pdb, chain, resid, altloc=""):
    """
    Get occupancy and b factor of a single residue

    The statistics are taken over all atoms of the first non-empty atom
    group matched by the selection.

    Parameters
    ----------
    pdb: str
        path to pdb file

    chain: str
        chain of interest

    resid: str
        residue of interest

    altloc: str
        altloc of interest; when empty the selection is not restricted
        by altloc

    Returns
    -------
    mean_occ: float
        mean occupancy of residue

    mean_b: float
        mean b factor of residue

    std_b: float
        standard deviation of b factor of residue

    None is returned if the selection yields no atom group with atoms.
    """

    # read into iotbx.hierarchy
    pdb_in = hierarchy.input(file_name=pdb)
    # read into iotbx.selection cache
    sel_cache = pdb_in.hierarchy.atom_selection_cache()

    # Build the selection for the supplied chain, residue id and altloc.
    # An empty altloc must not leave a dangling "altloc" keyword in the
    # selection string (same handling as copy_b).
    if altloc == "":
        selection_string = "chain {} resid {}".format(chain, resid)
    else:
        selection_string = "chain {} resid {} altloc {}".format(
            chain, resid, altloc)
    sel = sel_cache.selection(selection_string)

    # Select that residue from main hierarchy
    hier = pdb_in.hierarchy.select(sel)

    for sel_chain in hier.only_model().chains():
        for residue_group in sel_chain.residue_groups():
            for atom_group in residue_group.atom_groups():
                atoms = atom_group.atoms()
                b = [atom.b for atom in atoms]
                occ = [atom.occ for atom in atoms]
                if not b:
                    continue

                # Statistics are computed once all atoms of the group have
                # been collected, not after the first atom only
                return np.mean(occ), np.mean(b), np.std(b)

    return None
Beispiel #6
0
def write_minima_pdb(input_pdb, output_pdb, csv_name, params):
    """
    Write pdb from the minima in exhaustive search

    Atoms belonging to a ground state get occupancy
    (1 - min_occ) / num_altlocs, atoms belonging to a bound state get
    min_occ / num_altlocs; both get the B factor derived from the minimum
    u_iso.  All other atoms are written unchanged.

    Parameters
    ----------
    input_pdb: str
        path to input pdb to take structure from

    output_pdb: str
        path to write structure to (may be the same as input_pdb)

    csv_name: str
        path to exhaustive search csv

    params:
        parameter object passed through to get_bound_ground_states

    Returns
    -------
    None
    """

    min_occ, min_u_iso, _ = get_minimum_fofc(csv_name)

    bound_states, ground_states = get_bound_ground_states(input_pdb, params)
    pdb_inp = iotbx.pdb.input(input_pdb)
    hier = pdb_inp.construct_hierarchy()

    for chain in hier.only_model().chains():
        for residue_group in chain.residue_groups():
            for atom_group in residue_group.atom_groups():
                for atom in atom_group.atoms():

                    # Each state is (atom mask indexed by i_seq, num altlocs)
                    for ground_state in ground_states:
                        num_altlocs = ground_state[1]
                        if ground_state[0][atom.i_seq]:
                            atom.occ = (1 - min_occ) / num_altlocs
                            atom.b = u_iso_to_b_fac(min_u_iso)

                    for bound_state in bound_states:
                        num_altlocs = bound_state[1]
                        if bound_state[0][atom.i_seq]:
                            atom.set_occ(min_occ / num_altlocs)
                            atom.set_b(u_iso_to_b_fac(min_u_iso))

    # Build the text from the already parsed input before the output file is
    # opened: opening first would truncate the file when output_pdb is the
    # same path as input_pdb, and re-reading the input is unnecessary.
    pdb_text = hier.as_pdb_string(crystal_symmetry=pdb_inp.crystal_symmetry())

    with open(output_pdb, "w") as f:
        f.write(pdb_text)
def residues_near_ligs(pdb, cutoff):
    """
    Collect atom groups lying within a distance cutoff of any LIG group.

    Parameters
    ----------
    pdb: str, path
        path to pdb file
    cutoff: float
        distance cutoff handed to is_within (angstrom)

    Returns
    -------
    ag_set: set or None
        None when the pdb path does not exist, otherwise a set of tuples
        (ligand chain id, ligand resseq,
         neighbour chain id, neighbour resname, neighbour resseq)
    """
    if not os.path.exists(pdb):
        return None

    prot_h = hierarchy.input(pdb).hierarchy

    # Split the atom groups into ligands and everything else
    ligands = []
    others = []
    for group in prot_h.atom_groups():
        (ligands if group.resname == "LIG" else others).append(group)

    neighbours = set()
    for lig in ligands:
        lig_chain = lig.parent().parent().id
        lig_resseq = lig.parent().resseq

        for other in others:
            other_chain = other.parent().parent().id

            close = is_within(
                cutoff, other.atoms().extract_xyz(), lig.atoms().extract_xyz())
            if close:
                neighbours.add((
                    lig_chain,
                    lig_resseq,
                    other_chain,
                    other.resname,
                    other.parent().resseq,
                ))

    return neighbours
def residue_select_hierarchy_from_pdb(pdb_path,
                                      residues_select,
                                      invert_selection=False):
    """
    Produce hierarchy selection object based
    on supplied residue list

    Parameters
    ----------
    pdb_path: str
        path to pdb file from which atoms are taken
    residues_select: list
        list of residues in format [[chain,resid],[chain,resid]]:
        [['A', '24'], ['A', '25']]
    invert_selection: bool
        Flag to invert selection to residues that are not in residues_select

    Returns
    -------
    new_atoms_hier
        hierarchy containing only the selected (or, when inverted, only
        the non-selected) atoms
    """

    # read in PDB file from which atoms are to be taken from
    pdb_in = hierarchy.input(file_name=pdb_path)
    sel_cache = pdb_in.hierarchy.atom_selection_cache()

    # One parenthesised clause per residue, combined with a
    # whitespace-delimited "or" so the keyword is an unambiguous token
    clauses = [
        "(resid {} and chain {})".format(residue[1], residue[0])
        for residue in residues_select
    ]
    selection_string = " or ".join(clauses)

    # Used to select all atoms but residues_select
    if invert_selection:
        selection_string = "not ({})".format(selection_string)

    new_atoms_sel = sel_cache.selection(selection_string)
    return pdb_in.hierarchy.select(new_atoms_sel)
Beispiel #9
0
def exercise_misc () :
  """Regression test for adptbx.intersection on two pairs of water altlocs.

  Skipped silently when the iotbx module is not available.  Checks that the
  isotropic and anisotropic forms give the same value for the first pair,
  pins the expected values for both pairs, and checks that
  xray_structure.intersection_of_scatterers agrees with the direct call.

  NOTE(review): adptbx and approx_equal are not imported in this function;
  they are expected to be available at module scope.
  """
  import libtbx.load_env
  if (not libtbx.env.has_module("iotbx")) : return
  from iotbx.pdb import hierarchy
  # Pair 1: 0.5 A apart, mean displacement = 0.4 A
  # Pair 2: 1.5 A apart, mean displacement = 0.6 A
  pdb_in = hierarchy.input(pdb_string="""
CRYST1   10.000   11.000   12.000  70.00  80.00  90.00 P 1
HETATM    1  O  AHOH A   1       4.000   5.000   3.000  1.00 12.63           O
HETATM    2  O  BHOH A   1       4.500   5.000   3.000  1.00 12.63           O
HETATM    3  O  AHOH A   2       7.000   1.000   6.000  1.00 28.42           O
HETATM    4  O  BHOH A   2       8.500   1.000   6.000  1.00 28.42           O
END""")
  xrs = pdb_in.input.xray_structure_simple()
  # NOTE: unit_cell is not used below; the calls re-query xrs.unit_cell()
  unit_cell = xrs.unit_cell()
  sc = xrs.scatterers()
  # Pair 1 with isotropic displacement parameters
  delta12 = adptbx.intersection(
    u_1=sc[0].u_iso,
    u_2=sc[1].u_iso,
    site_1=sc[0].site,
    site_2=sc[1].site,
    unit_cell=xrs.unit_cell())
  # Everything after this call reads u_star, so it must stay after the
  # isotropic calculation above
  xrs.convert_to_anisotropic()
  delta12_aniso = adptbx.intersection(
    u_1=sc[0].u_star,
    u_2=sc[1].u_star,
    site_1=sc[0].site,
    site_2=sc[1].site,
    unit_cell=xrs.unit_cell())
  # XXX on certain platforms the floating-point precision fails us
  assert approx_equal(delta12_aniso, delta12, eps=0.0000000000001)
  assert approx_equal(delta12, 0.2999, eps=0.0001)
  # Pair 2: the expected value is negative
  delta34 = adptbx.intersection(
    u_1=sc[2].u_star,
    u_2=sc[3].u_star,
    site_1=sc[2].site,
    site_2=sc[3].site,
    unit_cell=xrs.unit_cell())
  assert approx_equal(delta34, -0.300094, eps=0.000001)
  # The xray structure convenience method must give the identical number
  delta34b = xrs.intersection_of_scatterers(2,3)
  assert (delta34b == delta34)
def read_occupancy_b(pdb_path, selection):
    """Tabulate occupancy and B factor for every atom matching a selection.

    Parameters
    ----------
    pdb_path: str
        path to pdb file
    selection: str
        atom selection string passed to the hierarchy's selection cache

    Returns
    -------
    pandas.DataFrame or None
        None when pdb_path does not exist, otherwise one row per atom with
        columns Residue, resseq, altloc, Atom, Occupancy, B_factor
    """
    if not os.path.exists(pdb_path):
        return None

    pdb_in = hierarchy.input(file_name=pdb_path)
    mask = pdb_in.hierarchy.atom_selection_cache().selection(selection)
    sel_hierarchy = pdb_in.hierarchy.select(mask)

    # Flatten model -> chain -> residue group -> atom group -> atom
    records = [
        [ag.resname, rg.resseq, ag.altloc, atom.name, atom.occ, atom.b]
        for model in sel_hierarchy.models()
        for chain in model.chains()
        for rg in chain.residue_groups()
        for ag in rg.atom_groups()
        for atom in ag.atoms()
    ]

    column_names = [
        "Residue", "resseq", "altloc", "Atom", "Occupancy", "B_factor",
    ]
    return pd.DataFrame(records, columns=column_names)
def get_occupancy_groups(pdb, params):
    """
    Calculate occupancy groups given pdb file path.

    Wrapper of giant.structure.restraints.occupancy: overlapping_occupancy_groups(),
    that generates hierarchy from pdb file path

    Parameters
    ----------
    pdb: str
        path to pdb file
    params:
        parameter object; the attributes read are params.select.resnames
        (comma separated residue names), group_dist, overlap_dist,
        complete_groups, exclude_altlocs (comma separated, may be empty)
        and verbose

    Returns
    -------
    occupancy_groups
        result of overlapping_occupancy_groups(), returned unchanged
    """

    logging.info("Gathering occupancy group information from PDB: %s", pdb)
    # print() does not interpolate %-style arguments like logging does,
    # so the message is formatted explicitly
    print("Gathering occupancy group information from PDB: {}".format(pdb))
    pdb_in = hierarchy.input(pdb)

    resnames = params.select.resnames.split(",")

    logging.info("Looking for ligands with resname {!s}".format(
        " or ".join(resnames)))

    # An unset/empty exclude_altlocs means no altloc is excluded
    if params.select.exclude_altlocs:
        exclude_altlocs = params.select.exclude_altlocs.split(",")
    else:
        exclude_altlocs = []

    occupancy_groups = overlapping_occupancy_groups(
        hierarchy=pdb_in.hierarchy,
        resnames=resnames,
        group_dist=params.select.group_dist,
        overlap_dist=params.select.overlap_dist,
        complete_groups=params.select.complete_groups,
        exclude_altlocs=exclude_altlocs,
        verbose=params.select.verbose,
    )

    return occupancy_groups
Beispiel #12
0
def sortOccupancy(PDB):
    '''
    Write PDB files for the atoms whose occupancy is greater than 0.5.

    The atoms are sorted by descending occupancy and written together to
    topOcc_.pdb, then one by one to topOcc_<index>_.pdb.  Finally every
    pair drawn from the five highest-occupancy atoms is written to
    combination<n>_.pdb (n counted from 1).  All files go to the current
    working directory.  If at most one atom qualifies, a message is
    printed and nothing is written.
    '''
    import os
    import sys
    from iotbx import pdb
    from iotbx.pdb import hierarchy
    import itertools

    pdb_in = hierarchy.input(PDB)
    symm = pdb_in.crystal_symmetry()
    obj_pdb = pdb_in.construct_hierarchy()
    selected_atoms = obj_pdb.atom_selection_cache().iselection("occupancy>0.5")

    if not (len(selected_atoms) > 1):
        print("occupancy is lower than 0.5")
        return

    candidates = [obj_pdb.atoms()[i_seq] for i_seq in selected_atoms]
    sorted_atoms = sorted(candidates,
                          key=lambda candidate: candidate.occ,
                          reverse=True)
    atoms2pdb(sorted_atoms).write_pdb_file(file_name="topOcc_.pdb")

    # One file per atom, numbered by its rank in the sorted list
    for rank, atom in enumerate(sorted_atoms):
        atoms2pdb([atom]).write_pdb_file(
            file_name="topOcc_" + str(rank) + "_.pdb",
            crystal_symmetry=pdb_in.input.crystal_symmetry(),
            append_end=True)

    # One file per unordered pair among the top five atoms
    top_pairs = itertools.combinations(sorted_atoms[0:5], 2)
    for counter, pair in enumerate(top_pairs, 1):
        atoms2pdb(pair).write_pdb_file(
            file_name="combination" + str(counter) + "_.pdb",
            crystal_symmetry=pdb_in.input.crystal_symmetry(),
            append_end=True)
Beispiel #13
0
def exercise_misc():
    """Regression test for adptbx.intersection on two pairs of water altlocs.

    Skipped silently when the iotbx module is not available.  Checks that
    the isotropic and anisotropic forms give the same value for the first
    pair, pins the expected values for both pairs, and checks that
    xray_structure.intersection_of_scatterers agrees with the direct call.

    NOTE(review): adptbx and approx_equal are not imported in this function;
    they are expected to be available at module scope.
    """
    import libtbx.load_env

    if not libtbx.env.has_module("iotbx"):
        return
    from iotbx.pdb import hierarchy

    # Pair 1: 0.5 A apart, mean displacement = 0.4 A
    # Pair 2: 1.5 A apart, mean displacement = 0.6 A
    pdb_in = hierarchy.input(
        pdb_string="""
CRYST1   10.000   11.000   12.000  70.00  80.00  90.00 P 1
HETATM    1  O  AHOH A   1       4.000   5.000   3.000  1.00 12.63           O
HETATM    2  O  BHOH A   1       4.500   5.000   3.000  1.00 12.63           O
HETATM    3  O  AHOH A   2       7.000   1.000   6.000  1.00 28.42           O
HETATM    4  O  BHOH A   2       8.500   1.000   6.000  1.00 28.42           O
END"""
    )
    xrs = pdb_in.input.xray_structure_simple()
    # NOTE: unit_cell is not used below; the calls re-query xrs.unit_cell()
    unit_cell = xrs.unit_cell()
    sc = xrs.scatterers()
    # Pair 1 with isotropic displacement parameters
    delta12 = adptbx.intersection(
        u_1=sc[0].u_iso, u_2=sc[1].u_iso, site_1=sc[0].site, site_2=sc[1].site, unit_cell=xrs.unit_cell()
    )
    # Everything after this call reads u_star, so it must stay after the
    # isotropic calculation above
    xrs.convert_to_anisotropic()
    delta12_aniso = adptbx.intersection(
        u_1=sc[0].u_star, u_2=sc[1].u_star, site_1=sc[0].site, site_2=sc[1].site, unit_cell=xrs.unit_cell()
    )
    # XXX on certain platforms the floating-point precision fails us
    assert approx_equal(delta12_aniso, delta12, eps=0.0000000000001)
    assert approx_equal(delta12, 0.2999, eps=0.0001)
    # Pair 2: the expected value is negative
    delta34 = adptbx.intersection(
        u_1=sc[2].u_star, u_2=sc[3].u_star, site_1=sc[2].site, site_2=sc[3].site, unit_cell=xrs.unit_cell()
    )
    assert approx_equal(delta34, -0.300094, eps=0.000001)
    # The xray structure convenience method must give the identical number
    delta34b = xrs.intersection_of_scatterers(2, 3)
    assert delta34b == delta34
def read_ligand_occupancy_b(pdb_path, params):
    """Extract occupancy and B factor of ligand of interest from one PDB file into a dataframe

    Parameters
    ----------
    pdb_path: str
        path to pdb file
    params:
        parameter object passed through to get_pandda_or_any_lig_chain

    Returns
    -------
    pandas.DataFrame or None
        None when no ligand chain is found, otherwise one row per atom of
        that chain with columns Atom, Occupancy, B_factor
    """

    # Get ligand chain that is associated with Event
    pandda_lig_chain = get_pandda_or_any_lig_chain(pdb_path, params)

    # This should be the case when the dataset has not passed through pandda.export
    if pandda_lig_chain is None:
        return None

    # Read in single PDB file
    print(pdb_path)
    pdb_in = hierarchy.input(file_name=pdb_path)
    sel_cache = pdb_in.hierarchy.atom_selection_cache()
    lig_sel = sel_cache.selection("chain {}".format(pandda_lig_chain))
    lig_hierarchy = pdb_in.hierarchy.select(lig_sel)

    # The format string needs a placeholder, otherwise the chain id is
    # silently dropped from the message
    print("Pandda_lig_chain: {}".format(pandda_lig_chain))

    print_hier_atoms(lig_hierarchy)

    # Get occupancy & B factor of ligand
    lig_occ_b = [
        [atom.name, atom.occ, atom.b]
        for model in lig_hierarchy.models()
        for chain in model.chains()
        for rg in chain.residue_groups()
        for ag in rg.atom_groups()
        for atom in ag.atoms()
    ]
    occ_b_df = pd.DataFrame(lig_occ_b, columns=["Atom", "Occupancy", "B_factor"])

    return occ_b_df
from iotbx.pdb import hierarchy
# Load the structure and dump coordinates and B factor of every atom.
pdb_in = hierarchy.input(file_name="6f0o.pdb")
pdb_atoms = pdb_in.hierarchy.atoms()
for i in pdb_atoms:
    # Call form of print: same output under Python 2 for a single argument,
    # and valid syntax under Python 3 like the rest of this file
    print(i.xyz)
    print(i.b)

# The same C-alpha selection applied to three parallel representations:
# the atom array, the xray structure and the hierarchy.
xray_structure = pdb_in.input.xray_structure_simple()
sel_cache = pdb_in.hierarchy.atom_selection_cache()
c_alpha_sel = sel_cache.selection("name ca")  # XXX not case sensitive!
c_alpha_atoms = pdb_atoms.select(c_alpha_sel)
c_alpha_xray_structure = xray_structure.select(c_alpha_sel)
c_alpha_hierarchy = pdb_in.hierarchy.select(c_alpha_sel)
            # for residue_group in chain.residue_groups():
            #     print(int(residue_group.resid()),
            #           int(residue_group.resseq),
            #           chain.id,
            #           copy_chain.id)
            #     print()
            #     copy_chain.remove_residue_group(int(residue_group.resid()))

            for residue_group in chain.residue_groups():
                if int(residue_group.resseq) < min(loop_resid):
                    new_chain.append_residue_group(residue_group.detached_copy())

            for residue_group in chain.residue_groups():
                if int(residue_group.resseq) in loop_resid:
                    new_chain.append_residue_group(residue_group.detached_copy())

            for residue_group in chain.residue_groups():
                if int(residue_group.resseq) > max(loop_resid):
                    new_chain.append_residue_group(residue_group.detached_copy())

            multiple_loop_hier_copy.only_model().append_chain(new_chain)


    multiple_loop_hier_copy.reset_i_seq_if_necessary()

    base_pdb_in = hierarchy.input(base_pdb)
    f = open(os.path.join(path,"alt_multiple_loop.pdb"), "w+")
    f.write(multiple_loop_hier_copy.as_pdb_string(
        atoms_reset_serial_first_value = 1,
        crystal_symmetry=base_pdb_in.input.crystal_symmetry()))
    f.close()
Beispiel #17
0
def update_from_pdb(pdb_df):
    """
    Find occupancy and B factors of LIG residues for every row of a DataFrame

    Carries out cctbx.iotbx dependent searching of pdb file.
    For every input row the pdb file at row.pdb_latest is read and each
    atom group with resname LIG produces one output row carrying chain,
    resseq, altloc, occupancy (mean over atoms), B_mean and B_std.
    The rows are then grouped and aggregated (occupancy summed, B_mean and
    B_std averaged).

    Parameters
    ----------
    pdb_df: Pandas.DataFrame
        must provide the column pdb_latest (path to a pdb file); the final
        groupby additionally uses crystal_name, mtz_latest and refine_log

    Returns
    -------
    pandas.DataFrame:
        one row per (resseq, crystal_name, pdb_latest, mtz_latest,
        refine_log, chain) with columns occupancy, B_mean and B_std

    """
    # loop over rows/ residues
    rows = []
    for index, row in pdb_df.iterrows():

        # read into iotbx.hierarchy
        pdb_in = hierarchy.input(file_name=row.pdb_latest)
        # read into iotbx.selection cache
        sel_cache = pdb_in.hierarchy.atom_selection_cache()

        print(row.pdb_latest)
        sel = sel_cache.selection("resname LIG")

        # Select that residue from main hierarchy
        hier = pdb_in.hierarchy.select(sel)

        # catch when multiple models are in pdb file
        # NOTE(review): whatever the first try assigns is always replaced by
        # the second try, so the first model is used in every case; a
        # selection without any model skips the row.
        try:
            model = hier.only_model()
        except AssertionError:
            pass
        try:
            model = hier.models()[0]
        except IndexError:
            continue

        for chain in model.chains():
            for residue_group in chain.residue_groups():
                for atom_group in residue_group.atom_groups():

                    # copy the row so that the append doesn't
                    # end up appending a series of pointers
                    # to the same object
                    copy_row = row.copy(deep=True)

                    b = []
                    occ = []
                    # Get B factor information on residue by looking at individual atoms
                    for atom in atom_group.atoms():
                        b.append(atom.b)
                        occ.append(atom.occ)

                        # print(atom_group.resname,
                        #       residue_group.resseq,
                        #       atom_group.altloc,
                        #       atom.b,
                        #       atom.occ)

                    # Per atom group statistics
                    occupancy = np.mean(occ)
                    mean_b = np.mean(b)
                    std_b = np.std(b)

                    copy_row["chain"] = chain.id
                    copy_row["resseq"] = residue_group.resseq
                    copy_row["altloc"] = atom_group.altloc
                    copy_row["occupancy"] = occupancy
                    copy_row["B_mean"] = mean_b
                    copy_row["B_std"] = std_b
                    rows.append(copy_row)
        # else:
        #     raise ValueError(
        #         "Multiple residues for selection"
        #         # "chain {} resid {} altloc {} "
        #         # "of pdb: {}".format(row.chain, row.resid, row.alte, pdb)
        #     )

    # Append rows
    # Each Series becomes a column, so transpose back to one row per Series.
    # NOTE(review): rows is empty when no input row produced a LIG atom
    # group -- confirm how pd.concat should be handled in that case.
    pdb_df = pd.concat(rows, axis=1).T

    # As series are single datatype,
    # one should not work row by row
    # This will cause the whole dataframe
    # to be of object datatype.
    # This is a poor quality fix for working row by row
    pdb_df["occupancy"] = pdb_df["occupancy"].astype(float)
    pdb_df["B_mean"] = pdb_df["B_mean"].astype(float)
    pdb_df["B_std"] = pdb_df["B_std"].astype(float)

    # Aggregation can combine rows with different methods.
    # here we sum occupancy across altloc
    # and average the other quantities for the resseq
    pdb_df = pdb_df.groupby(
        [
            "resseq", "crystal_name", "pdb_latest", "mtz_latest", "refine_log",
            "chain"
        ],
        as_index=False,
    ).agg({
        "occupancy": "sum",
        "B_mean": "mean",
        "B_std": "mean"
    })
    return pdb_df
Beispiel #18
0
    except ValueError:
        template = "data.lat"
    else:
        template = args.pop(idx).split("=")[1]

# Unit cell file

    try:
        idx = [(a.find("cell") == 0 or a.find("cell_file") == 0)
               for a in args].index(True)
    except ValueError:
        cell_file = "cell"
    else:
        cell_file = args.pop(idx).split("=")[1]

    pdb_in = hierarchy.input(file_name=pdb_file)

    xrs = pdb_in.input.xray_structure_simple()

    if (bfacs == "zero"):
        xrs.convert_to_isotropic()
        xrs.set_b_iso(0.0)

    if (bfacs == "iso"):
        xrs.convert_to_isotropic()

    fcalc = xrs.structure_factors(d_min=1.0).f_calc()
    fc_square = fcalc.as_intensity_array()
    fc_square_p1 = fc_square.expand_to_p1()

    f = open("tmp.hkl", 'w')
Beispiel #19
0
    avg = sum(times) / (5 + runs)
    stdev = np.std(times)
    print(name + "\t" + str(avg) + "\t" + str(stdev) + "\t" + str(5 + runs))


def time_function_multiple(fn, subjects, global_name):
    """Run time_function for fn once per (name, subject) pair.

    Each run is labelled with global_name and the pair's name joined by a
    tab character.
    """
    for entry in subjects:
        label, subject = entry
        run_label = global_name + "\t" + label
        time_function(fn, subject, run_label)


# Benchmark inputs as (label, path) pairs; used for the "open" benchmark,
# which receives the path rather than a parsed structure.
names = [
    ("small", "example-pdbs/1ubq.pdb"),
    ("medium", "example-pdbs/1yyf.pdb"),
    ("big", "example-pdbs/pTLS-6484.pdb"),
]

# The same three files parsed up front, so the remaining benchmarks receive
# an already loaded structure.
# NOTE(review): Hierarchy is defined outside this view; presumably an alias
# for iotbx.pdb.hierarchy -- confirm.
proteins = [
    ("small", Hierarchy.input(file_name="example-pdbs/1ubq.pdb")),
    ("medium", Hierarchy.input(file_name="example-pdbs/1yyf.pdb")),
    ("big", Hierarchy.input(file_name="example-pdbs/pTLS-6484.pdb")),
]

# Run every benchmark over all three sizes; the last argument is the label
# prefix passed on to time_function.  The benchmark callables are defined
# elsewhere in the file.
time_function_multiple(open_pdb, names, "open")
time_function_multiple(transformation, proteins, "transformation")
time_function_multiple(remove, proteins, "remove")
time_function_multiple(iteration, proteins, "iteration")
time_function_multiple(iteration_build_in, proteins, "iteration_build_in")
time_function_multiple(renumber, proteins, "renumber")
time_function_multiple(clone, proteins, "clone")
time_function_multiple(save, proteins, "save")
Beispiel #20
0
def update_from_pdb(pdb_df):
    """
    Find residue name, B factors given DataFrame with chain, residue id and altloc

    Carries out cctbx.iotbx dependent searching of pdb file.
    Only the first unique value of pdb_latest is read, so all rows are
    looked up in that single pdb file.  Each row that selects at least one
    model is returned with the added fields resname, B_mean and B_std; rows
    whose selection contains no model are dropped.

    Parameters
    ----------
    pdb_df: Pandas.DataFrame
        must provide the column pdb_latest (path to a pdb file); rows are
        selected by their chain, resid and alte attributes

    Returns
    -------
    pandas.DataFrame:
        the processed rows, in the same orientation as the input

    """
    # Load pdb path from DataFrame
    # need to select first unique value as there will be duplicates
    # of name for every residue
    pdb = pdb_df.pdb_latest.unique()[0]

    # read into iotbx.hierarchy
    pdb_in = hierarchy.input(file_name=pdb)
    # read into iotbx.selection cache
    sel_cache = pdb_in.hierarchy.atom_selection_cache()

    # loop over rows/ residues
    rows = []
    for index, row in pdb_df.iterrows():

        try:
            # Get selection object which corresponds to supplied chain residue id and altloc
            # Type conversion in res.id needed otherwise nothing is selected
            sel = sel_cache.selection(
                "chain {} and resid {} and altloc {}".format(
                    row.chain, str(int(row.resid)), row.alte))
        except AttributeError:
            # Use ligand LIG instead of chain resid and alte
            # This doesn't work at the next step, a large number
            # are being dropped under "Likely dummy atoms"
            # NOTE(review): presumably reached when the row lacks one of
            # the chain / resid / alte attributes -- confirm.
            sel = sel_cache.selection("resname LIG")

        # Select that residue from main hierarchy
        hier = pdb_in.hierarchy.select(sel)
        resnames = []

        # catch when multiple models are in pdb file
        # NOTE(review): whatever the first try assigns is always replaced by
        # the second try, so the first model is used in every case; a
        # selection without any model skips the row.
        try:
            model = hier.only_model()
        except AssertionError:
            pass
        try:
            model = hier.models()[0]
        except IndexError:
            continue

        for chain in model.chains():
            for residue_group in chain.residue_groups():
                for atom_group in residue_group.atom_groups():
                    resnames.append(atom_group.resname)

                    # Get B factor information on residue by looking at individual atoms
                    b = []
                    for atom in atom_group.atoms():
                        b.append(atom.b)

                    # Overwritten for every atom group: after the loops
                    # these hold the values of the last atom group visited
                    mean_b = np.mean(b)
                    std_b = np.std(b)

        # Append information to row
        # NOTE(review): resname comes from the first atom group while the
        # B statistics come from the last one.  If the model contains no
        # atom group, resnames[0] raises IndexError.
        # if len(resnames) == 1:
        row["resname"] = resnames[0]
        row["B_mean"] = mean_b
        row["B_std"] = std_b
        rows.append(row)
        # else:
        #     raise ValueError(
        #         "Multiple residues for selection"
        #         # "chain {} resid {} altloc {} "
        #         # "of pdb: {}".format(row.chain, row.resid, row.alte, pdb)
        #     )

    # Append rows: each Series becomes one column of the concatenated frame
    pdb_df = pd.concat(rows, axis=1)

    # Transpose to get in same orientation as input
    return pdb_df.T
def copy_b(pdb, ref_pdb, out_pdb, chain, resid, altloc=""):
    """
    Copy b factor of a single residue to another pdb file

    B factors are matched by atom name: every atom of the selected residue
    in pdb receives the B factor of the atom with the same name in the
    selected residue of ref_pdb.  The complete, modified structure is
    written to out_pdb.

    Parameters
    ----------
    pdb: str
        path to pdb file

    ref_pdb: str
        path to reference pdb file

    out_pdb: str
        path the modified pdb is written to; its parent directory is
        created if needed

    chain: str
        chain of interest

    resid: str
        residue of interest

    altloc: str
        altloc of interest; when empty the selection is not restricted
        by altloc

    Returns
    -------
    None

    Raises
    ------
    KeyError
        if the residue in pdb has an atom name that is absent from the
        residue in ref_pdb
    """

    # The same residue is selected in both files
    if altloc == "":
        selection_string = "chain {} resid {}".format(chain, resid)
    else:
        selection_string = "chain {} resid {} altloc {}".format(
            chain, resid, altloc)

    # read reference into iotbx.hierarchy and select the residue
    ref_pdb_in = hierarchy.input(file_name=ref_pdb)
    ref_sel = ref_pdb_in.hierarchy.atom_selection_cache().selection(
        selection_string)
    ref_hier = ref_pdb_in.hierarchy.select(ref_sel)

    # Map atom name -> reference B factor
    ref_lig = {}
    for ref_chain in ref_hier.only_model().chains():
        for residue_group in ref_chain.residue_groups():
            for atom_group in residue_group.atom_groups():
                for atom in atom_group.atoms():
                    ref_lig[atom.name] = atom.b

    # read target into iotbx.hierarchy and select the residue
    pdb_in = hierarchy.input(file_name=pdb)
    sel = pdb_in.hierarchy.atom_selection_cache().selection(selection_string)
    hier = pdb_in.hierarchy.select(sel)

    # Overwrite the B factors of the selected atoms
    for current_chain in hier.only_model().chains():
        for residue_group in current_chain.residue_groups():
            for atom_group in residue_group.atom_groups():
                for atom in atom_group.atoms():
                    atom.b = ref_lig[atom.name]

    # A bare file name has an empty dirname, which os.makedirs rejects;
    # only create the directory when there is one to create
    out_dir = os.path.dirname(out_pdb)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    with open(out_pdb, "w") as out:
        out.write(
            pdb_in.hierarchy.as_pdb_string(
                crystal_symmetry=pdb_in.input.crystal_symmetry()))
Beispiel #22
0
def copy_atoms(copy_params):
    """Copy atoms from one pdb file to many, then refine.

    For every crystal the following steps are performed:

    * copy the dimple pdb, mtz and cif (with cys bond) to the output directory
    * copy ligand atoms from existing coordinates (``input.base_pdb``)
    * run giant.merge_conformations to generate a multi state model
    * prepend link records suitable for both conformers of the ligand
    * run giant.quick_refine (directly or via qsub) to refine the ligand

    Parameters
    ----------
    copy_params :
        Parameter object. The attributes read here are
        ``input.{base_pdb, atoms_new, atoms_remove, xtal_list,
        start_xtal_number, end_xtal_number, prefix, path, pdb_style,
        mtz_style, cif, link_record_list, extra_params}``,
        ``output.{out_dir, refine_pdb, pdb, multi_state_model_pdb}`` and
        ``settings.{overwrite, program, ccp4_path, param_file, qsub}``.
        ``atoms_new`` / ``atoms_remove`` entries are indexed as
        ``(chain, resid)``.

    Notes
    -----
    The working directory of the process is changed to each crystal's output
    directory while it is processed and is not restored afterwards.
    """
    # Resolve paths once, before any os.chdir(): the loop changes the working
    # directory, which would otherwise invalidate relative paths for every
    # later command and every later crystal.
    out_dir = os.path.abspath(copy_params.output.out_dir)
    input_path = os.path.abspath(copy_params.input.path)
    cif_path = os.path.abspath(copy_params.input.cif)
    cif_name = os.path.basename(cif_path)

    # generate output directory if it doesn't exist
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # read in PDB file from which atoms are to be taken from (ground structure)
    pdb_in = hierarchy.input(file_name=copy_params.input.base_pdb)
    sel_cache = pdb_in.hierarchy.atom_selection_cache()

    # produce a hierarchy with the atoms to be copied
    donor_selection = " or ".join(
        "(resid {} and chain {})".format(atom_new[1], atom_new[0])
        for atom_new in copy_params.input.atoms_new
    )
    new_atoms_hier = pdb_in.hierarchy.select(sel_cache.selection(donor_selection))

    # Produce a selection string to determine which atoms are kept after
    # removal. An empty list is treated like None (nothing to remove) instead
    # of producing the invalid selection "not ()".
    not_selection_string = None
    if copy_params.input.atoms_remove:
        remove_selection = " or ".join(
            "(resid {} and chain {})".format(atom_remove[1], atom_remove[0])
            for atom_remove in copy_params.input.atoms_remove
        )
        not_selection_string = "not ({})".format(remove_selection)

    # Define xtals to loop over. Work on a copy so the caller's parameter
    # list is not extended as a side effect.
    xtals = list(copy_params.input.xtal_list or [])
    for num in range(
        copy_params.input.start_xtal_number, copy_params.input.end_xtal_number + 1
    ):
        xtals.append(copy_params.input.prefix + "{0:0>4}".format(num))

    # Loop over all xtals
    for xtal_name in xtals:
        xtal_out_dir = os.path.join(out_dir, xtal_name)
        input_pdb = os.path.join(input_path, xtal_name, copy_params.input.pdb_style)
        input_mtz = os.path.join(input_path, xtal_name, copy_params.input.mtz_style)
        copied_pdb = os.path.join(xtal_out_dir, copy_params.input.pdb_style)
        copied_mtz = os.path.join(xtal_out_dir, copy_params.input.mtz_style)
        copied_cif = os.path.join(xtal_out_dir, cif_name)
        changed_pdb = os.path.join(xtal_out_dir, copy_params.output.pdb)
        multi_state_pdb = os.path.join(
            xtal_out_dir, copy_params.output.multi_state_model_pdb
        )

        # For quick rerun
        already_refined = os.path.exists(
            os.path.join(xtal_out_dir, copy_params.output.refine_pdb)
        )
        if already_refined and not copy_params.settings.overwrite:
            print("Skipping {}, as attempted".format(xtal_name))
            continue

        # Run only if sufficient input data
        if not os.path.exists(input_pdb):
            print("pdb does not exist: {}".format(input_pdb))
            continue

        print("Trying to run {}".format(xtal_name))

        pdb_in_refine = hierarchy.input(file_name=input_pdb)
        acceptor_hierarchy = pdb_in_refine.construct_hierarchy()

        # remove atoms from xtal
        if not_selection_string is not None:
            refine_sel_cache = pdb_in_refine.hierarchy.atom_selection_cache()
            remove_atoms_sel = refine_sel_cache.selection(not_selection_string)
            working_hier = acceptor_hierarchy.select(remove_atoms_sel)
        else:
            working_hier = acceptor_hierarchy

        # Add atoms from base_pdb
        acceptor_hier = transfer_residue_groups_from_other(
            working_hier, new_atoms_hier, in_place=False, verbose=False
        )

        # Generate output xtal directories
        if not os.path.exists(xtal_out_dir):
            os.mkdir(xtal_out_dir)

        # Write output pdb with changed atoms
        with open(changed_pdb, "w") as changed_file:
            changed_file.write(
                acceptor_hier.as_pdb_string(
                    crystal_symmetry=pdb_in_refine.input.crystal_symmetry()
                )
            )

        # The giant.* tools and the restraints parameter file below work
        # relative to the crystal's output directory.
        os.chdir(xtal_out_dir)

        # Copy the input pdb to output directory
        os.system("cp {} {}".format(input_pdb, copied_pdb))

        # Copy the input cif to output_directory
        os.system("cp {} {}".format(cif_path, copied_cif))

        # Copy the input mtz to output directory (following symlinks)
        os.system("cp -rL {} {}".format(input_mtz, copied_mtz))

        # Run giant.merge_conformations
        os.system(
            "giant.merge_conformations major={} minor={}".format(
                copied_pdb, changed_pdb
            )
        )

        # Add link record strings into multimodel pdb file, prior to refinement
        if copy_params.input.link_record_list is not None:
            with open(multi_state_pdb, "r") as original:
                multi_model = original.read()

            with open(multi_state_pdb, "w") as modified:
                for link_record in copy_params.input.link_record_list:
                    modified.write(link_record)
                modified.write(multi_model)

        # Add extra params
        if copy_params.input.extra_params is not None:
            with open(
                "multi-state-restraints.{}.params".format(copy_params.settings.program),
                "a+",
            ) as param_file:
                # "a+" opens positioned at end-of-file; rewind so the existing
                # content is actually inspected and the params are not
                # appended again on every rerun.
                param_file.seek(0)
                if copy_params.input.extra_params not in param_file.read():
                    param_file.write(copy_params.input.extra_params)

        if copy_params.settings.program == "phenix":
            cmds = "module load phenix\n"
        elif copy_params.settings.program == "buster":
            cmds = "module load buster\n"
        else:
            cmds = "\n"

        cmds += "source {}\n".format(copy_params.settings.ccp4_path)

        # Run giant.quick_refine. The cif is referenced under the basename it
        # was copied to above.
        cmds += "giant.quick_refine {} {} {} params={} program={}\n".format(
            multi_state_pdb,
            copied_mtz,
            copied_cif,
            os.path.join(xtal_out_dir, copy_params.settings.param_file),
            copy_params.settings.program,
        )
        cmds += "giant.split_conformations refine.pdb"

        if copy_params.settings.qsub:
            script_path = os.path.join(
                xtal_out_dir, "{}_quick_refine.sh".format(xtal_name)
            )
            with open(script_path, "w") as script_file:
                script_file.write(cmds)

            os.system("qsub {}".format(script_path))
        else:
            os.system(cmds)
Beispiel #23
0
from scitbx.array_family import flex

# Script entry point: locate the ligand centroid in the bound model and look
# up the residue sequence number of the last water atom.
# NOTE(review): this example appears truncated -- --output_pdb is parsed but
# nothing is written in the visible code.
if __name__ == "__main__":
    """
    Copy a water atom into the centroid of ligand.
    """

    # parse paths to the ground and bound pdb files
    parser = argparse.ArgumentParser("copy water atom to ligand centroid")
    parser.add_argument("--bound_pdb")
    parser.add_argument("--ground_pdb")
    parser.add_argument("--output_pdb")
    param = parser.parse_args()

    # Get centroid of ligand from bound pdb: select every atom with residue
    # name LIG and take the mean of their coordinates
    bound_pdb_in = hierarchy.input(file_name=param.bound_pdb)
    bound_sel_cache = bound_pdb_in.hierarchy.atom_selection_cache()
    selection_string = "resname LIG"
    lig_sel = bound_sel_cache.selection(selection_string)
    lig_hier = bound_pdb_in.hierarchy.select(lig_sel)
    lig_centroid = lig_hier.atoms().extract_xyz().mean()

    # read in ground state pdb
    ground_pdb_in = hierarchy.input(file_name=param.ground_pdb)
    ground_sel_cache = ground_pdb_in.hierarchy.atom_selection_cache()

    # get water selection
    # NOTE(review): the waters are selected from the *bound* model here;
    # ground_sel_cache created just above is not used -- confirm intended.
    wat_sel = bound_sel_cache.selection("water")
    wat_hier = bound_pdb_in.hierarchy.select(wat_sel)

    # resseq of the residue group that owns the last selected water atom
    # (atom -> atom_group -> residue_group)
    wat_resseq = wat_hier.atoms()[-1].parent().parent().resseq
Beispiel #24
0
def open_pdb(filename):
    """Read the PDB file *filename* and return the parsed input object.

    The file is read with ``Hierarchy.input`` (presumably
    ``iotbx.pdb.hierarchy`` imported under that name -- confirm against the
    file's imports). Previously the result was built and then discarded, so
    callers always received ``None``.

    Parameters
    ----------
    filename : str
        Path of the PDB file to read.

    Returns
    -------
    The object returned by ``Hierarchy.input(file_name=filename)``.
    """
    return Hierarchy.input(file_name=filename)
  def run(self, args, command_name, out=sys.stdout):
    """Run the ensemble probability analysis on a single multi-model PDB file.

    Command-line arguments are processed into parameters (the
    ``ensemble_probability`` scope is stored on ``self.params``). Exactly one
    PDB file must be given. Output is logged through ``self.log`` to stdout
    and to ``<pdb name without .pdb>_pensemble.log``; ``sys.stderr`` is
    replaced by that log object.

    Two modes are supported:

    * If ``assign_sigma_from_map`` is set and ``ensemble_sigma_map_input`` is
      given, ``get_map_sigma`` is called for every structure against the
      first Miller array of that MTZ file and the result is written with
      ``write_ensemble_pdb``.
    * Otherwise Fcalc is computed for every model and averaged over the
      ensemble. For every model the per-scatterer ratio of the values
      obtained from the averaged map and from the model's own map is stored
      as occupancy (values <= 0 are set to 0.001, values > 1 to 1.0) and the
      ensemble is written with ``write_ensemble_pdb``. Optionally the models
      are ranked by the sum of ``-log`` of these values and R-factors are
      recalculated for several percentile cutoffs.

    Args:
      args: command-line argument list.
      command_name: program name used in the usage text and the log header.
      out: not referenced in the body of this method.

    Raises:
      Sorry: if the number of PDB files is not exactly one; if
        ``fobs_vs_fcalc_post_nll`` is set but no reflection file was given;
        if the map input is not an MTZ file; or if neither an MTZ file nor
        ``fcalc_high_resolution`` is available.
    """
    command_line = (iotbx_option_parser(
      usage="%s [options]" % command_name,
      description='Example: %s data.mtz data.mtz ref_model.pdb'%command_name)
      .option(None, "--show_defaults",
        action="store_true",
        help="Show list of parameters.")
      ).process(args=args)

    # NOTE: cif_file is not referenced again in this method
    cif_file = None
    processed_args = utils.process_command_line_args(
                       args          = args,
                       log           = sys.stdout,
                       master_params = master_phil)
    params = processed_args.params
    if(params is None): params = master_phil
    self.params = params.extract().ensemble_probability
    pdb_file_names = processed_args.pdb_file_names
    if len(pdb_file_names) != 1 :
      raise Sorry("Only one PDB structure may be used")
    pdb_file = file_reader.any_file(pdb_file_names[0])
    # Log to stdout and, via a temporary StringIO buffer, to a log file
    self.log = multi_out()
    self.log.register(label="stdout", file_object=sys.stdout)
    self.log.register(
      label="log_buffer",
      file_object=StringIO(),
      atexit_send_to=None)
    # NOTE: replaces the process-wide stderr with the log object
    sys.stderr = self.log
    # Log file is created in the current directory, named after the PDB file
    log_file = open(pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.log', "w")

    self.log.replace_stringio(
        old_label="log_buffer",
        new_label="log",
        new_file_object=log_file)
    utils.print_header(command_name, out = self.log)
    params.show(out = self.log)
    #
    f_obs = None
    r_free_flags = None
    reflection_files = processed_args.reflection_files

    if self.params.fobs_vs_fcalc_post_nll:
      if len(reflection_files) == 0:
        raise Sorry("Fobs from input MTZ required for fobs_vs_fcalc_post_nll")

    # Extract Fobs and R-free flags when a reflection file was supplied
    if len(reflection_files) > 0:
      crystal_symmetry = processed_args.crystal_symmetry
      print >> self.log, 'Reflection file : ', processed_args.reflection_file_names[0]
      utils.print_header("Model and data statistics", out = self.log)
      rfs = reflection_file_server(
        crystal_symmetry = crystal_symmetry,
        reflection_files = processed_args.reflection_files,
        log              = self.log)

      parameters = utils.data_and_flags_master_params().extract()
      determine_data_and_flags_result = utils.determine_data_and_flags(
        reflection_file_server  = rfs,
        parameters              = parameters,
        data_parameter_scope    = "refinement.input.xray_data",
        flags_parameter_scope   = "refinement.input.xray_data.r_free_flags",
        data_description        = "X-ray data",
        keep_going              = True,
        log                     = self.log)
      f_obs = determine_data_and_flags_result.f_obs
      number_of_reflections = f_obs.indices().size()
      r_free_flags = determine_data_and_flags_result.r_free_flags
      test_flag_value = determine_data_and_flags_result.test_flag_value
      # Fall back to an all-False flag array of the same size as Fobs
      if(r_free_flags is None):
        r_free_flags=f_obs.array(data=flex.bool(f_obs.data().size(), False))

    # process PDB
    pdb_file.assert_file_type("pdb")
    #
    pdb_in = hierarchy.input(file_name=pdb_file.file_name)
    ens_pdb_hierarchy = pdb_in.construct_hierarchy()
    ens_pdb_hierarchy.atoms().reset_i_seq()
    # One xray structure per entry; iterated below as the ensemble members
    ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
    number_structures = len(ens_pdb_xrs_s)
    print >> self.log, 'Number of structure in ensemble : ', number_structures

    # Calculate sigmas from input map only
    if self.params.assign_sigma_from_map and self.params.ensemble_sigma_map_input is not None:
      # process MTZ
      input_file = file_reader.any_file(self.params.ensemble_sigma_map_input)
      if input_file.file_type == "hkl" :
        if input_file.file_object.file_type() != "ccp4_mtz" :
           raise Sorry("Only MTZ format accepted for map input")
        else:
          mtz_file = input_file
      else:
        raise Sorry("Only MTZ format accepted for map input")
      miller_arrays = mtz_file.file_server.miller_arrays
      # Only the first Miller array of the MTZ file is used
      map_coeffs_1 = miller_arrays[0]
      #
      xrs_list = []
      for n, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
        # get sigma levels from ensemble fc for each structure
        xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                          ens_pdb_xrs       = ens_pdb_xrs,
                          map_coeffs_1      = map_coeffs_1,
                          residue_detail    = self.params.residue_detail,
                          ignore_hd         = self.params.ignore_hd,
                          log               = self.log)
        xrs_list.append(xrs)
      # write ensemble pdb file, occupancies as sigma level
      filename = pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_vs_' + self.params.ensemble_sigma_map_input.replace('.mtz','') + '_pensemble.pdb'
      write_ensemble_pdb(filename = filename,
                         xrs_list = xrs_list,
                         ens_pdb_hierarchy = ens_pdb_hierarchy
                         )

    # Do full analysis vs Fobs
    else:
      model_map_coeffs = []
      fmodel = None
      # Get <fcalc>
      for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
        ens_pdb_xrs.set_occupancies(1.0)
        # The fmodel object is created from the first model and then updated
        # in place with each subsequent model
        if model == 0:
          # If mtz not supplied get fobs from xray structure...
          # Use input Fobs for scoring against nll
          if self.params.fobs_vs_fcalc_post_nll:
            dummy_fobs = f_obs
          else:
            if f_obs == None:
              if self.params.fcalc_high_resolution == None:
                raise Sorry("Please supply high resolution limit or input mtz file.")
              dummy_dmin = self.params.fcalc_high_resolution
              dummy_dmax = self.params.fcalc_low_resolution
            else:
              print >> self.log, 'Supplied mtz used to determine high and low resolution cuttoffs'
              dummy_dmax, dummy_dmin = f_obs.d_max_min()
            # NOTE: only dummy_dmin is passed on; dummy_dmax is not used below
            dummy_fobs = abs(ens_pdb_xrs.structure_factors(d_min = dummy_dmin).f_calc())
            dummy_fobs.set_observation_type_xray_amplitude()
            # If mtz supplied, free flags are over written to prevent array size error
            r_free_flags = dummy_fobs.array(data=flex.bool(dummy_fobs.data().size(),False))
          #
          fmodel = utils.fmodel_simple(
                     scattering_table         = "wk1995",
                     xray_structures          = [ens_pdb_xrs],
                     f_obs                    = dummy_fobs,
                     target_name              = 'ls',
                     bulk_solvent_and_scaling = False,
                     r_free_flags             = r_free_flags
                     )
          # Zero-filled template array, reused later to wrap averaged data
          f_calc_ave = fmodel.f_calc().array(data = fmodel.f_calc().data()*0).deep_copy()
          # XXX Important to ensure scale is identical for each model and <model>
          # NOTE(review): this assigns an attribute rather than calling a
          # method -- confirm that is the intended way to fix the scale
          fmodel.set_scale_switch = 1.0
          # Running sum of Fcalc data over all models
          f_calc_ave_total = fmodel.f_calc().data().deep_copy()
        else:
          fmodel.update_xray_structure(xray_structure  = ens_pdb_xrs,
                                       update_f_calc   = True,
                                       update_f_mask   = False)
          f_calc_ave_total += fmodel.f_calc().data().deep_copy()
        print >> self.log, 'Model :', model+1
        print >> self.log, "\nStructure vs real Fobs (no bulk solvent or scaling)"
        print >> self.log, 'Rwork          : %5.4f '%fmodel.r_work()
        print >> self.log, 'Rfree          : %5.4f '%fmodel.r_free()
        print >> self.log, 'K1             : %5.4f '%fmodel.scale_k1()
        fcalc_edm        = fmodel.electron_density_map()
        fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type = 'Fc')
        fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(column_root_label ='Fc')
        if self.params.output_model_and_model_ave_mtz:
          fcalc_mtz_dataset.mtz_object().write(file_name = str(model+1)+"_Fc.mtz")
        # Keep the per-model Fc coefficients for the second pass below
        model_map_coeffs.append(fcalc_map_coeffs.deep_copy())

      # Replace Fcalc in fmodel with the ensemble average
      fmodel.update(f_calc = f_calc_ave.array(f_calc_ave_total / number_structures))
      print >> self.log, "\nEnsemble vs real Fobs (no bulk solvent or scaling)"
      print >> self.log, 'Rwork          : %5.4f '%fmodel.r_work()
      print >> self.log, 'Rfree          : %5.4f '%fmodel.r_free()
      print >> self.log, 'K1             : %5.4f '%fmodel.scale_k1()

      # Get <Fcalc> map
      fcalc_ave_edm        = fmodel.electron_density_map()
      fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(map_type = 'Fc').deep_copy()
      fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(column_root_label ='Fc')
      if self.params.output_model_and_model_ave_mtz:
        fcalc_ave_mtz_dataset.mtz_object().write(file_name = "aveFc.mtz")
      # NOTE: from here on fcalc_ave_map_coeffs holds the FFT map, not the
      # map coefficients
      fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
      fcalc_ave_map_coeffs.apply_volume_scaling()
      fcalc_ave_map_data   = fcalc_ave_map_coeffs.real_map_unpadded()
      fcalc_ave_map_stats  = maptbx.statistics(fcalc_ave_map_data)

      print >> self.log, "<Fcalc> Map Stats :"
      fcalc_ave_map_stats.show_summary(f = self.log)
      # NOTE: offset is not referenced again in this method
      offset = fcalc_ave_map_stats.min()
      # List of (summed -log(prob), model index) tuples
      model_neg_ll = []

      # Running count of scatterers in the models already processed
      number_previous_scatters = 0

      # Run through structure list again and get probability
      xrs_list = []
      for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
        if self.params.verbose:
          print >> self.log, '\n\nModel                   : ', model+1
        # Get model atom sigmas vs Fcalc
        fcalc_map = model_map_coeffs[model].fft_map()
        fcalc_map.apply_volume_scaling()
        fcalc_map_data  = fcalc_map.real_map_unpadded()
        fcalc_map_stats  = maptbx.statistics(fcalc_map_data)
        if self.params.verbose:
          print >> self.log, "Fcalc map stats         :"
        # NOTE(review): unlike the heading above, this summary is printed
        # regardless of verbose -- confirm intended
        fcalc_map_stats.show_summary(f = self.log)

        xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                            ens_pdb_xrs       = ens_pdb_xrs,
                            fft_map_1         = fcalc_map,
                            model_i           = model,
                            residue_detail    = self.params.residue_detail,
                            ignore_hd         = self.params.ignore_hd,
                            number_previous_scatters = number_previous_scatters,
                            log               = self.log)
        # The values computed by get_map_sigma are read back from the
        # occupancy column of the returned structure
        fcalc_sigmas = xrs.scatterers().extract_occupancies()
        del fcalc_map
        # Get model atom sigmas vs <Fcalc>
        xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                            ens_pdb_xrs       = ens_pdb_xrs,
                            fft_map_1         = fcalc_ave_map_coeffs,
                            model_i           = model,
                            residue_detail    = self.params.residue_detail,
                            ignore_hd         = self.params.ignore_hd,
                            number_previous_scatters = number_previous_scatters,
                            log               = self.log)

        ### For testing other residue averaging options
        #print xrs.residue_selections

        fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
        # Probability of model given <model>
        prob = fcalc_ave_sigmas / fcalc_sigmas
        # XXX debug option
        if False:
          for n,p in enumerate(prob):
            print >> self.log, ' {0:5d} {1:5.3f}'.format(n,p)
        # Set probability between 0 and 1
        # XXX Make Histogram / more stats
        prob_lss_zero = flex.bool(prob <= 0)
        prob_grt_one = flex.bool(prob > 1)
        # Values <= 0 become 0.001 (so the log below is defined), values > 1
        # become 1.0
        prob.set_selected(prob_lss_zero, 0.001)
        prob.set_selected(prob_grt_one, 1.0)
        xrs.set_occupancies(prob)
        xrs_list.append(xrs)
        sum_neg_ll = sum(-flex.log(prob))
        model_neg_ll.append((sum_neg_ll, model))
        if self.params.verbose:
          print >> self.log, 'Model probability stats :'
          print >> self.log, prob.min_max_mean().show()
          print >> self.log, '  Count < 0.0 : ', prob_lss_zero.count(True)
          print >> self.log, '  Count > 1.0 : ', prob_grt_one.count(True)

        # For averaging by residue
        number_previous_scatters += ens_pdb_xrs.sites_cart().size()

      # write ensemble pdb file, occupancies as sigma level
      write_ensemble_pdb(filename = pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.pdb',
                       xrs_list = xrs_list,
                       ens_pdb_hierarchy = ens_pdb_hierarchy
                       )

      # XXX Test ordering models by nll
      # XXX Test removing nth percentile atoms
      if self.params.sort_ensemble_by_nll_score or self.params.fobs_vs_fcalc_post_nll:
        for percentile in [1.0,0.975,0.95,0.9,0.8,0.6,0.2]:
          # Process models in order of increasing summed -log(prob)
          model_neg_ll = sorted(model_neg_ll)
          f_calc_ave_total_reordered = None
          print_list = []
          for i_neg_ll in model_neg_ll:
            xrs = xrs_list[i_neg_ll[1]]
            nll_occ = xrs.scatterers().extract_occupancies()

            # Set q=0 nth percentile atoms
            sorted_nll_occ = sorted(nll_occ, reverse=True)
            number_atoms = len(sorted_nll_occ)
            # Value at the percentile position of the descending-sorted list;
            # atoms strictly below it get occupancy 0, all others 1
            percentile_prob_cutoff = sorted_nll_occ[int(number_atoms * percentile)-1]
            cutoff_selections = flex.bool(nll_occ < percentile_prob_cutoff)
            cutoff_nll_occ = flex.double(nll_occ.size(), 1.0).set_selected(cutoff_selections, 0.0)
            #XXX Debug
            if False:
              print '\nDebug'
              for x in xrange(len(cutoff_selections)):
                print cutoff_selections[x], nll_occ[x], cutoff_nll_occ[x]
              print percentile
              print percentile_prob_cutoff
              print cutoff_selections.count(True)
              print cutoff_selections.size()
              print cutoff_nll_occ.count(0.0)
              print 'Count q = 1           : ', cutoff_nll_occ.count(1.0)
              print 'Count scatterers size : ', cutoff_nll_occ.size()

            xrs.set_occupancies(cutoff_nll_occ)
            fmodel.update_xray_structure(xray_structure  = xrs,
                                         update_f_calc   = True,
                                         update_f_mask   = True)

            # Accumulate Fcalc and Fmask over the models seen so far; cntr
            # counts how many models are in the running sums
            if f_calc_ave_total_reordered == None:
              f_calc_ave_total_reordered = fmodel.f_calc().data().deep_copy()
              f_mask_ave_total_reordered = fmodel.f_masks()[0].data().deep_copy()
              cntr = 1
            else:
              f_calc_ave_total_reordered += fmodel.f_calc().data().deep_copy()
              f_mask_ave_total_reordered += fmodel.f_masks()[0].data().deep_copy()
              cntr+=1
            fmodel.update(f_calc = f_calc_ave.array(f_calc_ave_total_reordered / cntr).deep_copy(),
                          f_mask = f_calc_ave.array(f_mask_ave_total_reordered / cntr).deep_copy()
                          )

            # Update solvent and scale
            # XXX Will need to apply_back_trace on latest version
            fmodel.set_scale_switch = 0
            fmodel.update_all_scales()

            # Reset occ for output
            xrs.set_occupancies(nll_occ)
            # k1 updated vs Fobs
            if self.params.fobs_vs_fcalc_post_nll:
              print_list.append([cntr, i_neg_ll[0], i_neg_ll[1], fmodel.r_work(), fmodel.r_free()])

          # Order models by nll and print summary
          print >> self.log, '\nModels ranked by nll <Fcalc> R-factors recalculated'
          print >> self.log, 'Percentile cutoff : {0:5.3f}'.format(percentile)
          # NOTE: rebuilt for every percentile, and only filled from
          # print_list, which stays empty unless fobs_vs_fcalc_post_nll is set
          xrs_list_sorted_nll = []
          print >> self.log, '      |      NLL     <Rw>     <Rf>    Ens Model'
          for info in print_list:
            print >> self.log, ' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.format(
              info[0],
              info[1],
              info[3],
              info[4],
              info[2]+1,
              )
            xrs_list_sorted_nll.append(xrs_list[info[2]])

        # Output nll ordered ensemble
        # NOTE: written once after the percentile loop, so it uses the list
        # built in the last iteration (percentile 0.2)

        write_ensemble_pdb(filename = 'nll_ordered_' + pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.pdb',
                       xrs_list = xrs_list_sorted_nll,
                       ens_pdb_hierarchy = ens_pdb_hierarchy
                       )
Beispiel #26
0
    def run(self, args, command_name, out=sys.stdout):
        command_line = (iotbx_option_parser(
            usage="%s [options]" % command_name,
            description='Example: %s data.mtz data.mtz ref_model.pdb' %
            command_name).option(
                None,
                "--show_defaults",
                action="store_true",
                help="Show list of parameters.")).process(args=args)

        cif_file = None
        processed_args = utils.process_command_line_args(
            args=args, log=sys.stdout, master_params=master_phil)
        params = processed_args.params
        if (params is None): params = master_phil
        self.params = params.extract().ensemble_probability
        pdb_file_names = processed_args.pdb_file_names
        if len(pdb_file_names) != 1:
            raise Sorry("Only one PDB structure may be used")
        pdb_file = file_reader.any_file(pdb_file_names[0])
        self.log = multi_out()
        self.log.register(label="stdout", file_object=sys.stdout)
        self.log.register(label="log_buffer",
                          file_object=StringIO(),
                          atexit_send_to=None)
        sys.stderr = self.log
        log_file = open(
            pdb_file_names[0].split('/')[-1].replace('.pdb', '') +
            '_pensemble.log', "w")

        self.log.replace_stringio(old_label="log_buffer",
                                  new_label="log",
                                  new_file_object=log_file)
        utils.print_header(command_name, out=self.log)
        params.show(out=self.log)
        #
        f_obs = None
        r_free_flags = None
        reflection_files = processed_args.reflection_files

        if self.params.fobs_vs_fcalc_post_nll:
            if len(reflection_files) == 0:
                raise Sorry(
                    "Fobs from input MTZ required for fobs_vs_fcalc_post_nll")

        if len(reflection_files) > 0:
            crystal_symmetry = processed_args.crystal_symmetry
            print('Reflection file : ',
                  processed_args.reflection_file_names[0],
                  file=self.log)
            utils.print_header("Model and data statistics", out=self.log)
            rfs = reflection_file_server(
                crystal_symmetry=crystal_symmetry,
                reflection_files=processed_args.reflection_files,
                log=self.log)

            parameters = extract_xtal_data.data_and_flags_master_params(
            ).extract()
            determine_data_and_flags_result = extract_xtal_data.run(
                reflection_file_server=rfs,
                parameters=parameters,
                data_parameter_scope="refinement.input.xray_data",
                flags_parameter_scope="refinement.input.xray_data.r_free_flags",
                data_description="X-ray data",
                keep_going=True,
                log=self.log)
            f_obs = determine_data_and_flags_result.f_obs
            number_of_reflections = f_obs.indices().size()
            r_free_flags = determine_data_and_flags_result.r_free_flags
            test_flag_value = determine_data_and_flags_result.test_flag_value
            if (r_free_flags is None):
                r_free_flags = f_obs.array(
                    data=flex.bool(f_obs.data().size(), False))

        # process PDB
        pdb_file.assert_file_type("pdb")
        #
        pdb_in = hierarchy.input(file_name=pdb_file.file_name)
        ens_pdb_hierarchy = pdb_in.construct_hierarchy()
        ens_pdb_hierarchy.atoms().reset_i_seq()
        ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
        number_structures = len(ens_pdb_xrs_s)
        print('Number of structure in ensemble : ',
              number_structures,
              file=self.log)

        # Calculate sigmas from input map only
        if self.params.assign_sigma_from_map and self.params.ensemble_sigma_map_input is not None:
            # process MTZ
            input_file = file_reader.any_file(
                self.params.ensemble_sigma_map_input)
            if input_file.file_type == "hkl":
                if input_file.file_object.file_type() != "ccp4_mtz":
                    raise Sorry("Only MTZ format accepted for map input")
                else:
                    mtz_file = input_file
            else:
                raise Sorry("Only MTZ format accepted for map input")
            miller_arrays = mtz_file.file_server.miller_arrays
            map_coeffs_1 = miller_arrays[0]
            #
            xrs_list = []
            for n, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
                # get sigma levels from ensemble fc for each structure
                xrs = get_map_sigma(ens_pdb_hierarchy=ens_pdb_hierarchy,
                                    ens_pdb_xrs=ens_pdb_xrs,
                                    map_coeffs_1=map_coeffs_1,
                                    residue_detail=self.params.residue_detail,
                                    ignore_hd=self.params.ignore_hd,
                                    log=self.log)
                xrs_list.append(xrs)
            # write ensemble pdb file, occupancies as sigma level
            filename = pdb_file_names[0].split('/')[-1].replace(
                '.pdb',
                '') + '_vs_' + self.params.ensemble_sigma_map_input.replace(
                    '.mtz', '') + '_pensemble.pdb'
            write_ensemble_pdb(filename=filename,
                               xrs_list=xrs_list,
                               ens_pdb_hierarchy=ens_pdb_hierarchy)

        # Do full analysis vs Fobs
        else:
            model_map_coeffs = []
            fmodel = None
            # Get <fcalc>
            for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
                ens_pdb_xrs.set_occupancies(1.0)
                if model == 0:
                    # If mtz not supplied get fobs from xray structure...
                    # Use input Fobs for scoring against nll
                    if self.params.fobs_vs_fcalc_post_nll:
                        dummy_fobs = f_obs
                    else:
                        if f_obs == None:
                            if self.params.fcalc_high_resolution == None:
                                raise Sorry(
                                    "Please supply high resolution limit or input mtz file."
                                )
                            dummy_dmin = self.params.fcalc_high_resolution
                            dummy_dmax = self.params.fcalc_low_resolution
                        else:
                            print(
                                'Supplied mtz used to determine high and low resolution cuttoffs',
                                file=self.log)
                            dummy_dmax, dummy_dmin = f_obs.d_max_min()
                        #
                        dummy_fobs = abs(
                            ens_pdb_xrs.structure_factors(
                                d_min=dummy_dmin).f_calc())
                        dummy_fobs.set_observation_type_xray_amplitude()
                        # If mtz supplied, free flags are overwritten to prevent array size error
                        r_free_flags = dummy_fobs.array(
                            data=flex.bool(dummy_fobs.data().size(), False))
                    #
                    fmodel = utils.fmodel_simple(
                        scattering_table="wk1995",
                        xray_structures=[ens_pdb_xrs],
                        f_obs=dummy_fobs,
                        target_name='ls',
                        bulk_solvent_and_scaling=False,
                        r_free_flags=r_free_flags)
                    f_calc_ave = fmodel.f_calc().array(
                        data=fmodel.f_calc().data() * 0).deep_copy()
                    # XXX Important to ensure scale is identical for each model and <model>
                    fmodel.set_scale_switch = 1.0
                    f_calc_ave_total = fmodel.f_calc().data().deep_copy()
                else:
                    fmodel.update_xray_structure(xray_structure=ens_pdb_xrs,
                                                 update_f_calc=True,
                                                 update_f_mask=False)
                    f_calc_ave_total += fmodel.f_calc().data().deep_copy()
                print('Model :', model + 1, file=self.log)
                print("\nStructure vs real Fobs (no bulk solvent or scaling)",
                      file=self.log)
                print('Rwork          : %5.4f ' % fmodel.r_work(),
                      file=self.log)
                print('Rfree          : %5.4f ' % fmodel.r_free(),
                      file=self.log)
                print('K1             : %5.4f ' % fmodel.scale_k1(),
                      file=self.log)
                fcalc_edm = fmodel.electron_density_map()
                fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type='Fc')
                fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(
                    column_root_label='Fc')
                if self.params.output_model_and_model_ave_mtz:
                    fcalc_mtz_dataset.mtz_object().write(
                        file_name=str(model + 1) + "_Fc.mtz")
                model_map_coeffs.append(fcalc_map_coeffs.deep_copy())

            fmodel.update(f_calc=f_calc_ave.array(f_calc_ave_total /
                                                  number_structures))
            print("\nEnsemble vs real Fobs (no bulk solvent or scaling)",
                  file=self.log)
            print('Rwork          : %5.4f ' % fmodel.r_work(), file=self.log)
            print('Rfree          : %5.4f ' % fmodel.r_free(), file=self.log)
            print('K1             : %5.4f ' % fmodel.scale_k1(), file=self.log)

            # Get <Fcalc> map
            fcalc_ave_edm = fmodel.electron_density_map()
            fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(
                map_type='Fc').deep_copy()
            fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(
                column_root_label='Fc')
            if self.params.output_model_and_model_ave_mtz:
                fcalc_ave_mtz_dataset.mtz_object().write(file_name="aveFc.mtz")
            fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
            fcalc_ave_map_coeffs.apply_volume_scaling()
            fcalc_ave_map_data = fcalc_ave_map_coeffs.real_map_unpadded()
            fcalc_ave_map_stats = maptbx.statistics(fcalc_ave_map_data)

            print("<Fcalc> Map Stats :", file=self.log)
            fcalc_ave_map_stats.show_summary(f=self.log)
            offset = fcalc_ave_map_stats.min()
            model_neg_ll = []

            number_previous_scatters = 0

            # Run through structure list again and get probability
            xrs_list = []
            for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
                if self.params.verbose:
                    print('\n\nModel                   : ',
                          model + 1,
                          file=self.log)
                # Get model atom sigmas vs Fcalc
                fcalc_map = model_map_coeffs[model].fft_map()
                fcalc_map.apply_volume_scaling()
                fcalc_map_data = fcalc_map.real_map_unpadded()
                fcalc_map_stats = maptbx.statistics(fcalc_map_data)
                if self.params.verbose:
                    print("Fcalc map stats         :", file=self.log)
                fcalc_map_stats.show_summary(f=self.log)

                xrs = get_map_sigma(
                    ens_pdb_hierarchy=ens_pdb_hierarchy,
                    ens_pdb_xrs=ens_pdb_xrs,
                    fft_map_1=fcalc_map,
                    model_i=model,
                    residue_detail=self.params.residue_detail,
                    ignore_hd=self.params.ignore_hd,
                    number_previous_scatters=number_previous_scatters,
                    log=self.log)
                fcalc_sigmas = xrs.scatterers().extract_occupancies()
                del fcalc_map
                # Get model atom sigmas vs <Fcalc>
                xrs = get_map_sigma(
                    ens_pdb_hierarchy=ens_pdb_hierarchy,
                    ens_pdb_xrs=ens_pdb_xrs,
                    fft_map_1=fcalc_ave_map_coeffs,
                    model_i=model,
                    residue_detail=self.params.residue_detail,
                    ignore_hd=self.params.ignore_hd,
                    number_previous_scatters=number_previous_scatters,
                    log=self.log)

                ### For testing other residue averaging options
                #print xrs.residue_selections

                fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
                # Probability of model given <model>
                prob = fcalc_ave_sigmas / fcalc_sigmas
                # XXX debug option
                if False:
                    for n, p in enumerate(prob):
                        print(' {0:5d} {1:5.3f}'.format(n, p), file=self.log)
                # Clamp probability into (0, 1]: values <= 0 become 0.001, values > 1 become 1.0
                # XXX Make Histogram / more stats
                prob_lss_zero = flex.bool(prob <= 0)
                prob_grt_one = flex.bool(prob > 1)
                prob.set_selected(prob_lss_zero, 0.001)
                prob.set_selected(prob_grt_one, 1.0)
                xrs.set_occupancies(prob)
                xrs_list.append(xrs)
                sum_neg_ll = sum(-flex.log(prob))
                model_neg_ll.append((sum_neg_ll, model))
                if self.params.verbose:
                    print('Model probability stats :', file=self.log)
                    print(prob.min_max_mean().show(), file=self.log)
                    print('  Count < 0.0 : ',
                          prob_lss_zero.count(True),
                          file=self.log)
                    print('  Count > 1.0 : ',
                          prob_grt_one.count(True),
                          file=self.log)

                # For averaging by residue
                number_previous_scatters += ens_pdb_xrs.sites_cart().size()

            # write ensemble pdb file, occupancies as sigma level
            write_ensemble_pdb(
                filename=pdb_file_names[0].split('/')[-1].replace('.pdb', '') +
                '_pensemble.pdb',
                xrs_list=xrs_list,
                ens_pdb_hierarchy=ens_pdb_hierarchy)

            # XXX Test ordering models by nll
            # XXX Test removing nth percentile atoms
            if self.params.sort_ensemble_by_nll_score or self.params.fobs_vs_fcalc_post_nll:
                for percentile in [1.0, 0.975, 0.95, 0.9, 0.8, 0.6, 0.2]:
                    model_neg_ll = sorted(model_neg_ll)
                    f_calc_ave_total_reordered = None
                    print_list = []
                    for i_neg_ll in model_neg_ll:
                        xrs = xrs_list[i_neg_ll[1]]
                        nll_occ = xrs.scatterers().extract_occupancies()

                        # Set q=0 nth percentile atoms
                        sorted_nll_occ = sorted(nll_occ, reverse=True)
                        number_atoms = len(sorted_nll_occ)
                        percentile_prob_cutoff = sorted_nll_occ[
                            int(number_atoms * percentile) - 1]
                        cutoff_selections = flex.bool(
                            nll_occ < percentile_prob_cutoff)
                        cutoff_nll_occ = flex.double(nll_occ.size(),
                                                     1.0).set_selected(
                                                         cutoff_selections,
                                                         0.0)
                        #XXX Debug
                        if False:
                            print('\nDebug')
                            for x in range(len(cutoff_selections)):
                                print(cutoff_selections[x], nll_occ[x],
                                      cutoff_nll_occ[x])
                            print(percentile)
                            print(percentile_prob_cutoff)
                            print(cutoff_selections.count(True))
                            print(cutoff_selections.size())
                            print(cutoff_nll_occ.count(0.0))
                            print('Count q = 1           : ',
                                  cutoff_nll_occ.count(1.0))
                            print('Count scatterers size : ',
                                  cutoff_nll_occ.size())

                        xrs.set_occupancies(cutoff_nll_occ)
                        fmodel.update_xray_structure(xray_structure=xrs,
                                                     update_f_calc=True,
                                                     update_f_mask=True)

                        if f_calc_ave_total_reordered == None:
                            f_calc_ave_total_reordered = fmodel.f_calc().data(
                            ).deep_copy()
                            f_mask_ave_total_reordered = fmodel.f_masks(
                            )[0].data().deep_copy()
                            cntr = 1
                        else:
                            f_calc_ave_total_reordered += fmodel.f_calc().data(
                            ).deep_copy()
                            f_mask_ave_total_reordered += fmodel.f_masks(
                            )[0].data().deep_copy()
                            cntr += 1
                        fmodel.update(
                            f_calc=f_calc_ave.array(
                                f_calc_ave_total_reordered / cntr).deep_copy(),
                            f_mask=f_calc_ave.array(
                                f_mask_ave_total_reordered / cntr).deep_copy())

                        # Update solvent and scale
                        # XXX Will need to apply_back_trace on latest version
                        fmodel.set_scale_switch = 0
                        fmodel.update_all_scales()

                        # Reset occ for output
                        xrs.set_occupancies(nll_occ)
                        # k1 updated vs Fobs
                        if self.params.fobs_vs_fcalc_post_nll:
                            print_list.append([
                                cntr, i_neg_ll[0], i_neg_ll[1],
                                fmodel.r_work(),
                                fmodel.r_free()
                            ])

                    # Order models by nll and print summary
                    print(
                        '\nModels ranked by nll <Fcalc> R-factors recalculated',
                        file=self.log)
                    print('Percentile cutoff : {0:5.3f}'.format(percentile),
                          file=self.log)
                    xrs_list_sorted_nll = []
                    print('      |      NLL     <Rw>     <Rf>    Ens Model',
                          file=self.log)
                    for info in print_list:
                        print(' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.
                              format(
                                  info[0],
                                  info[1],
                                  info[3],
                                  info[4],
                                  info[2] + 1,
                              ),
                              file=self.log)
                        xrs_list_sorted_nll.append(xrs_list[info[2]])

                # Output nll ordered ensemble

                write_ensemble_pdb(
                    filename='nll_ordered_' +
                    pdb_file_names[0].split('/')[-1].replace('.pdb', '') +
                    '_pensemble.pdb',
                    xrs_list=xrs_list_sorted_nll,
                    ens_pdb_hierarchy=ens_pdb_hierarchy)
Beispiel #27
0
    # Initialize MPI

    if mpi_enabled():
        from mpi4py import MPI
        mpi_comm = MPI.COMM_WORLD
        mpi_rank = mpi_comm.Get_rank()
        mpi_size = mpi_comm.Get_size()
    else:
        mpi_comm = None
        mpi_rank = 0
        mpi_size = 1

# read .pdb file. It's used as a template, so don't sort it.

    if mpi_rank == 0:
        pdb_in = hierarchy.input(file_name=top_file, sort_atoms=False)

        # MEW use cctbx.xray.structure.customized_copy() here to change the unit cell and space group as needed
        symm = pdb_in.input.crystal_symmetry()
        if unit_cell_str is None:
            unit_cell = symm.unit_cell()
        else:
            unit_cell = unit_cell_str
        if space_group_str is None:
            space_group_info = symm.space_group_info()
        else:
            space_group_info = cctbx.sgtbx.space_group_info(
                symbol=space_group_str)

        xrs = pdb_in.input.xray_structure_simple(
            crystal_symmetry=crystal.symmetry(