Beispiel #1
0
def load_structure(fpath, chain=None):
    """
    Load one chain of the first model of a structure file, without solvent.

    Args:
        fpath: filepath to either pdb or cif file
        chain: the chain id; if None, the first entry returned by
            `get_chains` is used
    Returns:
        biotite.structure.AtomArray
    Raises:
        ValueError: if the file extension is neither 'cif' nor 'pdb',
            if no chains remain after solvent removal, or if the
            requested chain is not present.
    """
    if fpath.endswith('cif'):
        with open(fpath) as fin:
            pdbxf = pdbx.PDBxFile.read(fin)
        structure = pdbx.get_structure(pdbxf, model=1)
    elif fpath.endswith('pdb'):
        with open(fpath) as fin:
            pdbf = pdb.PDBFile.read(fin)
        structure = pdb.get_structure(pdbf, model=1)
    else:
        # Without this branch `structure` would be unbound below and the
        # caller would get an opaque UnboundLocalError.
        raise ValueError(
            f'Unsupported file format for {fpath!r}: '
            "expected a path ending in 'cif' or 'pdb'"
        )
    issolvent = filter_solvent(structure)
    structure = structure[~issolvent]
    chains = get_chains(structure)
    print(f'Found {len(chains)} chains:', chains, '\n')
    # Explicit length check: `chains` may be an array, whose truthiness
    # is ambiguous.
    if len(chains) == 0:
        raise ValueError('No chains found in the input file.')
    if chain is None:
        chain = chains[0]
    if chain not in chains:
        raise ValueError(f'Chain {chain} not found in input file')
    structure = structure[structure.chain_id == chain]
    print(f'Loaded chain {chain}\n')
    return structure
Beispiel #2
0
def test_remove_pbc_unsegmented():
    """
    A structure that lies entirely in the box is unsegmented,
    so `remove_pbc()` is expected to leave it unaltered.
    The solvent is excluded, due to high distances between each atom.
    """
    structure_path = join(data_dir("structure"), "3o5r.mmtf")
    expected = load_structure(structure_path)
    # Center structure in box
    center_of_atoms = struc.centroid(expected)
    center_of_box = np.diag(expected.box) / 2
    shift = center_of_box - center_of_atoms
    expected = struc.translate(expected, shift)
    # Remove solvent
    solvent_mask = struc.filter_solvent(expected)
    expected = expected[~solvent_mask]
    result = struc.remove_pbc(expected)

    assert expected.equal_annotation_categories(result)
    assert np.allclose(expected.coord, result.coord)
import biotite.database.rcsb as rcsb
import numpy as np

# The output file names (temporary files by default)
# Modify these values for actual file output
ku_dna_file = biotite.temp_file("ku_dna.cif")
ku_file = biotite.temp_file("ku.cif")

# Download the RCSB entries 1JEY and 1JEQ in MMTF format into the
# temporary directory and parse them into structures
file = rcsb.fetch("1JEY", "mmtf", biotite.temp_dir())
ku_dna = strucio.load_structure(file)
file = rcsb.fetch("1JEQ", "mmtf", biotite.temp_dir())
ku = strucio.load_structure(file)
# Remove DNA and water:
# keep only chains A and B of 1JEY, then drop solvent from both structures
ku_dna = ku_dna[(ku_dna.chain_id == "A") | (ku_dna.chain_id == "B")]
ku_dna = ku_dna[~struc.filter_solvent(ku_dna)]
ku = ku[~struc.filter_solvent(ku)]
# The structures have a differing amount of atoms missing
# at the start and end of the structure
# -> Find common structure
ku_dna_common = ku_dna[struc.filter_intersection(ku_dna, ku)]
ku_common = ku[struc.filter_intersection(ku, ku_dna)]
# Superimpose the common parts, using only the atoms named "CA"
# Only the transformation is needed from this call:
# 'ku_superimposed' is overwritten below
ku_superimposed, transformation = struc.superimpose(
    ku_dna_common, ku_common, (ku_common.atom_name == "CA"))
# We do not want the cropped structures
# -> apply superimposition on structures before intersection filtering
ku_superimposed = struc.superimpose_apply(ku, transformation)
# Write PDBx files as input for PyMOL
# NOTE(review): only the 'ku_dna' data block is filled in the visible part;
# the file output presumably follows further down - confirm
cif_file = pdbx.PDBxFile()
pdbx.set_structure(cif_file, ku_dna, data_block="ku_dna")
Beispiel #4
0
def test_solvent_filter(sample_array):
    """The solvent filter should select exactly 287 atoms of the sample."""
    solvent_mask = struc.filter_solvent(sample_array)
    solvent_atoms = sample_array[solvent_mask]
    assert len(solvent_atoms) == 287
Beispiel #5
0
import biotite.structure.io.pdb as pdb
import biotite.structure as struc

# Read the input file handed over by Snakemake
pdb_file = pdb.PDBFile()
pdb_file.read(snakemake.input[0])
# Restrict the structure to the first model
structure = pdb_file.get_structure(model=1)
# Drop all solvent atoms
is_solvent = struc.filter_solvent(structure)
structure = structure[~is_solvent]
# Drop all hydrogen atoms
is_not_hydrogen = structure.element != "H"
structure = structure[is_not_hydrogen]
# Put the cleaned structure back into the file object and write it
# to the output path handed over by Snakemake
pdb_file.set_structure(structure)
pdb_file.write(snakemake.output[0])
traj_file_path = "../../download/waterbox_md.xtc"

# Load the trajectory, using the template structure file for the
# atom annotations
traj = strucio.load_structure(traj_file_path, template=templ_file_path)
# Sanitize the PDB file produced by Gromacs:
# Use capital letters for atom elements...
traj.element = np.array([element.upper() for element in traj.element])
# ...and set 'hetero' to true for all atoms,
# as the file does not contain any regular chains.
traj.hetero[:] = True

# Extract the coordinates of all sodium or chloride ions, respectively
# (the first axis is kept whole, the element mask selects along the second)
na = traj.coord[:, traj.element == "NA"]
cl = traj.coord[:, traj.element == "CL"]
# Select all solvent (water) atoms
solvent = traj[:, struc.filter_solvent(traj)]
# Calculate the RDF of water molecules
# centered on sodium or chloride ions, respectively
N_BINS = 200
bins, rdf_na = struc.rdf(center=na, atoms=solvent, periodic=True, bins=N_BINS)
bins, rdf_cl = struc.rdf(center=cl, atoms=solvent, periodic=True, bins=N_BINS)

# Find peaks
# This requires a bit of trial and error on the parameters
# The 'x' in '[x * N_BINS/10]' is the expected peak width in Å,
# that is transformed into a peak width in amount of values
# NOTE(review): the divisor 10 presumably is the RDF range in Å - confirm
peak_indices_na = signal.find_peaks_cwt(rdf_na, widths=[0.2 * N_BINS / 10])
peak_indices_cl = signal.find_peaks_cwt(rdf_cl, widths=[0.3 * N_BINS / 10])
# Keep only the first three peaks of each RDF
peak_indices_na, peak_indices_cl = peak_indices_na[:3], peak_indices_cl[:3]

# Create plots