Ejemplo n.º 1
0
def test_array_conversion(path, model, hybrid36):
    """
    Check that a structure survives a round trip through a new PDB file.

    The structure is read from `path`, written into an empty
    ``PDBFile`` and read back again; box, bonds, annotations and
    coordinates of both structures must agree.
    The module level wrappers ``get_structure()`` and
    ``set_structure()`` are used instead of the methods, so these thin
    wrappers are tested as well.
    """
    source_file = pdb.PDBFile.read(path)
    try:
        array1 = pdb.get_structure(source_file, model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # Without a model selection the file cannot be parsed into an
        # AtomArrayStack, as the models contain different numbers of
        # atoms -> skip this test case
        return

    target_file = pdb.PDBFile()
    if hybrid36 and (array1.res_id < 1).any():
        # Residue IDs below 1 must be rejected when hybrid-36 is used
        with pytest.raises(ValueError,
                           match="Only positive integers can be converted "
                           "into hybrid-36 notation"):
            pdb.set_structure(target_file, array1, hybrid36=hybrid36)
        return
    pdb.set_structure(target_file, array1, hybrid36=hybrid36)

    array2 = pdb.get_structure(target_file, model=model)

    if array1.box is not None:
        assert np.allclose(array1.box, array2.box)
    assert array1.bonds == array2.bonds
    for category in array1.get_annotation_categories():
        expected = array1.get_annotation(category).tolist()
        actual = array2.get_annotation(category).tolist()
        assert expected == actual
    assert array1.coord.tolist() == array2.coord.tolist()
Ejemplo n.º 2
0
def test_array_conversion(path, single_model, hybrid36):
    """
    Check that a structure survives a round trip through a new PDB file.

    Depending on `single_model` either model 1 or all models
    (``model=None``) are read.
    The module level wrappers ``get_structure()`` and
    ``set_structure()`` are used instead of the methods, so these thin
    wrappers are tested as well.
    """
    if single_model:
        model = 1
    else:
        model = None
    source_file = pdb.PDBFile.read(path)
    array1 = pdb.get_structure(source_file, model=model)

    target_file = pdb.PDBFile()
    if hybrid36 and (array1.res_id < 1).any():
        # Residue IDs below 1 must be rejected when hybrid-36 is used
        with pytest.raises(ValueError,
                           match="Only positive integers can be converted "
                           "into hybrid-36 notation"):
            pdb.set_structure(target_file, array1, hybrid36=hybrid36)
        return
    pdb.set_structure(target_file, array1, hybrid36=hybrid36)

    array2 = pdb.get_structure(target_file, model=model)

    if array1.box is not None:
        assert np.allclose(array1.box, array2.box)
    assert array1.bonds == array2.bonds
    for category in array1.get_annotation_categories():
        expected = array1.get_annotation(category).tolist()
        actual = array2.get_annotation(category).tolist()
        assert expected == actual
    assert array1.coord.tolist() == array2.coord.tolist()
Ejemplo n.º 3
0
def test_extra_fields(hybrid36):
    """
    Check that the optional annotations are preserved when a structure
    is written into a new ``PDBFile`` and read back, and that an unknown
    extra field name is rejected with a ``ValueError``.
    """
    input_file = pdb.PDBFile()
    input_file.read(join(data_dir, "1l2y.pdb"))
    stack1 = input_file.get_structure(
        extra_fields=["atom_id", "b_factor", "occupancy", "charge"]
    )

    with pytest.raises(ValueError):
        input_file.get_structure(extra_fields=["unsupported_field"])

    output_file = pdb.PDBFile()
    output_file.set_structure(stack1, hybrid36=hybrid36)
    stack2 = output_file.get_structure(
        extra_fields=["atom_id", "b_factor", "occupancy", "charge"]
    )

    assert stack1.ins_code.tolist() == stack2.ins_code.tolist()
    assert stack1.atom_id.tolist() == stack2.atom_id.tolist()
    # Floating point annotations are compared with a tolerance
    assert stack1.b_factor.tolist() == approx(stack2.b_factor.tolist())
    assert stack1.occupancy.tolist() == approx(stack2.occupancy.tolist())
    assert stack1.charge.tolist() == stack2.charge.tolist()
    assert stack1 == stack2
Ejemplo n.º 4
0
def test_array_conversion(path, single_model):
    """
    Check that a structure read from `path` is equal to the structure
    obtained after writing it into a new ``PDBFile`` and reading it
    back.
    """
    if single_model:
        model = 1
    else:
        model = None

    source_file = pdb.PDBFile()
    source_file.read(path)
    array1 = source_file.get_structure(model=model)

    target_file = pdb.PDBFile()
    target_file.set_structure(array1)
    array2 = target_file.get_structure(model=model)

    assert array1 == array2
Ejemplo n.º 5
0
def test_id_overflow():
    """
    Check the handling of atom and residue IDs that exceed the width of
    their columns in a PDB file.

    Without hybrid-36 encoding a ``UserWarning`` must be raised on
    writing and the file must still be readable.
    With hybrid-36 encoding no warning at all may be raised and the IDs
    must appear in the expected hybrid-36 notation.
    """
    # 'pytest.warns(None)' is deprecated since pytest 7 and rejected by
    # pytest 8, hence warnings are recorded via the standard library
    import warnings

    # Create an atom array with 100k atoms
    length = 100000
    a = struc.AtomArray(length)
    a.coord = np.zeros(a.coord.shape)
    a.chain_id = np.full(length, "A")
    # Create residue IDs over 10000
    a.res_id = np.arange(1, length + 1)
    a.res_name = np.full(length, "GLY")
    a.hetero = np.full(length, False)
    a.atom_name = np.full(length, "CA")
    a.element = np.full(length, "C")

    # The context manager closes the temporary file
    # even if one of the assertions fails
    with TemporaryFile("w+") as temp:
        # Write array to pdb file and make sure a warning is thrown
        with pytest.warns(UserWarning):
            pdb_file = pdb.PDBFile()
            pdb_file.set_structure(a)
            pdb_file.write(temp)

        # Assert file can be read properly
        temp.seek(0)
        a2 = pdb.get_structure(pdb.PDBFile.read(temp))
        assert a2.array_length() == a.array_length()

        # Manually check if the written atom id is correct:
        # the second whitespace separated token of the last line
        # is expected to be 1
        temp.seek(0)
        last_line = temp.readlines()[-1]
        atom_id = int(last_line.split()[1])
        assert atom_id == 1

    with TemporaryFile("w+") as temp:
        # Write array as hybrid-36 pdb file: no warning should be thrown
        with warnings.catch_warnings(record=True) as record:
            # Record every warning, irrespective of the active filters
            warnings.simplefilter("always")
            tmp_pdb_file = pdb.PDBFile()
            tmp_pdb_file.set_structure(a, hybrid36=True)
            tmp_pdb_file.write(temp)
        assert len(record) == 0

        # Manually check if the output is written as correct hybrid-36
        temp.seek(0)
        last_line = temp.readlines()[-1]
        tokens = last_line.split()
        assert tokens[1] == "A0000"
        # The first character of the fifth token is dropped
        # (presumably the chain ID fused with the residue ID,
        # TODO confirm)
        assert tokens[4][1:] == "BXG0"
Ejemplo n.º 6
0
def temp_pdb(structure,tmp_dir):
    """
    Write `structure` into a new temporary PDB file and return its path.

    Parameters
    ----------
    structure
        The structure passed to ``PDB.set_structure()``.
    tmp_dir
        Object whose ``name`` attribute is the directory the file is
        created in (presumably a ``tempfile.TemporaryDirectory``,
        TODO confirm against the callers).

    Returns
    -------
    str
        Path of the written file.
    """
    fd, temp_path = tempfile.mkstemp(dir=tmp_dir.name, text=True)
    # The file is written via its path, so the descriptor opened by
    # 'mkstemp()' is not needed: close it right away, so that it cannot
    # leak if writing the structure fails
    os.close(fd)
    pdb_out = PDB.PDBFile()
    PDB.set_structure(pdb_out, structure)
    pdb_out.write(temp_path)
    return temp_path
Ejemplo n.º 7
0
def test_extra_fields(hybrid36):
    """
    Check that the optional annotations are preserved when a structure
    is written into a new ``PDBFile`` and read back, and that an unknown
    extra field name is rejected with a ``ValueError``.
    """
    input_file = pdb.PDBFile.read(join(data_dir("structure"), "1l2y.pdb"))
    stack1 = input_file.get_structure(
        extra_fields=["atom_id", "b_factor", "occupancy", "charge"]
    )

    with pytest.raises(ValueError):
        input_file.get_structure(extra_fields=["unsupported_field"])

    # The input PDB has only neutral charges,
    # so set some non-neutral values to make the charge check meaningful
    stack1.charge[0] = -1
    stack1.charge[1] = 2

    output_file = pdb.PDBFile()
    output_file.set_structure(stack1, hybrid36=hybrid36)
    stack2 = output_file.get_structure(
        extra_fields=["atom_id", "b_factor", "occupancy", "charge"]
    )

    assert stack1.ins_code.tolist() == stack2.ins_code.tolist()
    assert stack1.atom_id.tolist() == stack2.atom_id.tolist()
    # Floating point annotations are compared with a tolerance
    assert stack1.b_factor.tolist() == approx(stack2.b_factor.tolist())
    assert stack1.occupancy.tolist() == approx(stack2.occupancy.tolist())
    assert stack1.charge.tolist() == stack2.charge.tolist()
    assert stack1 == stack2
Ejemplo n.º 8
0
def test_single(pdb_id):
    """
    Compare the per-atom SASA of model 1 of the given PDB entry with
    the values computed by MDTraj's Shrake-Rupley implementation.
    """
    file_name = join(data_dir, pdb_id + ".pdb")

    # Single atom SASA, compare with MDTraj
    pdb_file = pdb.PDBFile()
    pdb_file.read(file_name)
    atoms = pdb_file.get_structure(model=1)
    sasa = struc.sasa(atoms, vdw_radii="Single", point_number=5000)

    from biotite.structure.info.radii import _SINGLE_RADII
    import mdtraj
    # Use the same atom radii in MDTraj: element names are capitalized
    # and the radii divided by 10
    # (presumably a conversion from A to nm, TODO confirm)
    mdtraj_radii = {}
    for element, radius in _SINGLE_RADII.items():
        mdtraj_radii[element.capitalize()] = radius / 10
    traj = mdtraj.load(file_name)
    first_frame_sasa = mdtraj.shrake_rupley(
        traj, change_radii=mdtraj_radii, n_sphere_points=5000
    )[0]
    # Conversion from nm^2 to A^2
    sasa_exp = first_frame_sasa * 100

    atom_count = len(sasa)
    # Assert that more than 90% of atoms
    # have less than 10% SASA difference
    roughly_equal = np.isclose(sasa, sasa_exp, rtol=1e-1, atol=1e-1)
    assert np.count_nonzero(roughly_equal) / atom_count > 0.9
    # Assert that more than 98% of atoms
    # have less than 1% SASA difference
    closely_equal = np.isclose(sasa, sasa_exp, rtol=1e-2, atol=1e-1)
    assert np.count_nonzero(closely_equal) / atom_count > 0.98
Ejemplo n.º 9
0
def test_pdb_to_gro(path, single_model):
    """
    Check that converting a structure from PDB to GRO does not change
    its length, its residue/atom naming and (within tolerance) its
    coordinates.
    """
    if single_model:
        model = 1
    else:
        model = None

    # Read the reference structure from the PDB file
    pdb_file = pdb.PDBFile()
    pdb_file.read(path)
    a1 = pdb_file.get_structure(model=model)

    # Write the structure into a temporary GRO file
    tmp_file_name = biotite.temp_file("gro")
    gro_writer = gro.GROFile()
    gro_writer.set_structure(a1)
    gro_writer.write(tmp_file_name)

    # Read the structure back from the GRO file
    gro_reader = gro.GROFile()
    gro_reader.read(tmp_file_name)
    a2 = gro_reader.get_structure(model=model)

    assert a1.array_length() == a2.array_length()

    for category in ("res_id", "res_name", "atom_name"):
        pdb_values = a1.get_annotation(category).tolist()
        gro_values = a2.get_annotation(category).tolist()
        assert pdb_values == gro_values

    # Mind rounding errors when converting pdb to gro (A -> nm)
    pdb_coord = a1.coord.flatten().tolist()
    gro_coord = a2.coord.flatten().tolist()
    assert pdb_coord == approx(gro_coord, abs=1e-2)
Ejemplo n.º 10
0
def test_array_conversion(path, single_model, hybrid36):
    """
    Check that a structure survives a round trip through a new PDB file.

    The module level wrappers ``get_structure()`` and
    ``set_structure()`` are used instead of the methods, so these thin
    wrappers are tested as well.
    """
    if single_model:
        model = 1
    else:
        model = None

    source_file = pdb.PDBFile()
    source_file.read(path)
    array1 = pdb.get_structure(source_file, model=model)

    target_file = pdb.PDBFile()
    pdb.set_structure(target_file, array1, hybrid36=hybrid36)
    array2 = pdb.get_structure(target_file, model=model)

    if array1.box is not None:
        assert np.allclose(array1.box, array2.box)
    assert array1.bonds == array2.bonds
    for category in array1.get_annotation_categories():
        expected = array1.get_annotation(category).tolist()
        actual = array2.get_annotation(category).tolist()
        assert expected == actual
    assert array1.coord.tolist() == array2.coord.tolist()
Ejemplo n.º 11
0
def test_get_coord(model):
    """
    Check that ``PDBFile.get_coord()`` returns the same coordinates as
    the ``coord`` attribute of the structure from ``get_structure()``.
    """
    # Choose a structure without inscodes and altlocs
    # to avoid atom filtering in reference atom array (stack)
    pdb_file = pdb.PDBFile()
    pdb_file.read(join(data_dir, "1l2y.pdb"))

    reference = pdb_file.get_structure(model=model)
    ref_coord = reference.coord
    test_coord = pdb_file.get_coord(model=model)

    assert test_coord.shape == ref_coord.shape
    coord_matches = test_coord == ref_coord
    assert coord_matches.all()
Ejemplo n.º 12
0
def test_box_shape(path, single_model):
    """
    Check the shape of the box read from a PDB file: ``(3, 3)`` for an
    ``AtomArray`` and ``(len(a), 3, 3)`` otherwise.
    """
    if single_model:
        model = 1
    else:
        model = None

    pdb_file = pdb.PDBFile()
    pdb_file.read(path)
    a = pdb_file.get_structure(model=model)

    expected_box_dim = (
        (3, 3) if isinstance(a, struc.AtomArray) else (len(a), 3, 3)
    )
    assert expected_box_dim == a.box.shape
Ejemplo n.º 13
0
def test_box_parsing():
    """
    Check that the box parsed from ``1igy.pdb`` matches the expected
    box vectors within an absolute tolerance of 0.01.
    """
    pdb_file = pdb.PDBFile()
    pdb_file.read(join(data_dir, "1igy.pdb"))
    a = pdb_file.get_structure()

    # One box (three box vectors) in a stack-like leading dimension
    box_vectors = [
        [66.65,   0.00,  0.00],
        [0.00,  190.66,  0.00],
        [-24.59,  0.00, 68.84],
    ]
    expected_box = np.array([box_vectors])

    expected_values = expected_box.flatten().tolist()
    parsed_values = a.box.flatten().tolist()
    assert expected_values == approx(parsed_values, abs=1e-2)
Ejemplo n.º 14
0
def test_pdbx_consistency(path, single_model):
    """
    Check that a PDB file and the ``.cif`` file with the same base name
    yield identical annotations and coordinates.
    """
    if single_model:
        model = 1
    else:
        model = None
    base_name, _ = splitext(path)
    cif_path = base_name + ".cif"

    pdb_file = pdb.PDBFile()
    pdb_file.read(path)
    a1 = pdb_file.get_structure(model=model)

    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(cif_path)
    a2 = pdbx.get_structure(pdbx_file, model=model)

    for category in a1.get_annotation_categories():
        pdb_values = a1.get_annotation(category).tolist()
        pdbx_values = a2.get_annotation(category).tolist()
        assert pdb_values == pdbx_values
    assert a1.coord.tolist() == a2.coord.tolist()
Ejemplo n.º 15
0
def test_extra_fields():
    """
    Check that the optional annotations are unchanged after the
    structure is set into the same ``PDBFile`` object and read again.
    """
    pdb_file = pdb.PDBFile()
    pdb_file.read(join(data_dir, "1l2y.pdb"))
    stack1 = pdb_file.get_structure(
        extra_fields=["atom_id", "b_factor", "occupancy", "charge"]
    )

    # The same file object is reused for writing and re-reading
    pdb_file.set_structure(stack1)
    stack2 = pdb_file.get_structure(
        extra_fields=["atom_id", "b_factor", "occupancy", "charge"]
    )

    assert stack1.atom_id.tolist() == stack2.atom_id.tolist()
    assert stack1.b_factor.tolist() == stack2.b_factor.tolist()
    assert stack1.occupancy.tolist() == stack2.occupancy.tolist()
    assert stack1.charge.tolist() == stack2.charge.tolist()
    assert stack1 == stack2
Ejemplo n.º 16
0
def test_guess_elements():
    """
    Check that the elements are restored on reading when a structure
    was written to a PDB file with empty element annotation.
    """
    # Read a valid PDB file as reference
    reference_file = pdb.PDBFile()
    reference_file.read(join(data_dir, "1l2y.pdb"))
    stack = reference_file.get_structure()

    # Blank out all elements in a copy of the reference
    removed_stack = stack.copy()
    removed_stack.element[:] = ''

    # Write the copy without elements into a temporary file
    tmp_file_name = biotite.temp_file(".pdb")
    writer = pdb.PDBFile()
    writer.set_structure(removed_stack)
    writer.write(tmp_file_name)

    # Read the temporary file: the elements are expected to be guessed
    reader = pdb.PDBFile()
    reader.read(tmp_file_name)
    guessed_stack = reader.get_structure()

    assert guessed_stack.element.tolist() == stack.element.tolist()
Ejemplo n.º 17
0
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``1l2y`` in the given format and check that the
    result can be parsed into a structure.

    If `as_file_like` is true, no target directory is given to
    ``rcsb.fetch()``, otherwise biotite's temporary directory is used.
    """
    if as_file_like:
        path = None
    else:
        path = biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, path, overwrite=True)

    if format == "pdb":
        pdb_file = pdb.PDBFile()
        pdb_file.read(fetched)
        pdb.get_structure(pdb_file)
        return
    if format == "pdbx":
        pdbx_file = pdbx.PDBxFile()
        pdbx_file.read(fetched)
        pdbx.get_structure(pdbx_file)
        return
    if format == "mmtf":
        mmtf_file = mmtf.MMTFFile()
        mmtf_file.read(fetched)
        mmtf.get_structure(mmtf_file)
Ejemplo n.º 18
0
def test_pdb_consistency(path):
    """
    Check that model 1 of a GRO file and of the ``.pdb`` file with the
    same base name agree in length, residue/atom naming and (within
    tolerance) coordinates.
    """
    base_name, _ = splitext(path)
    pdb_path = base_name + ".pdb"

    pdb_file = pdb.PDBFile()
    pdb_file.read(pdb_path)
    a1 = pdb_file.get_structure(model=1)

    gro_file = gro.GROFile()
    gro_file.read(path)
    a2 = gro_file.get_structure(model=1)

    assert a1.array_length() == a2.array_length()

    for category in ("res_id", "res_name", "atom_name"):
        pdb_values = a1.get_annotation(category).tolist()
        gro_values = a2.get_annotation(category).tolist()
        assert pdb_values == gro_values

    # Mind rounding errors when converting pdb to gro (A -> nm)
    pdb_coord = a1.coord.flatten().tolist()
    gro_coord = a2.coord.flatten().tolist()
    assert pdb_coord == approx(gro_coord, abs=1e-2)
Ejemplo n.º 19
0
def load_pdbs_from_tar_gz(file_path, mode="r:gz"):
    """
    Load the coordinates of all PDB files stored in a tar archive.

    Parameters
    ----------
    file_path : str
        Path of the tar archive.
    mode : str, optional
        Mode passed to ``tarfile.open()``. Default: ``"r:gz"``.

    Returns
    -------
    numpy.ndarray
        Array built from one entry per archive member, in archive
        order.
        Each entry is the first element of the member's
        ``PDBFile.get_coord()`` result (presumably the first model,
        TODO confirm).

    Raises
    ------
    RuntimeError
        If `file_path` is not a string.
    """
    if not isinstance(file_path, str):
        raise RuntimeError("Argument 'file_path' is not of type str!")

    coords = []
    # The context manager closes the archive even if parsing fails
    with tarfile.open(name=file_path, mode=mode) as archive:
        for member in archive.getmembers():
            if member.name == 'test_archive':
                continue
            pdb_handle = archive.extractfile(member)
            if pdb_handle is None:
                # 'extractfile()' returns None for members that are
                # neither a regular file nor a link (e.g. directories)
                continue
            with pdb_handle:
                content = pdb_handle.read().decode()
            pdbfile = bpdb.PDBFile()
            pdbfile.read(io.StringIO(content))
            coords.append(pdbfile.get_coord()[0])
    return np.array(coords)
Ejemplo n.º 20
0
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``1l2y`` in the given format and check that the
    result can be parsed.

    If `as_file_like` is true, no target directory is given to
    ``rcsb.fetch()``, otherwise biotite's temporary directory is used.
    """
    if as_file_like:
        path = None
    else:
        path = biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, path, overwrite=True)

    if format == "pdb":
        pdb_file = pdb.PDBFile()
        pdb_file.read(fetched)
        pdb.get_structure(pdb_file)
        return
    if format == "pdbx":
        pdbx_file = pdbx.PDBxFile()
        pdbx_file.read(fetched)
        pdbx.get_structure(pdbx_file)
        return
    if format == "mmtf":
        mmtf_file = mmtf.MMTFFile()
        mmtf_file.read(fetched)
        mmtf.get_structure(mmtf_file)
        return
    if format == "fasta":
        fasta_file = fasta.FastaFile()
        fasta_file.read(fetched)
        # Test if the file contains any sequences
        sequences = fasta.get_sequences(fasta_file)
        assert len(sequences) > 0
Ejemplo n.º 21
0
def test_guess_elements():
    """
    Check that the elements are restored on reading when a structure
    was written to a PDB file with empty element annotation.
    """
    # Read valid pdb file
    path = join(data_dir("structure"), "1l2y.pdb")
    pdb_file = pdb.PDBFile.read(path)
    stack = pdb_file.get_structure()

    # Remove all elements
    removed_stack = stack.copy()
    removed_stack.element[:] = ''

    # The context manager closes the temporary file
    # even if writing or reading fails
    with TemporaryFile("w+") as temp:
        # Save stack without elements to tmp file
        tmp_pdb_file = pdb.PDBFile()
        tmp_pdb_file.set_structure(removed_stack)
        tmp_pdb_file.write(temp)

        # Read new stack from file with guessed elements
        temp.seek(0)
        guessed_pdb_file = pdb.PDBFile.read(temp)
    guessed_stack = guessed_pdb_file.get_structure()

    assert guessed_stack.element.tolist() == stack.element.tolist()
Ejemplo n.º 22
0
# possible, namely the miniprotein *TC5b* (PDB: ``1L2Y``).
# The structure of this 20-residue protein (304 atoms) has been
# elucidated via NMR.
# Thus, the corresponding PDB file consists of multiple (namely 38)
# models, each showing another conformation.
#
# .. currentmodule:: biotite.structure.io.pdb
#
# At first we load the structure from a PDB file via the class
# :class:`PDBFile` in the subpackage :mod:`biotite.structure.io.pdb`.

import biotite
import biotite.structure.io.pdb as pdb
import biotite.database.rcsb as rcsb
# Fetch the PDB file of the entry 1L2Y into biotite's temporary directory
pdb_file_path = rcsb.fetch("1l2y", "pdb", biotite.temp_dir())
# Read the file and obtain the structure; no model is selected here
file = pdb.PDBFile()
file.read(pdb_file_path)
tc5b = file.get_structure()
# Print the type name of the returned object,
# followed by its stack depth and its array length
print(type(tc5b).__name__)
print(tc5b.stack_depth())
print(tc5b.array_length())

########################################################################
# The method :func:`PDBFile.get_structure()` returns an atom array stack
# unless the :obj:`model` parameter is specified,
# even if the file contains only one model.
# Alternatively, the module level function :func:`get_structure()`
# can be used.
# The following example
# shows how to write an array or stack back into a PDB file:
Ejemplo n.º 23
0
    mask_exit = np.logical_and(inds_chain,np.in1d(struct.res_id, np.array([3586,3661,3668,3832,3840,4024])))
    active_sites = traj[:,mask_active_site,:]
    active_sites = np.mean(active_sites,axis = 1)
    coords_exit = traj[:,mask_exit,:]
    coords_exit = np.mean(coords_exit,axis = 1)
    density = np.empty((traj.shape[0],len(radius)))
    for j,r in enumerate(radius):
        for i in range(traj.shape[0]):
            n_atoms = np.where(in_cylinder(coords_exit[i],active_sites[i],r,traj[i]))[0].shape
            volume = np.linalg.norm(coords_exit[i] - active_sites[i]) * np.pi * r**2
            density[i,j] = n_atoms / volume
    return density

# Radii for which the densities are calculated
radius = [6, 8, 9, 10, 11, 12, 13, 14, 15, 16]
# Each radius becomes one column of the output tables
radius_labels = list(map(str, radius))

# Load the structure and keep only the first entry of the result
struct = PDB.PDBFile()
struct.read("../md_simulations/kinase_dimer.pdb")
struct = struct.get_structure()[0]

# Load the trajectory coordinates
traj = XTC.XTCFile()
traj.read("../md_simulations/kinase_dimer_nopbc_cluster_fit.xtc")
traj = traj.get_coord()

# Densities for chain "A", written as CSV
density_a = calculate_densities(struct, traj, "A", radius)
density_data_a = pd.DataFrame(data=density_a, columns=list(radius_labels))
density_data_a.to_csv("density_data_kinase_a.csv")

# Densities for chain "D", written as CSV
density_d = calculate_densities(struct, traj, "D", radius)
density_data_d = pd.DataFrame(data=density_d, columns=list(radius_labels))
density_data_d.to_csv("density_data_kinase_d.csv")
Ejemplo n.º 24
0
import biotite.structure.io.pdb as pdb
import biotite.structure as struc

# Read the input file given by the Snakemake rule
pdb_file = pdb.PDBFile()
pdb_file.read(snakemake.input[0])
# Only use one model
structure = pdb_file.get_structure(model=1)
# Remove water: keep all atoms that are not flagged as solvent
non_solvent_mask = ~struc.filter_solvent(structure)
structure = structure[non_solvent_mask]
# Remove hydrogens
non_hydrogen_mask = structure.element != "H"
structure = structure[non_hydrogen_mask]
# Write the filtered structure to the output file of the rule
pdb_file.set_structure(structure)
pdb_file.write(snakemake.output[0])
Ejemplo n.º 25
0
def load_pdbs_from_dict(dir_path, sc=None):
    """
    Load the coordinates of the PDB files in a directory.

    A progress line with an estimate of the remaining time is printed
    while the files are loaded.

    Parameters
    ----------
    dir_path : str
        Directory containing the PDB files.
    sc : optional
        Input for ``pandas.read_csv()`` describing a whitespace
        separated score table with a ``description`` column.
        If given, the files to load are derived from that column
        (last ``/`` separated part plus ``.pdb``) instead of from the
        directory listing.

    Returns
    -------
    numpy.ndarray
        ``float64`` array with one entry per loaded file.
        Each entry is the first element of the file's
        ``PDBFile.get_coord()`` result (presumably the first model,
        TODO confirm).
        The array is empty if there is no file to load.

    Raises
    ------
    RuntimeError
        If `dir_path` is not a string.
    SystemExit
        If the coordinates of a file cannot be obtained.
    """
    if not isinstance(dir_path, str):
        raise RuntimeError("Argument 'dir_path' is not of type str!")
    # Listing the directory also fails early if it does not exist
    files = os.listdir(dir_path)

    if sc is not None:
        print("using score")
        # Raw string: '\s' is not a valid escape sequence
        score = pd.read_csv(sc, header=1, delimiter=r"\s+")
        score = score[~pd.isnull(score['description'])]
        assert (len(score.keys()) > 0)
        assert ('description' in score.keys())

        files = [
            os.path.join(dir_path,
                         f.split("/")[-1] + ".pdb")
            for f in score['description']
        ]
    else:
        files = [os.path.join(dir_path, f) for f in files]

    coords = []
    N_files = len(files)

    # The progress bar grows by one character per percent of files
    p_marker_delta = 1.0 / 100
    marker = ''
    p_last = 0
    # Loading time of each file, used for the ETA
    times = []

    # Guard against an empty file list, which has no first/last entry
    if files:
        print("first file ::" + str(files[0]))
        print("last  file ::" + str(files[-1]))

    for i, f in enumerate(files):
        t1 = time.time()

        pdbfile = bpdb.PDBFile()
        pdbfile.read(f)

        try:
            coords.append(pdbfile.get_coord()[0])
        except Exception as err:
            # Report the offending file together with the actual error
            # and terminate with a non-zero exit status
            print("")
            print("")
            print(" tried loading file ::" + str(f))
            print(" error ::" + repr(err))
            print("")
            raise SystemExit(1) from err

        t2 = time.time()
        times.append(t2 - t1)

        # Fraction of files loaded so far
        p = float(i + 1) / float(N_files)
        if p - p_last >= p_marker_delta:
            p_last = p
            marker = marker + "="

        # Remaining time estimated from the mean loading time per file
        T_eta = np.mean(times) * (N_files - (i + 1))
        T_eta_err = np.std(times)

        print("loading file [" + str("") + " | " + str(i) + " / " +
              str(N_files) + "]  " + "|" + str(marker) + ">   " +
              str(p * 100) + "% complete" + " ETA :: " +
              str(datetime.timedelta(seconds=T_eta)) + " +/- " +
              str(T_eta_err) + "s ",
              end='\r')

    print("")
    print("")
    print(" ... finished loading pdbs ... ")

    return np.array(coords, dtype=np.float64)