def test_loading_with_extra_args():
    """
    Check that optional keyword arguments given to
    :func:`load_structure()` are accepted for the matching file type
    and that an unsupported argument raises a :class:`TypeError`.
    """
    structure_dir = data_dir("structure")
    template = join(structure_dir, "1l2y.pdb")
    trajectory = join(structure_dir, "1l2y.xtc")

    # Text files: extra arguments are passed on as 'get_structure' args
    structure = strucio.load_structure(template, extra_fields=["b_factor"])
    assert "b_factor" in structure.get_annotation_categories()

    # Trajectories: extra arguments are passed on to 'read'
    frames = strucio.load_structure(
        trajectory, template=structure[0], start=5, stop=6
    )
    assert len(frames) == 1

    # Arguments that do not fit the file type must be rejected
    with pytest.raises(TypeError):
        strucio.load_structure(template, start=2)

    # The 'atom_i' argument must also be applied to the template
    subset = strucio.load_structure(trajectory, template, atom_i=[1, 2])
    assert subset.shape[1] == 2
def test_saving(suffix):
    """
    Check if loading a structure from a file written via
    :func:`save_structure()` gives the same result as the input to
    :func:`save_structure()`.

    Parameters
    ----------
    suffix : str
        File suffix (without leading dot) of the format to be tested.
    """
    path = join(data_dir("structure"), "1l2y.mmtf")
    ref_array = strucio.load_structure(path)
    # Reading a trajectory file requires a template
    is_trajectory = suffix in ("trr", "xtc", "tng", "dcd", "netcdf")
    template = path if is_trajectory else None

    temp = NamedTemporaryFile("w", suffix=f".{suffix}", delete=False)
    # Only the file name is required, the file is (re)written by name
    temp.close()
    try:
        strucio.save_structure(temp.name, ref_array)
        test_array = strucio.load_structure(temp.name, template)
    finally:
        # The file was created with 'delete=False'
        # -> remove it manually, even if saving or loading fails
        os.remove(temp.name)

    for category in ref_array.get_annotation_categories():
        if category == "chain_id" and suffix == "gro":
            # The chain ID is not written to GRO files
            continue
        assert test_array.get_annotation(category).tolist() \
            == ref_array.get_annotation(category).tolist()
    assert test_array.coord.flatten().tolist() == pytest.approx(
        ref_array.coord.flatten().tolist(), abs=1e-2
    )
def test_masked_superimposition(seed):
    """
    Take two models of the same structure and superimpose based on a
    single, randomly chosen atom.
    Since two atoms can be superimposed perfectly, the distance between
    the atom in both models should be 0.

    Parameters
    ----------
    seed : int
        Seed for the random choice of the masked atom.
    """
    path = join(data_dir, "1l2y.mmtf")
    fixed = strucio.load_structure(path, model=1)
    mobile = strucio.load_structure(path, model=2)

    # Create random mask for a single atom
    np.random.seed(seed)
    mask = np.full(fixed.array_length(), False)
    mask[np.random.randint(fixed.array_length())] = True

    # The distance between the atom in both models should not be
    # already 0 prior to superimposition
    assert struc.distance(fixed[mask], mobile[mask])[0] \
        != pytest.approx(0, abs=5e-4)

    fitted, transformation = struc.superimpose(fixed, mobile, mask)
    assert struc.distance(fixed[mask], fitted[mask])[0] \
        == pytest.approx(0, abs=5e-4)

    # Applying the returned transformation manually must give the same
    # result; this comparison was previously evaluated without 'assert'
    # and hence never checked
    fitted = struc.superimpose_apply(mobile, transformation)
    assert struc.distance(fixed[mask], fitted[mask])[0] \
        == pytest.approx(0, abs=5e-4)
def test_loading(path):
    """
    Load the file at `path` via :func:`load_structure()`.
    Trajectory formats are loaded together with a template structure.
    The test contains no assertions: it passes if loading does not
    raise an exception.
    """
    trajectory_suffixes = (".trr", ".xtc", ".tng", ".dcd", ".netcdf")
    load_args = [path]
    if splitext(path)[1] in trajectory_suffixes:
        # Trajectory files are loaded with an additional template
        load_args.append(
            strucio.load_structure(join(data_dir("structure"), "1l2y.mmtf"))
        )
    strucio.load_structure(*load_args)
def test_pdbx_consistency(path):
    """
    Check that the structure loaded from `path` and the structure
    loaded from the CIF file with the same base name have identical
    annotations and coordinates.
    """
    base_name, _ = splitext(path)
    ref = strucio.load_structure(path)
    cif = strucio.load_structure(base_name + ".cif")
    for category in ref.get_annotation_categories():
        ref_values = ref.get_annotation(category).tolist()
        cif_values = cif.get_annotation(category).tolist()
        assert ref_values == cif_values
    assert ref.coord.tolist() == cif.coord.tolist()
def test_pdbx_consistency(path):
    """
    Check that the structure loaded from `path` and the structure
    loaded from the CIF file with the same base name have the same
    box, bonds, annotations and coordinates.
    """
    base_name, _ = splitext(path)
    ref = strucio.load_structure(path)
    cif = strucio.load_structure(base_name + ".cif")

    # The box is only compared if the CIF structure has one
    if cif.box is not None:
        assert np.allclose(ref.box, cif.box)
    assert ref.bonds == cif.bonds

    for category in ref.get_annotation_categories():
        ref_values = ref.get_annotation(category).tolist()
        cif_values = cif.get_annotation(category).tolist()
        assert ref_values == cif_values
    assert ref.coord.tolist() == cif.coord.tolist()
def test_rdf():
    """
    Compute the non-periodic oxygen RDF for a box of water, centered
    on the first oxygen atom, and compare it with the result of MDTraj.
    """
    stack = load_structure(TEST_FILE)

    # Oxygen RDF of water, centered on the first oxygen atom
    oxygen = stack[:, stack.atom_name == 'OW']
    interval = np.array([0, 10])
    n_bins = 100
    bins, g_r = rdf(
        oxygen[:, 0].coord, oxygen,
        interval=interval, bins=n_bins, periodic=False
    )

    # Reference values from MDTraj
    import mdtraj
    traj = mdtraj.load(TEST_FILE)
    ow = [atom.index for atom in traj.topology.atoms if atom.name == 'O']
    pairs = list(itertools.product([ow[0]], ow))
    # The factor 10 presumably converts between the length units of
    # both packages -- TODO confirm
    mdt_bins, mdt_g_r = mdtraj.compute_rdf(
        traj, pairs, r_range=interval / 10, n_bins=n_bins, periodic=False
    )

    assert np.allclose(bins, mdt_bins * 10)
    assert np.allclose(g_r, mdt_g_r, rtol=0.0001)
def test_rdf_multiple_center():
    """
    Check that an RDF computed for two centers at once equals the
    mean of the RDFs computed for each center individually.
    """
    stack = load_structure(TEST_FILE)
    oxygen = stack[:, stack.atom_name == 'OW']
    interval = np.array([0, 10])
    n_bins = 100

    def _oxygen_rdf(center):
        # RDF of all oxygen atoms except the two center candidates
        return rdf(
            center, oxygen[:, 2:],
            interval=interval, bins=n_bins, periodic=False
        )

    # Individual calculations for oxygen 1 and 0, averaged afterwards
    single_g_r = [_oxygen_rdf(oxygen[:, i].coord)[1] for i in (1, 0)]
    mean = np.mean(single_g_r, axis=0)

    # Using both oxygen atoms as centers should give the same result
    bins, g_r = _oxygen_rdf(oxygen[:, 0:2].coord)
    assert np.allclose(g_r, mean, rtol=0.0001)
def create(pdb_id, directory, include_gro):
    """
    Fetch a structure via ``rcsb.fetch()`` in PDB, CIF and MMTF format
    and additionally save it as NPZ and, optionally, as GRO file.

    Parameters
    ----------
    pdb_id : str
        ID of the structure; also used as base name of the files.
    directory : str
        Directory the files are written to.
    include_gro : bool
        If true, a GRO file is created by calling ``editconf``.
    """
    # Create *.pdb, *.cif and *.mmtf
    for file_format in ("pdb", "cif", "mmtf"):
        rcsb.fetch(pdb_id, file_format, directory, overwrite=True)

    pdb_path = join(directory, pdb_id + ".pdb")
    try:
        array = strucio.load_structure(pdb_path)
    except biotite.InvalidFileError:
        # Structure probably contains multiple models with different
        # number of atoms
        # -> Cannot load AtomArrayStack
        # -> Skip writing GRO and NPZ file
        return

    # Create *.npz file
    strucio.save_structure(join(directory, pdb_id + ".npz"), array)

    if not include_gro:
        return
    # Create *.gro file using GROMACS
    # Clean PDB file -> remove inscodes and altlocs
    cleaned_file_name = biotite.temp_file("pdb")
    strucio.save_structure(cleaned_file_name, array)
    gro_path = join(directory, pdb_id + ".gro")
    # Run GROMACS for file conversion; its output is discarded
    subprocess.run(
        ["editconf", "-f", cleaned_file_name, "-o", gro_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def test_hbond_with_selections():
    """
    If `selection1` and `selection2` are given, no hydrogen bonds
    outside of these selections should be found.
    Furthermore, the selection type must be respected.
    """
    stack = load_structure(join(data_dir, "1l2y.mmtf"))
    # Backbone oxygen of residue 3 (3TYR)
    selection1 = (stack.res_id == 3) & (stack.atom_name == 'O')
    selection2 = stack.res_id == 7

    # The backbone hbond should be found if selection1 may be both,
    # donor and acceptor, or if selection1 is the acceptor
    # (and selection2 the donor)
    for selection1_type in ("both", "acceptor"):
        triplets, mask = struc.hbond(
            stack, selection1, selection2, selection1_type=selection1_type
        )
        assert len(triplets) == 1
        assert triplets[0][0] == 116
        assert triplets[0][2] == 38

    # No hbond should be found,
    # because the backbone oxygen cannot be a donor
    triplets, mask = struc.hbond(
        stack, selection1, selection2, selection1_type="donor"
    )
    assert len(triplets) == 0
def test_small_molecule():
    """
    Check if loading a small molecule file written via
    :func:`save_structure()` gives the same result as the input to
    :func:`save_structure()`.
    """
    path = join(data_dir("structure"), "molecules", "TYR.sdf")
    ref_array = strucio.load_structure(path)

    temp = NamedTemporaryFile("w", suffix=".sdf", delete=False)
    # Only the file name is required, the file is (re)written by name
    temp.close()
    try:
        strucio.save_structure(temp.name, ref_array)
        test_array = strucio.load_structure(temp.name)
    finally:
        # The file was created with 'delete=False'
        # -> remove it manually, even if saving or loading fails
        os.remove(temp.name)

    assert test_array == ref_array
def test_adjacency_matrix(cell_size, threshold, periodic):
    """
    Compare the adjacency matrix created by a cell list with the one
    derived from a computationally expensive but simpler distance
    matrix.
    """
    array = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    if periodic:
        # Orthorhombic box with the outer coordinates as bounds
        lower_bound = np.min(array.coord, axis=-2)
        upper_bound = np.max(array.coord, axis=-2)
        array.box = np.diag(upper_bound - lower_bound)

    cell_list = struc.CellList(array, cell_size=cell_size, periodic=periodic)
    matrix = cell_list.create_adjacency_matrix(threshold)

    # Convert to float64 to avoid erroneous warning
    # https://github.com/ContinuumIO/anaconda-issues/issues/9129
    array.coord = array.coord.astype(np.float64)

    # Distance matrix: distances for all combinations of two atoms
    length = array.array_length()
    atom_indices = np.arange(length)
    index_pairs = np.stack(
        [np.repeat(atom_indices, length), np.tile(atom_indices, length)],
        axis=-1
    )
    distance = struc.index_distance(array, index_pairs, periodic)
    distance = distance.reshape(length, length)

    # Both ways to create an adjacency matrix
    # should give the same result
    expected_matrix = distance <= threshold
    assert np.array_equal(matrix, expected_matrix)
def test_rdf_periodic():
    """
    Compute the periodic oxygen RDF for a box of water, centered on
    the first oxygen atom, and compare it with the result of MDTraj.
    """
    stack = load_structure(TEST_FILE)

    # Oxygen RDF of water: first oxygen vs. all remaining oxygen atoms
    oxygen = stack[:, stack.atom_name == 'OW']
    interval = np.array([0, 10])
    n_bins = 100
    bins, g_r = rdf(
        oxygen[:, 0].coord, oxygen[:, 1:],
        interval=interval, bins=n_bins, periodic=True
    )

    # Reference values from MDTraj
    import mdtraj
    traj = mdtraj.load(TEST_FILE)
    ow = [atom.index for atom in traj.topology.atoms if atom.name == 'O']
    pairs = list(itertools.product([ow[0]], ow[1:]))
    # The factor 10 presumably converts between the length units of
    # both packages -- TODO confirm
    mdt_bins, mdt_g_r = mdtraj.compute_rdf(
        traj, pairs, r_range=interval / 10, n_bins=n_bins, periodic=True
    )

    assert np.allclose(bins, mdt_bins * 10)
    assert np.allclose(g_r, mdt_g_r, rtol=0.0001)
def test_array_conversion(format):
    """
    Read a trajectory file, write its structure into a new file of
    the same format and read that file again.
    The structure obtained from the new file should equal the
    structure obtained from the original file.
    """
    template = strucio.load_structure(join(data_dir, "1l2y.mmtf"))[0]
    # Add fake box
    template.box = np.diag([1, 2, 3])

    # Trajectory file class for each tested format
    file_classes = {
        "trr": trr.TRRFile,
        "xtc": xtc.XTCFile,
        "tng": tng.TNGFile,
        "dcd": dcd.DCDFile,
        "netcdf": netcdf.NetCDFFile,
    }
    traj_file_cls = file_classes[format]

    # Reference: structure from the original trajectory file
    source_file = traj_file_cls()
    source_file.read(join(data_dir, f"1l2y.{format}"))
    ref_array = source_file.get_structure(template)

    # Write the reference structure into a temporary file...
    written_file = traj_file_cls()
    written_file.set_structure(ref_array)
    file_name = biotite.temp_file(format)
    written_file.write(file_name)

    # ...and read it again
    reread_file = traj_file_cls()
    reread_file.read(file_name)
    array = reread_file.get_structure(template)

    assert ref_array.bonds == array.bonds
    assert ref_array.equal_annotation_categories(array)
    assert ref_array.box == pytest.approx(array.box)
    assert ref_array.coord == pytest.approx(array.coord, abs=1e-2)
def write_atom_to_pdb(pdb_outname, atom_location, atom_ID, atomgroup):
    """
    Append a new hetero atom to a reference structure file to
    visualise conserved non-protein atom sites.

    The file is read, extended by one atom and overwritten in place.
    Calling this function repeatedly adds the atoms one by one.

    Parameters
    ----------
    pdb_outname : str
        Filename of the reference structure.
    atom_location : array
        (x,y,z) coordinates of the atom location with respect to the
        reference structure.
    atom_ID : str
        A unique ID for the atom. It is stored as residue name.
    atomgroup : str
        MDAnalysis atomgroup to describe the atom.
        It is stored as atom name.
    """
    # Read the file into Biotite's structure object (atom array)
    structure = strucio.load_structure(pdb_outname)

    # The new residue ID is the last residue ID in the file + 1
    next_res_id = structure.res_id[-1] + 1
    # The new atom is always placed in chain 'X' with element 'O'
    new_atom = struc.Atom(
        coord=atom_location,
        chain_id="X",
        res_id=next_res_id,
        res_name=atom_ID,
        hetero=True,
        atom_name=atomgroup,
        element="O",
    )
    structure += struc.array([new_atom])

    # Overwrite the input file with the edited structure
    strucio.save_structure(pdb_outname, structure)
def test_hbond_structure(pdb_id):
    """
    Compare the hydrogen bonds found by :func:`hbond()` for the
    amino acids of a structure with the bonds found by MDTraj.

    Parameters
    ----------
    pdb_id : str
        Base name of the MMTF test file to be loaded.
    """
    file_name = join(data_dir("structure"), pdb_id + ".mmtf")
    array = load_structure(file_name)
    # Only consider amino acids for consistency
    # with bonded hydrogen detection in MDTraj
    array = array[..., struc.filter_amino_acids(array)]
    # For consistency with MDTraj 'S' cannot be acceptor element
    # https://github.com/mdtraj/mdtraj/blob/master/mdtraj/geometry/hbond.py#L365
    if isinstance(array, struc.AtomArrayStack):
        # For a stack an additional mask is returned
        triplets, mask = struc.hbond(array, acceptor_elements=("O", "N"))
    else:
        triplets = struc.hbond(array, acceptor_elements=("O", "N"))

    # Compare with MDTraj
    import mdtraj
    # Save to new pdb file for consistent treatment of inscode/altloc
    # in MDTraj
    # The context manager closes (and thereby deletes) the temporary
    # file, even if saving or loading fails
    with NamedTemporaryFile("w+", suffix=".pdb") as temp:
        save_structure(temp.name, array)
        traj = mdtraj.load(temp.name)
    triplets_ref = mdtraj.baker_hubbard(traj, freq=0, periodic=False)

    # Both packages may use different order
    # -> use set for comparison
    triplets_set = {tuple(tri) for tri in triplets}
    triplets_ref_set = {tuple(tri) for tri in triplets_ref}
    assert triplets_set == triplets_ref_set
def test_index_functions():
    """
    Each `index_xxx()` function should return the same values as its
    `xxx()` counterpart, when the latter is given the atoms selected
    by the same indices.
    """
    stack = strucio.load_structure(join(data_dir, "1l2y.mmtf"))
    array = stack[0]
    # Test for atom array, stack and raw coordinates
    samples = (array, stack, struc.coord(array), struc.coord(stack))
    # Generate random indices
    random.seed(42)
    indices = random.randint(array.array_length(), size=(100, 4), dtype=int)

    for sample in samples:
        is_coord = isinstance(sample, np.ndarray)
        selected = []
        for column in range(4):
            column_indices = indices[:, column]
            if is_coord:
                # Raw coordinates have a trailing dimension
                # in addition to the atom dimension
                selected.append(sample[..., column_indices, :])
            else:
                selected.append(sample[..., column_indices])
        atoms1, atoms2, atoms3, atoms4 = selected

        assert np.allclose(
            struc.displacement(atoms1, atoms2),
            struc.index_displacement(sample, indices[:, :2]),
            atol=1e-5
        )
        assert np.allclose(
            struc.distance(atoms1, atoms2),
            struc.index_distance(sample, indices[:, :2]),
            atol=1e-5
        )
        assert np.allclose(
            struc.angle(atoms1, atoms2, atoms3),
            struc.index_angle(sample, indices[:, :3]),
            atol=1e-5
        )
        assert np.allclose(
            struc.dihedral(atoms1, atoms2, atoms3, atoms4),
            struc.index_dihedral(sample, indices[:, :4]),
            atol=1e-5
        )
def test_superimposition_stack(ca_only):
    """
    Superimpose all but the first model of a multi-model structure
    onto the first model and expect an improved RMSD compared to the
    models as given in the structure file.
    """
    stack = strucio.load_structure(join(data_dir, "1l2y.mmtf"))
    fixed = stack[0]
    mobile = stack[1:]
    # Optionally restrict the superimposition to the 'CA' atoms
    mask = (mobile.atom_name == "CA") if ca_only else None

    fitted, transformation = struc.superimpose(fixed, mobile, mask)

    rmsd_after = struc.rmsd(fixed, fitted)
    rmsd_before = struc.rmsd(fixed, mobile)
    if ca_only:
        # The superimpositions are better for most cases than the
        # superimpositions in the structure file
        # -> Use average
        assert np.mean(rmsd_after) < np.mean(rmsd_before)
    else:
        # The superimpositions are better than the superimpositions
        # in the structure file
        assert (rmsd_after < rmsd_before).all()
def test_atom_array_consistency():
    """
    Check that the bonds of an `AtomArray` still refer to the same
    atoms after the array is indexed with a boolean mask.
    The mask is chosen so that every bonded atom is kept.
    """
    path = join(data_dir("structure"), "1l2y.mmtf")
    ca = strucio.load_structure(path)[0]
    ca = ca[ca.atom_name == "CA"]

    # Just for testing, does not reflect real bonds
    bond_pairs = np.array(
        [(0, 1), (2, 8), (5, 15), (1, 5), (0, 9), (3, 18), (2, 9)]
    )
    bond_list = struc.BondList(ca.array_length(), bond_pairs)
    ca.bonds = bond_list
    ref_bond_indices = bond_list.as_array()[:, :2].flatten()
    ref_ids = ca.res_id[ref_bond_indices]

    # Some random boolean mask as index,
    # but all bonded atoms are included
    mask = np.array(
        [1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1],
        dtype=bool
    )
    masked_ca = ca[mask]
    test_bond_indices = masked_ca.bonds.as_array()[:, :2].flatten()
    test_ids = masked_ca.res_id[test_bond_indices]

    # The bonds should always point to the same atoms (same res_id),
    # irrespective of indexing
    assert test_ids.tolist() == ref_ids.tolist()
def test_standardize_order(multi_model, seed):
    """
    Randomly reorder the atoms within each residue and check that
    :func:`standardize_order()` restores the original order of the
    non-hydrogen atoms.
    """
    original = load_structure(join(data_dir("structure"), "1l2y.mmtf"))
    if not multi_model:
        original = original[0]
    # The box is not preserved when concatenating atom arrays later
    # This would complicate the atom array equality later
    original.box = None

    # Randomly reorder the atoms in each residue
    np.random.seed(seed)
    # Start with an empty structure of the matching type
    reordered = (
        struc.AtomArrayStack(original.stack_depth(), 0)
        if multi_model
        else struc.AtomArray(0)
    )
    for residue in struc.residue_iter(original):
        n_atoms = residue.array_length()
        shuffled = np.random.choice(np.arange(n_atoms), n_atoms, replace=False)
        reordered += residue[..., shuffled]

    # Restore the original PDB standard order
    standard_order = strucinfo.standardize_order(reordered)
    restored = reordered[..., standard_order]

    assert restored.shape == original.shape
    # Only the non-hydrogen atoms are compared
    restored_heavy = restored[..., restored.element != "H"]
    original_heavy = original[..., original.element != "H"]
    assert restored_heavy == original_heavy
def test_sse():
    """
    Check that :func:`annotate_sse()` reproduces the expected
    secondary structure string for chain 'A' of the test structure.
    """
    expected_sse = (
        "caaaaaacccccccccccccbbbbbccccccbbbbccccccccccccccc"
        "ccccccccccccbbbbbbcccccccaaaaaaaaaccccccbbbbbccccc"
        "ccccccccccccbbbbbbbccccccccc"
    )
    path = join(data_dir("structure"), "3o5r.mmtf")
    array = strucio.load_structure(path)
    sse = struc.annotate_sse(array, "A")
    assert "".join(sse.tolist()) == expected_sse
def __getitem__(self, index):
    """
    Return the adjacency matrix of the 'CA' atoms of a structure.

    The structure file ``self.files[index]`` in ``self.fileDir`` is
    loaded; for a multi-model structure only the first model is used.
    Two 'CA' atoms are adjacent according to a cell list evaluated
    with ``self.threshold``.
    If the number of 'CA' atoms is odd, the matrix is padded with one
    row and one column of zeros, so both dimensions are even.

    Parameters
    ----------
    index : int
        Index into ``self.files``.

    Returns
    -------
    adj_matrix : ndarray
        The square adjacency matrix, converted to dtype 'double'.
    """
    path = os.path.join(self.fileDir, self.files[index])
    array = strucio.load_structure(path)
    if isinstance(array, biotite.structure.AtomArrayStack):
        # Multiple models -> only use the first one
        array = array[0]

    ca = array[array.atom_name == "CA"]
    cell_list = struc.CellList(ca, cell_size=self.threshold)
    adj_matrix = cell_list.create_adjacency_matrix(
        self.threshold
    ).astype(int)

    if adj_matrix.shape[0] % 2 != 0:
        # Append one zero-filled row and column
        # to obtain even dimensions
        adj_matrix = np.pad(adj_matrix, ((0, 1), (0, 1)))

    return adj_matrix.astype('double')
def test_doctest(package_name, context_package_names):
    """
    Run all doctest strings in all Biotite subpackages.

    Parameters
    ----------
    package_name : str
        Name of the package whose doctests are run.
    context_package_names : list of str
        Names of additional packages whose attributes are available
        as globals in the doctests.
    """
    # Collect all attributes of this package and its subpackages
    # as globals for the doctests
    globs = {}
    # The package itself is also used as context
    for name in context_package_names + [package_name]:
        context_package = import_module(name)
        for attr in dir(context_package):
            globs[attr] = getattr(context_package, attr)

    # Add fixed names for certain paths
    globs["path_to_directory"] = tempfile.gettempdir()
    globs["path_to_structures"] = join(".", "tests", "structure", "data")
    globs["path_to_sequences"] = join(".", "tests", "sequence", "data")
    # Add frequently used modules
    globs["np"] = np
    # Add frequently used objects
    globs["atom_array_stack"] = strucio.load_structure(
        join(".", "tests", "structure", "data", "1l2y.mmtf")
    )
    globs["atom_array"] = globs["atom_array_stack"][0]

    # Adjust NumPy print formatting
    np.set_printoptions(precision=3, floatmode="maxprec_equal")

    # Run doctests
    # This test does not use 'testfile()' or 'testmod()'
    # due to problems with doctest identification for Cython modules
    # More information below
    package = import_module(package_name)
    runner = doctest.DocTestRunner(
        verbose=False,
        optionflags=doctest.ELLIPSIS | doctest.REPORT_ONLY_FIRST_FAILURE
    )
    finder = doctest.DocTestFinder(exclude_empty=False)
    # It is necessary to set 'module' to 'False', as otherwise
    # Cython functions and classes would be falsely identified
    # as members of an external module by 'DocTestFinder._find()'
    # and consequently would be ignored
    #
    # Setting 'module=False' omits this check
    # This check is not necessary as the biotite subpackages
    # ('__init__.py' modules) should only contain attributes, that
    # are part of the package itself.
    tests = finder.find(
        package, package.__name__, module=False, extraglobs=globs
    )
    for test in tests:
        runner.run(test)

    results = doctest.TestResults(runner.failures, runner.tries)
    try:
        assert results.failed == 0
    except AssertionError:
        # Report the affected package before the test fails
        print(f"Failing doctest in module {package}")
        raise
def nuc_sample_array():
    """
    Load the sample structure and return only those atoms that are
    selected by :func:`filter_nucleotides()`.
    """
    path = join(data_dir("structure"), "4p5j.cif")
    structure = strucio.load_structure(path)
    nucleotide_mask = struc.filter_nucleotides(structure)
    return structure[nucleotide_mask]
def test_outside_location():
    """
    Query a cell list at a location outside of any cell and expect
    that no atoms are returned.
    """
    structure = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    amino_acids = structure[struc.filter_amino_acids(structure)]
    cell_list = struc.CellList(amino_acids, cell_size=5)
    # A position far below the minimum coordinates of the structure
    outside_coord = np.min(amino_acids.coord, axis=0) - 100
    # Expect empty array
    found_atoms = cell_list.get_atoms(outside_coord, 5)
    assert len(found_atoms) == 0
def get_reference_from_structure(
        structure_path: str,
        positions: t.Optional[t.Container[int]] = None) -> str:
    """
    Build a sequence string from the residues of a structure file.

    Each residue name is translated via the mapping provided by
    ``AminoAcidDict().aa_dict`` and the results are concatenated.

    :param structure_path: Path to the structure file to be loaded.
    :param positions: If given, only residues whose ID is contained
        in `positions` are included.
    :return: The concatenated, translated residue names.
    """
    aa_mapping = AminoAcidDict().aa_dict
    structure = io.load_structure(structure_path)
    # Pairs of (residue ID, residue name)
    residues = zip(*bst.get_residues(structure))
    return "".join(
        aa_mapping[res_name]
        for res_id, res_name in residues
        if positions is None or res_id in positions
    )
def test_loading_with_extra_args():
    """
    Check that optional keyword arguments given to
    :func:`load_structure()` are accepted for the matching file type
    and that an unsupported argument raises a :class:`TypeError`.
    """
    structure_dir = data_dir("structure")
    template = join(structure_dir, "1l2y.pdb")
    trajectory = join(structure_dir, "1l2y.xtc")

    # Text files: extra arguments are passed on as 'get_structure' args
    structure = strucio.load_structure(template, extra_fields=["b_factor"])
    assert "b_factor" in structure.get_annotation_categories()

    # Trajectories: extra arguments are passed on to 'read'
    frames = strucio.load_structure(
        trajectory, template=structure[0], start=5, stop=6
    )
    assert len(frames) == 1

    # Arguments that do not fit the file type must be rejected
    with pytest.raises(TypeError):
        strucio.load_structure(template, start=2)
def test_loading(path):
    """
    Check that :func:`load_structure()` loads the file at `path`
    without raising an exception and returns an object of the
    appropriate type.
    """
    trajectory_suffixes = (".trr", ".xtc", ".tng", ".dcd", ".netcdf")
    suffix = splitext(path)[1]

    load_args = [path]
    if suffix in trajectory_suffixes:
        # Trajectory files are loaded with an additional template
        load_args.append(
            strucio.load_structure(join(data_dir("structure"), "1l2y.mmtf"))
        )
    array = strucio.load_structure(*load_args)

    # The test GRO file contains only a single model,
    # since it is created by Gromacs
    expected_type = struc.AtomArray if suffix == ".gro" \
        else struc.AtomArrayStack
    assert isinstance(array, expected_type)
def test_saving_with_extra_args(suffix):
    """
    Test if giving a wrong optional parameter to
    :func:`save_structure()` raises a :class:`TypeError`.

    Parameters
    ----------
    suffix : str
        File suffix (without leading dot) of the format to be tested.
    """
    array = strucio.load_structure(join(data_dir("structure"), "1l2y.mmtf"))
    # The context manager closes (and thereby deletes) the temporary
    # file, even if the expected exception is not raised
    with NamedTemporaryFile("w+", suffix=f".{suffix}") as temp:
        with pytest.raises(TypeError):
            strucio.save_structure(temp.name, array, answer=42)
def test_rdf_bins():
    """
    Test if the RDF produces the correct bin ranges:
    the requested number of bins, all lying within the given interval.
    """
    stack = load_structure(TEST_FILE)
    center = stack[:, 0]
    num_bins = 44
    bin_range = (0, 11.7)
    bins, g_r = rdf(center, stack, bins=num_bins, interval=bin_range)
    assert len(bins) == num_bins
    # The first bin must lie above the lower bound of the interval...
    assert bins[0] > bin_range[0]
    # ...and the last bin below the upper bound
    # (previously the second bin 'bins[1]' was checked, which leaves
    # the upper end of the bin range untested)
    assert bins[-1] < bin_range[1]