def test_get_assembly(single_model):
    """
    Check that :func:`get_assembly()` yields as many peptide chains as
    the ``_pdbx_struct_assembly.oligomeric_count`` field reports.

    If `single_model` is truthy, model ``1`` is requested and the
    assembly is expected to be an :class:`AtomArray`; otherwise no
    model is given and an :class:`AtomArrayStack` is expected.
    """
    model = 1 if single_model else None

    cif_path = join(data_dir, "1f2n.cif")
    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(cif_path)

    assembly_category = pdbx_file.get_category(
        "pdbx_struct_assembly", expect_looped=True
    )
    assembly_ids = assembly_category["id"]
    ref_oligomer_counts = assembly_category["oligomeric_count"]

    # Every assembly listed in the file is checked
    for assembly_id, ref_oligomer_count in zip(
        assembly_ids, ref_oligomer_counts
    ):
        assembly = pdbx.get_assembly(
            pdbx_file, assembly_id=assembly_id, model=model
        )
        # Only amino acid atoms are considered for the chain count
        amino_acid_mask = struc.filter_amino_acids(assembly)
        protein_assembly = assembly[..., amino_acid_mask]
        test_oligomer_count = struc.get_chain_count(protein_assembly)

        expected_type = (
            struc.AtomArray if single_model else struc.AtomArrayStack
        )
        assert isinstance(assembly, expected_type)
        assert test_oligomer_count == int(ref_oligomer_count)
def test_get_assembly(model):
    """
    Check that :func:`get_assembly()` yields as many peptide chains as
    the ``_pdbx_struct_assembly.oligomeric_count`` field reports.

    In addition, verify that the atom count of the complete assembly
    is an integer multiple of the atom count of the structure returned
    by :func:`get_structure()` (the monomer).

    If `model` is ``None``, an :class:`AtomArrayStack` is expected,
    otherwise an :class:`AtomArray`.
    """
    cif_path = join(data_dir("structure"), "1f2n.cif")
    pdbx_file = pdbx.PDBxFile.read(cif_path)

    assembly_category = pdbx_file.get_category(
        "pdbx_struct_assembly", expect_looped=True
    )
    assembly_ids = assembly_category["id"]
    ref_oligomer_counts = assembly_category["oligomeric_count"]

    # Every assembly listed in the file is checked
    for assembly_id, ref_oligomer_count in zip(
        assembly_ids, ref_oligomer_counts
    ):
        print("Assembly ID:", assembly_id)
        try:
            assembly = pdbx.get_assembly(
                pdbx_file, assembly_id=assembly_id, model=model
            )
        except biotite.InvalidFileError:
            if model is not None:
                # A single model was requested, so the error is genuine
                raise
            # The file cannot be parsed into an AtomArrayStack,
            # as the models contain different numbers of atoms
            # -> skip this test case
            return

        # Only amino acid atoms are considered for the chain count
        amino_acid_mask = struc.filter_amino_acids(assembly)
        protein_assembly = assembly[..., amino_acid_mask]
        test_oligomer_count = struc.get_chain_count(protein_assembly)

        expected_type = (
            struc.AtomArrayStack if model is None else struc.AtomArray
        )
        assert isinstance(assembly, expected_type)
        assert test_oligomer_count == int(ref_oligomer_count)

        # The atom count of the entire assembly should be a multiple
        # of the atom count of a monomer
        monomer_atom_count = pdbx.get_structure(pdbx_file).array_length()
        assert assembly.array_length() % monomer_atom_count == 0
# Fetch the mmCIF file for entry 1M4X and list the assemblies it offers
mmcif_source = rcsb.fetch("1M4X", "mmcif")
pdbx_file = pdbx.PDBxFile.read(mmcif_source)
assemblies = pdbx.list_assemblies(pdbx_file)
print("ID name")
print()
for assembly_id, name in assemblies.items():
    print(f"{assembly_id:2} {name}")

########################################################################
# ``'complete icosahedral assembly'`` sounds good.
# In fact, the first assembly is often the complete one.
# Hence, the :func:`get_assembly()` function builds the first assembly
# by default.
# Since we know the ID we want (``'1'``), we will pass it to the
# function anyway.
# It returns the chosen assembly as :class:`AtomArray`.
# Note that the assembly ID is a string, not an integer.

biological_unit = pdbx.get_assembly(pdbx_file, assembly_id="1", model=1)
chain_count = struc.get_chain_count(biological_unit)
print("Number of protein chains:", chain_count)

########################################################################
# Now we could do some analysis on the biological unit.
# But for this example we will simply save the entire assembly as *PDB*
# file for later visualization.

# For brevity, save only CA atoms to file for visualization with PyMOL
#biological_unit = biological_unit[biological_unit.atom_name == "CA"]
#strucio.save_structure("biological_assembly.pdb", biological_unit)
# biotite_static_image = biological_assembly.png
def test_get_chain_count(array):
    """
    Check that :func:`get_chain_count()` reports 6 chains for `array`.

    `array` is presumably supplied by a test fixture defined elsewhere
    (TODO confirm).
    """
    chain_count = struc.get_chain_count(array)
    assert chain_count == 6