def renumber_pdb_needleman(inseq, instr, seqback=False):
    """Build a renumbered copy of a :class:`gemmi.Structure`.

    The residues of every chain are aligned (Needleman-Wunsch, through
    ``get_sequence_alignment``) against the ``'mainseq'`` of the sequence
    that ``inseq.whatseq`` associates with the chain name. Each cloned
    residue gets the aligned index plus one as its new number. A chain whose
    alignment returns ``None`` is reported with a warning and left out of
    the output.

    :param inseq: Input sequence.
    :type inseq: :class:`crops.elements.sequences.oligoseq`
    :param instr: Gemmi structure.
    :type instr: :class:`gemmi.Structure`
    :param seqback: If True, ``inseq`` is returned as well, after its
        ``'gapseq'`` lists have been reset and refilled in place with one
        gapped string per renumbered chain (``'-'`` marks positions that
        have no residue in the structure), defaults to False.
    :type seqback: bool, optional

    :return instr: Renumbered structure.
    :rtype instr: :class:`gemmi.Structure`
    :return inseq: Sequence with extra information about gaps, only if
        seqback is True.
    :rtype inseq: :class:`crops.elements.sequences.oligoseq`

    """
    result = gemmi.Structure()
    result.name = instr.name

    if seqback:
        # Start every monomer from an empty list; entries are appended below.
        for key in inseq.imer:
            inseq.imer[key].seqs['gapseq'] = []

    for source_model in instr:
        target_model = gemmi.Model(source_model.name)

        for source_chain in source_model:
            target_chain = gemmi.Chain(source_chain.name)
            nseq = inseq.whatseq(source_chain.name)
            reference = inseq.imer[nseq].seqs['mainseq']
            observed = ''.join([ressymbol(res.name) for res in source_chain])

            alignment = get_sequence_alignment(reference, observed)
            if alignment is None:
                logging.warning('Alignment failed for chain {}, it will be excluded'.format(source_chain.name))
                continue

            for position, source_residue in enumerate(source_chain):
                clone = source_residue.clone()
                # Aligned indices are shifted by one to give the new number.
                clone.seqid.num = alignment[position] + 1
                target_chain.add_residue(clone)
            target_model.add_chain(target_chain)

            if seqback:
                numbers_present = {res.seqid.num for res in target_chain}
                gapped = ''.join(
                    symbol if number in numbers_present else '-'
                    for number, symbol in enumerate(reference, start=1)
                )
                inseq.imer[nseq].seqs['gapseq'].append(gapped)

        result.add_model(target_model)

    if not seqback:
        return result
    return result, inseq
def test_model_all(self):
    """Check the order of atom names produced by ``model.all()``.

    Seven chains named A-G are created; only B, C and F receive residues
    (seven each) and four atoms placed at residue indices 2, 2, 3 and 5.
    The names must come back in the order in which they were added, both
    before and after the model is put in a structure on which
    ``remove_empty_chains()`` is called.
    """
    model = gemmi.Model('1')
    for chain_name in 'ABCDEFG':
        model.add_chain(gemmi.Chain(chain_name))

    # (residue index, atom-name suffix) pairs used for every populated chain.
    placements = [(2, '0'), (2, '1'), (3, '2'), (5, '3')]
    expected = []
    for cname in 'BCF':
        chain = model[cname]
        for _ in range(7):
            chain.add_residue(gemmi.Residue())
        for residue_index, suffix in placements:
            atom = gemmi.Atom()
            atom.name = cname + suffix
            expected.append(atom.name)
            chain[residue_index].add_atom(atom)

    def atom_names():
        return [cra.atom.name for cra in model.all()]

    self.assertEqual(atom_names(), expected)

    st = gemmi.Structure()
    st.add_model(model)
    st.remove_empty_chains()
    # NOTE(review): this re-checks `model`, not the model held by `st` --
    # confirm whether st[0].all() was the intended subject.
    self.assertEqual(atom_names(), expected)
def test_remove2(self):
    """Exercise item deletion and ``append_residues()`` on model 0 of 1pfe."""
    first_model = gemmi.read_structure(full_path('1pfe.cif.gz'))[0]

    # Deleting by chain name leaves a single chain in the model.
    self.assertEqual(len(first_model), 2)
    del first_model['A']
    self.assertEqual(len(first_model), 1)

    chain_b = first_model['B']
    self.assertEqual(chain_b[0].name, 'DSN')
    # Remove the 'DSN' entry addressed through key '1'; 'ALA' moves to front.
    del chain_b['1']['DSN']
    self.assertEqual(chain_b[0].name, 'ALA')
    # Deleting by integer index exposes the next item.
    del chain_b[0]
    self.assertEqual(chain_b[0].name, 'N2C')

    # test append_residues()
    self.assertEqual(len(chain_b), 20)
    chain_b.append_residues(chain_b[:5], min_sep=10)
    self.assertEqual(len(chain_b), 25)

    # test append_residues() with empty chain
    empty_chain = gemmi.Chain('X')
    empty_chain.append_residues(chain_b[:5], min_sep=1)
    self.assertEqual(len(empty_chain), 5)
def get_masked_pdb(pdb: gemmi.Structure, coord: Coord, radius: float = 8.0) -> gemmi.Structure:
    """Remove from ``pdb`` every atom lying within ``radius`` of ``coord``.

    A pruned copy of each model is assembled first: models and chains keep
    their names, residues keep ``name``, ``seqid``, ``subchain``,
    ``label_seq`` and ``het_flag``, and only atoms whose distance from
    ``coord`` is strictly greater than ``radius`` are copied. Residues are
    kept even when none of their atoms survive. The models of ``pdb`` are
    then replaced, in place, by the pruned ones.

    :param pdb: Structure to mask; it is modified in place.
    :param coord: Centre of the masked sphere; its ``x``, ``y`` and ``z``
        attributes are read.
    :param radius: Atoms at this distance or closer are dropped, defaults
        to 8.0.
    :return: The same ``pdb`` object, now holding the pruned models.
    """
    event_centroid = gemmi.Position(
        coord.x,
        coord.y,
        coord.z,
    )

    new_structure = gemmi.Structure()

    for model_i, model in enumerate(pdb):
        new_model = gemmi.Model(model.name)
        new_structure.add_model(new_model, pos=-1)

        for chain_i, chain in enumerate(model):
            new_chain = gemmi.Chain(chain.name)
            # Children are reached through new_structure[...] after each add,
            # exactly as the stored objects are addressed, not the locals.
            new_structure[model_i].add_chain(new_chain, pos=-1)

            for residue_i, residue in enumerate(chain):
                new_residue = gemmi.Residue()
                new_residue.name = residue.name
                new_residue.seqid = residue.seqid
                new_residue.subchain = residue.subchain
                new_residue.label_seq = residue.label_seq
                new_residue.het_flag = residue.het_flag
                new_structure[model_i][chain_i].add_residue(new_residue, pos=-1)

                for atom in residue:
                    if atom.pos.dist(event_centroid) > radius:
                        new_structure[model_i][chain_i][residue_i].add_atom(
                            atom, pos=-1)

    # Swap the models in place. The number of models is fixed up front so
    # that `pdb` is never iterated while models are appended to it and
    # deleted from it (the previous loop mutated the container it iterated).
    for model_i in range(len(new_structure)):
        pdb.add_model(new_structure[model_i], pos=-1)
        del pdb[0]

    return pdb
def test_remove2(self):
    """Exercise deletion, ``append_residues()`` and chain add/remove on 1pfe.

    The last step checks that the mmCIF document made from the edited
    structure lists only strand 'B' in ``_struct_ref_seq``.
    """
    st = gemmi.read_structure(full_path('1pfe.cif.gz'))
    model = st[0]

    def chain_names():
        return [chain.name for chain in model]

    self.assertEqual(len(model), 2)
    chain_b = model['B']
    self.assertEqual(chain_b[0].name, 'DSN')
    # Remove the 'DSN' entry addressed through key '1'; 'ALA' moves to front.
    del chain_b['1']['DSN']
    self.assertEqual(chain_b[0].name, 'ALA')
    # Deleting by integer index exposes the next item.
    del chain_b[0]
    self.assertEqual(chain_b[0].name, 'N2C')

    # test append_residues()
    self.assertEqual(len(chain_b), 20)
    chain_b.append_residues(chain_b[:5], min_sep=10)
    self.assertEqual(len(chain_b), 25)

    # test append_residues() with empty chain
    extra_chain = gemmi.Chain('X')
    extra_chain.append_residues(chain_b[:5], min_sep=1)
    self.assertEqual(len(extra_chain), 5)

    # test adding and removing chains
    # Expected names below: only the unique_name=True copy that clashes
    # with an existing 'X' ends up under a different name ('C').
    model.add_chain(extra_chain, unique_name=False)
    model.add_chain(extra_chain, unique_name=True)
    model.add_chain(extra_chain)
    self.assertEqual(chain_names(), list('ABXCX'))
    del model[2:]
    model.add_chain(extra_chain, unique_name=True)
    self.assertEqual(chain_names(), list('ABX'))
    del model[-1]
    del model['A']
    self.assertEqual(len(model), 1)
    self.assertEqual(model[0].name, 'B')

    doc = st.make_mmcif_document()
    ref_seq = doc[0].get_mmcif_category('_struct_ref_seq')
    self.assertEqual(ref_seq['pdbx_strand_id'], ['B'])