def testAppend(self):
    # Work on a clone so the shared fixture chain is left untouched.
    chain = self.chain.clone()
    chain.compute_torsion()
    assert chain.torsion is not None

    alanine = structure.SequenceAlphabets.Protein.ALA
    appended = structure.ProteinResidue(
        rank=9999, type=alanine, sequence_number=9999, insertion_code='A')
    new_rank = chain.residues.append(appended)

    # The residue must be registered under its ID ('9999A'), and the
    # torsion angles computed above must no longer be accessible.
    self.assertTrue(chain.residues._contains('9999A'))
    self.assertRaises(structure.InvalidOperation, getattr, chain, 'torsion')

    # The new residue is expected at the end of the chain, at rank 59.
    self.assertEqual(chain[-1], appended)
    self.assertEqual(chain.residues[59], appended)
    self.assertEqual(new_rank, 59)

    # Appending the same residue (same ID) a second time is an error...
    self.assertRaises(structure.DuplicateResidueIDError,
                      chain.residues.append, appended)

    # ...but must NOT raise DuplicateResidueIDError if the residue has no ID
    anonymous = structure.ProteinResidue(99999, alanine)
    chain.residues.append(anonymous)
    chain.residues.append(anonymous)
def make_chain(coordinates, sequence=None, chainid='A'):
    """
    Build a Chain from a coordinate array, treating every coordinate as the
    position of a CA atom.

    @param coordinates: CA positions; entry i is used for the i-th residue
    @param sequence: residue types, one per residue; when None, a poly-ALA
                     sequence as long as C{coordinates} is used
    @param chainid: identifier passed on to the new chain

    @return: a chain whose residues each carry a single CA atom
    """
    if sequence is None:
        sequence = ['ALA'] * len(coordinates)

    def ca_only_residue(number, residue_type):
        # Residue rank, sequence number and atom serial number all share
        # the same 1-based value.
        residue = structure.ProteinResidue(number, residue_type,
                                           sequence_number=number)
        ca = structure.Atom(number, 'CA', 'C', coordinates[number - 1])
        ca.occupancy = 1.0
        residue.atoms.append(ca)
        return residue

    residues = [ca_only_residue(number, residue_type)
                for number, residue_type in enumerate(sequence, start=1)]

    return structure.Chain(chainid, residues=residues)
def testAppendAtom(self):
    """
    Two CA atoms differing only in their alternate flag can both be
    appended to one residue, and each gets that residue as its owner.

    @see: [CSB 0000122]
    """
    owner = structure.ProteinResidue(1, sequence.ProteinAlphabet.ALA)

    alternates = [
        structure.Atom(1, 'CA', structure.ChemElements.C, [1, 1, 1],
                       alternate=flag)
        for flag in ('A', 'B')
    ]

    for atom in alternates:
        owner.atoms.append(atom)

    for atom in alternates:
        self.assertEqual(atom.residue, owner)
def testHasStructure(self):
    # The fixture residue is expected to report a structure.
    self.assertTrue(self.residue.has_structure)

    # A freshly constructed residue, with no atoms appended, must not.
    empty = structure.ProteinResidue(rank=111, type='ALA')
    self.assertFalse(empty.has_structure)
def _parse_profile(self, hmm, units=ScoreUnits.LogScales):
    """
    Parse the HMM profile.

    The profile lines (C{self._profile}) are consumed in three passes:
    first all layers and their match states are created from the emission
    rows, then the transitions leaving the Start state are attached, and
    finally the remaining transitions are added layer by layer, creating
    insertion and deletion states on demand.

    @param hmm: the hmm object being constructed
    @type hmm: L{ProfileHMM}
    @param units: units of the parsed scores; with C{ScoreUnits.LogScales}
                  the values are stored as read, otherwise each value is
                  converted with C{hmm._convert}
    @type units: L{ScoreUnits}

    @return: the updated hmm
    @rtype: L{ProfileHMM}

    @raise HHProfileFormatError: on an emission row with an unexpected
                                 number of fields, a layer represented by a
                                 gap, a layer rank mismatch, an undefined
                                 Start > Match[1] transition or a transition
                                 that cannot be added
    @raise NotImplementedError: when an unknown transition string is
                                encountered (presumably raised by
                                C{_add_transition}; not visible here)
    """
    assert self._chopped

    # 0. Prepare start and end states
    hmm.start = State(States.Start)
    hmm.end = State(States.End)

    residues = None
    background = {}
    tran_types = None
    tran_lines = []
    start_probs = None

    lines = iter(self._profile)
    # Raw string: the backslashes belong to the regular expression. In a
    # plain literal they are invalid string escapes, which newer Python
    # versions warn about.
    pattern = re.compile(r'^[A-Z\-]\s[0-9]+\s+')

    # '*' marks an undefined value and is always mapped to None
    if units == ScoreUnits.LogScales:

        def parse_probability(v):
            if v.strip() == '*':
                return None
            else:
                return float(v)
    else:

        def parse_probability(v):
            if v.strip() == '*':
                return None
            else:
                return hmm._convert(units, float(v),
                                    hmm.scale, hmm.logbase)

    # 1. Create all layers (profile columns), create and attach their
    #    match states
    while True:
        try:
            line = next(lines)
        except StopIteration:
            break

        if line.startswith('NULL'):
            # Header: the NULL line itself (background values) and the
            # three lines after it (residue symbols, transition labels,
            # Start transition values)
            try:
                backprobs = tuple(map(parse_probability, line.split()[1:]))

                line = next(lines)
                residues = line.split()[1:]
                residues = [Enum.parse(ProteinAlphabet, aa)
                            for aa in residues]

                for pos, aa in enumerate(residues):
                    background[aa] = backprobs[pos]

                line = next(lines)
                tran_types = line.split()

                line = next(lines)
                start_probs = list(map(parse_probability, line.split()))
            except StopIteration:
                break

        elif pattern.match(line):
            # Emission row; the line right after it holds the transitions
            # of the same layer and is kept for step 3
            emrow = line
            try:
                tran_lines.append(next(lines))
                #junkrow = next(lines)
            except StopIteration:
                break

            emprobs = emrow.split()
            if len(emprobs) != 23:
                raise HHProfileFormatError(
                    "Unexpected number of data fields: {0}".format(
                        len(emprobs)))

            # Field 0 is the residue symbol, field 1 the layer rank
            rank = int(emprobs[1])
            residue = structure.ProteinResidue(
                rank=rank, type=emprobs[0], sequence_number=rank,
                insertion_code=None)
            if residue.type == ProteinAlphabet.GAP:
                raise HHProfileFormatError(
                    "Layer {0} can't be represented by a gap".format(rank))

            new_layer = hmm.layers.append(HMMLayer(rank, residue))
            if new_layer != rank:
                raise HHProfileFormatError(
                    'Layer {0} defined as {1}'.format(new_layer, rank))

            match = State(States.Match, emit=Enum.members(ProteinAlphabet))

            match.rank = rank
            match.background.set(background)

            # NOTE(review): residues is still None if an emission row
            # precedes the NULL section; that case is not handled here.
            # Emission values start at field 2, in the order of the
            # residue symbols read from the header
            for col, aa in enumerate(residues):
                prob = parse_probability(emprobs[col + 2])
                match.emission.append(aa, prob)

            hmm.layers[new_layer].append(match)
            assert hmm.layers.last_index == match.rank

    # 2. Append starting transitions: S -> M[1] and optionally S -> D[1]
    #    and S -> I[0]. States D[1] and I[0] will be created if needed.
    #    Note that [0] is not a real layer, I[0] is simply an insertion at
    #    the level of Start
    # NOTE(review): start_probs is still None if no NULL section was read;
    # both branches below would then fail on the subscript.
    if len(hmm.layers) > 0:

        first_match = hmm.layers[hmm.layers.start_index]

        if start_probs[0] is None:
            raise HHProfileFormatError(
                "Transition Start > Match[1] is undefined")

        start_tran = Transition(hmm.start, first_match[States.Match],
                                start_probs[0])
        hmm.start.transitions.append(start_tran)

        if start_probs[1] is not None and start_probs[3] is not None:
            # Start -> I[0] -> M[1]
            start_ins = State(States.Insertion,
                              emit=Enum.members(ProteinAlphabet))
            start_ins.rank = 0
            start_ins.background.set(background)
            start_ins.emission = start_ins.background

            hmm.start_insertion = start_ins
            # Start -> I[0]
            hmm.start.transitions.append(
                Transition(hmm.start, hmm.start_insertion, start_probs[1]))
            # I[0] -> M[1]
            hmm.start_insertion.transitions.append(
                Transition(hmm.start_insertion, first_match[States.Match],
                           start_probs[3]))
            # I[0] -> I[0]
            # NOTE(review): this is a truthiness test, unlike the
            # "is not None" checks around it, so a parsed value of 0.0 is
            # skipped as well - confirm that this is intended.
            if start_probs[4]:
                hmm.start_insertion.transitions.append(
                    Transition(hmm.start_insertion, hmm.start_insertion,
                               start_probs[4]))

        if start_probs[2] is None and start_probs[6] is not None:
            # M->D is corrupt (*) at the Start layer, using D->D instead
            start_probs[2] = start_probs[6]

        if start_probs[2] is not None:
            # Start -> D[1]
            start_del = State(States.Deletion)
            start_del.rank = 1
            # NOTE(review): layer 1 is hard-coded here, while first_match
            # is looked up through hmm.layers.start_index - presumably the
            # same layer; verify.
            hmm.layers[1].append(start_del)
            start_tran = Transition(hmm.start, first_match[States.Deletion],
                                    start_probs[2])
            hmm.start.transitions.append(start_tran)
    else:
        # No layers at all: connect Start directly to End
        start_tran = Transition(hmm.start, hmm.end, start_probs[0])
        hmm.start.transitions.append(start_tran)

    # 3. Append remaining transitions. I and D states will be created on
    #    demand.
    for rank, fields in enumerate(tran_lines, start=hmm.layers.start_index):
        assert hmm.layers[rank][States.Match].rank == rank

        # Keep the raw strings too: Neff values are derived from the
        # unconverted fields
        ofields = fields.split()
        fields = tuple(map(parse_probability, ofields))

        # 3a. Parse all Neff values and create I[i] and D[i] states if
        #     NeffX[i] is not None
        for col, neff in enumerate(tran_types[7:10], start=7):

            if fields[col] is not None:
                neff_value = float(ofields[col]) / abs(hmm.scale)

                if neff == 'Neff':
                    hmm.layers[rank].effective_matches = neff_value

                elif neff == 'Neff_I':
                    hmm.layers[rank].effective_insertions = neff_value

                    if States.Insertion not in hmm.layers[rank]:
                        insertion = State(
                            States.Insertion,
                            emit=Enum.members(ProteinAlphabet))
                        insertion.background.set(background)
                        insertion.emission.set(background)
                        insertion.rank = rank
                        hmm.layers[rank].append(insertion)

                elif neff == 'Neff_D':
                    hmm.layers[rank].effective_deletions = neff_value

                    # A deletion state is only created for a positive
                    # Neff_D value
                    if (States.Deletion not in hmm.layers[rank]
                            and neff_value > 0):
                        deletion = State(States.Deletion)
                        deletion.rank = rank
                        hmm.layers[rank].append(deletion)

        # 3b. Starting from the first layer, parse all transitions and
        #     build the HMM graph stepwise
        for col, tran in enumerate(tran_types):
            probability = fields[col]

            if probability is not None:
                try:
                    self._add_transition(hmm, rank, tran, probability)
                except (CollectionIndexError, ItemNotFoundError) as ex:
                    msg = "Can't add transition {0} at {1}: {2.__class__.__name__}, {2!s}"
                    raise HHProfileFormatError(msg.format(tran, rank, ex))

    return hmm