Exemple #1
0
    def testAppend(self):
        """
        Check that appending a residue to a chain returns the new rank,
        invalidates the previously computed torsion angles and rejects a
        residue whose ID is already present in the chain.
        """
        chain = self.chain.clone()
        chain.compute_torsion()
        # unittest assertion instead of a bare assert: it is not stripped
        # under "python -O" and reports a proper failure message
        self.assertIsNotNone(chain.torsion)

        residue = structure.ProteinResidue(
            9999, structure.SequenceAlphabets.Protein.ALA, 9999, 'A')

        rank = chain.residues.append(residue)
        self.assertTrue(chain.residues._contains('9999A'))
        # reading the torsion property after the append must fail
        with self.assertRaises(structure.InvalidOperation):
            chain.torsion

        self.assertEqual(chain[-1], residue)
        self.assertEqual(chain.residues[59], residue)
        self.assertEqual(rank, 59)

        # the very same residue (same ID) can't be appended twice
        self.assertRaises(structure.DuplicateResidueIDError,
                          chain.residues.append, residue)

        # assert NOT raises DuplicateResidueIDError if id is None:
        residue = structure.ProteinResidue(
            99999, structure.SequenceAlphabets.Protein.ALA)
        chain.residues.append(residue)
        chain.residues.append(residue)
Exemple #2
0
def make_chain(coordinates, sequence=None, chainid='A'):
    """Create a Chain instance from a coordinate array, assuming that these
    are the positions of CA atoms.

    One ProteinResidue is built per entry of `sequence`; each residue gets a
    single 'CA' atom (element 'C', occupancy 1.0) located at the coordinate
    with the same index. Ranks, sequence numbers and atom serial numbers all
    count from 1.

    coordinates -- indexable collection of atom positions
    sequence    -- residue types, one per residue; defaults to 'ALA' for
                   every entry of `coordinates`
    chainid     -- identifier passed on to structure.Chain
    """
    if sequence is None:
        sequence = ['ALA'] * len(coordinates)

    residues = []

    for number, residue_type in enumerate(sequence, start=1):
        residue = structure.ProteinResidue(number, residue_type,
                                           sequence_number=number)
        # coordinates are 0-based while residue numbering is 1-based
        atom = structure.Atom(number, 'CA', 'C', coordinates[number - 1])
        atom.occupancy = 1.0
        residue.atoms.append(atom)
        residues.append(residue)

    return structure.Chain(chainid, residues=residues)
Exemple #3
0
    def testAppendAtom(self):
        """
        Append two atoms that differ only in their alternate location flag
        to the same residue and check that each one is linked back to it.

        @see: [CSB 0000122]
        """
        residue = structure.ProteinResidue(1, sequence.ProteinAlphabet.ALA)

        # identical serial number, name, element and position;
        # only the alternate flag differs
        alternates = [
            structure.Atom(1, 'CA', structure.ChemElements.C, [1, 1, 1],
                           alternate=flag)
            for flag in ('A', 'B')
        ]

        for atom in alternates:
            residue.atoms.append(atom)

        for atom in alternates:
            self.assertEqual(atom.residue, residue)
Exemple #4
0
    def testHasStructure(self):
        """
        The residue stored on the test case reports has_structure, while a
        newly constructed residue without atoms does not.
        """
        self.assertTrue(self.residue.has_structure)

        empty_residue = structure.ProteinResidue(111, 'ALA')
        self.assertFalse(empty_residue.has_structure)
Exemple #5
0
    def _parse_profile(self, hmm, units=ScoreUnits.LogScales):
        """
        Parse the HMM profile.

        Reads the lines in C{self._profile}, creates one layer with a match
        state per emission row and then connects the states using the
        start transitions from the header and the transition row that
        follows each emission row.

        @param hmm: the hmm object being constructed
        @type hmm: L{ProfileHMM}
        @param units: units of the scores to store; with
                      L{ScoreUnits.LogScales} the values are taken as they
                      appear in the file, otherwise each value is passed
                      through C{hmm._convert}
        @type units: L{ScoreUnits} member
        @return: the updated hmm
        @rtype: L{ProfileHMM}

        @raise HHProfileFormatError: when the NULL header section is missing
                                     or incomplete, an emission row has an
                                     unexpected number of fields, a layer is
                                     a gap or out of order, the transition
                                     Start > Match[1] is undefined, or a
                                     transition can't be added
        @raise NotImplementedError: when an unknown transition string is
                                    encountered
        """
        assert self._chopped

        # 0. Prepare start and end states
        hmm.start = State(States.Start)
        hmm.end = State(States.End)

        residues = None
        background = {}
        tran_types = None
        tran_lines = []
        start_probs = None

        lines = iter(self._profile)
        # Raw string, so that '\-' and '\s' reach the regex engine instead of
        # being treated as (invalid) string escape sequences
        pattern = re.compile(r'^[A-Z\-]\s[0-9]+\s+')

        # '*' marks an undefined value and is always mapped to None
        if units == ScoreUnits.LogScales:

            def parse_probability(v):
                if v.strip() == '*':
                    return None
                else:
                    return float(v)
        else:

            def parse_probability(v):
                if v.strip() == '*':
                    return None
                else:
                    return hmm._convert(units, float(v), hmm.scale,
                                        hmm.logbase)

        # 1. Create all layers (profile columns), create and attach their match states

        # The inner next(lines) calls advance the very same iterator, so
        # multi-line records are consumed as a unit
        for line in lines:

            if line.startswith('NULL'):
                # Header: four consecutive lines holding the background
                # probabilities, the residue alphabet, the transition types
                # and the start transition probabilities
                try:
                    backprobs = tuple(map(parse_probability, line.split()[1:]))

                    line = next(lines)
                    residues = [
                        Enum.parse(ProteinAlphabet, aa)
                        for aa in line.split()[1:]
                    ]

                    for pos, aa in enumerate(residues):
                        background[aa] = backprobs[pos]

                    line = next(lines)
                    tran_types = line.split()

                    line = next(lines)
                    start_probs = list(map(parse_probability, line.split()))
                except StopIteration:
                    break

            elif pattern.match(line):
                if residues is None:
                    # without the alphabet from the header the emission
                    # columns can't be assigned to residues
                    raise HHProfileFormatError(
                        "Emission row found before the NULL header section")

                emrow = line
                try:
                    # each emission row is followed by its transition row
                    tran_lines.append(next(lines))
                except StopIteration:
                    break

                # field 0: residue type, field 1: rank,
                # fields 2 onwards: emission scores
                emprobs = emrow.split()
                if len(emprobs) != 23:
                    raise HHProfileFormatError(
                        "Unexpected number of data fields: {0}".format(
                            len(emprobs)))

                rank = int(emprobs[1])
                residue = structure.ProteinResidue(rank=rank,
                                                   type=emprobs[0],
                                                   sequence_number=rank,
                                                   insertion_code=None)
                if residue.type == ProteinAlphabet.GAP:
                    raise HHProfileFormatError(
                        "Layer {0} can't be represented by a gap".format(rank))

                # the index returned by append must agree with the rank
                # declared in the file
                new_layer = hmm.layers.append(HMMLayer(rank, residue))
                if new_layer != rank:
                    raise HHProfileFormatError(
                        'Layer {0} defined as {1}'.format(new_layer, rank))

                match = State(States.Match, emit=Enum.members(ProteinAlphabet))

                match.rank = rank
                match.background.set(background)

                for col, aa in enumerate(residues):
                    prob = parse_probability(emprobs[col + 2])
                    match.emission.append(aa, prob)

                hmm.layers[new_layer].append(match)
                assert hmm.layers.last_index == match.rank

        if start_probs is None:
            # the header was never seen or ended prematurely; fail with a
            # format error instead of a TypeError on None further down
            raise HHProfileFormatError(
                "Missing or incomplete NULL header section")

        # 2. Append starting transitions: S -> M[1] and optionally S -> D[1] and S -> I[0].
        #    States D[1] and I[0] will be created if needed
        #    Note that [0] is not a real layer, I[0] is simply an insertion at the level of Start
        if len(hmm.layers) > 0:

            first_match = hmm.layers[hmm.layers.start_index]

            if start_probs[0] is None:
                raise HHProfileFormatError(
                    "Transition Start > Match[1] is undefined")

            start_tran = Transition(hmm.start, first_match[States.Match],
                                    start_probs[0])
            hmm.start.transitions.append(start_tran)

            # Start -> I[0] -> M[1]
            if start_probs[1] is not None and start_probs[3] is not None:
                start_ins = State(States.Insertion,
                                  emit=Enum.members(ProteinAlphabet))
                start_ins.rank = 0
                start_ins.background.set(background)
                start_ins.emission = start_ins.background

                hmm.start_insertion = start_ins
                # Start -> I[0]
                hmm.start.transitions.append(
                    Transition(hmm.start, hmm.start_insertion, start_probs[1]))
                # I[0] -> M[1]
                hmm.start_insertion.transitions.append(
                    Transition(hmm.start_insertion, first_match[States.Match],
                               start_probs[3]))
                # I[0] -> I[0]
                # NOTE(review): this is a truthiness test, so a value of 0.0
                # is skipped as well as None, unlike the "is not None" checks
                # used elsewhere -- confirm this is intended
                if start_probs[4]:
                    hmm.start_insertion.transitions.append(
                        Transition(hmm.start_insertion, hmm.start_insertion,
                                   start_probs[4]))

            if start_probs[2] is None and start_probs[6] is not None:
                # M->D is corrupt (*) at the Start layer, using D->D instead
                start_probs[2] = start_probs[6]

            if start_probs[2] is not None:  # Start -> D[1]
                start_del = State(States.Deletion)
                start_del.rank = 1
                hmm.layers[1].append(start_del)
                start_tran = Transition(hmm.start,
                                        first_match[States.Deletion],
                                        start_probs[2])
                hmm.start.transitions.append(start_tran)
        else:
            # no layers at all: connect Start directly to End
            start_tran = Transition(hmm.start, hmm.end, start_probs[0])
            hmm.start.transitions.append(start_tran)

        # 3. Append remaining transitions. I and D states will be created on demand.

        for rank, fields in enumerate(tran_lines,
                                      start=hmm.layers.start_index):
            assert hmm.layers[rank][States.Match].rank == rank

            ofields = fields.split()
            fields = tuple(map(parse_probability, ofields))

            # 3a. Parse all Neff values and create I[i] and D[i] states if NeffX[i] is not None
            for col, neff in enumerate(tran_types[7:10], start=7):

                if fields[col] is not None:
                    # Neff values are read from the raw text (ofields), so
                    # they bypass the unit conversion done by
                    # parse_probability and are only divided by |hmm.scale|
                    neff_value = float(ofields[col]) / abs(hmm.scale)

                    if neff == 'Neff':
                        hmm.layers[rank].effective_matches = neff_value

                    elif neff == 'Neff_I':
                        hmm.layers[rank].effective_insertions = neff_value

                        if States.Insertion not in hmm.layers[rank]:
                            insertion = State(
                                States.Insertion,
                                emit=Enum.members(ProteinAlphabet))
                            insertion.background.set(background)
                            insertion.emission.set(background)
                            insertion.rank = rank
                            hmm.layers[rank].append(insertion)

                    elif neff == 'Neff_D':
                        hmm.layers[rank].effective_deletions = neff_value

                        # a deletion state is only created for a positive Neff_D
                        if (States.Deletion not in hmm.layers[rank]
                                and neff_value > 0):
                            deletion = State(States.Deletion)
                            deletion.rank = rank
                            hmm.layers[rank].append(deletion)

            # 3b. Starting from the first layer, parse all transitions and build the HMM graph stepwise
            for col, tran in enumerate(tran_types):
                probability = fields[col]

                if probability is not None:
                    try:
                        self._add_transition(hmm, rank, tran, probability)
                    except (CollectionIndexError, ItemNotFoundError) as ex:
                        # report lookup failures as a profile format problem
                        msg = "Can't add transition {0} at {1}: {2.__class__.__name__}, {2!s}"
                        raise HHProfileFormatError(msg.format(tran, rank, ex))

        return hmm