Ejemplo n.º 1
0
    def get_labels(self):
        """Label the sausage positions as correct ('O') or incorrect ('X').

        If ``self.correct()`` is true, every slot is correct and a list of
        ``self.num_slots()`` 'O' labels is returned without any alignment.

        Otherwise the reference (``self.ref()``) is globally aligned against
        the best hypothesis (``self.best_hyp()``), and one label is produced
        per column of the first returned alignment:

        * 'O' when both sides of the column hold the same symbol, or when a
          gap ('-') on the reference side is paired with '*DELETE*' on the
          hypothesis side;
        * 'X' in every other case.
        """
        if self.correct():
            # Nothing to align: all slots are right.
            return ['O'] * self.num_slots()

        # Encode reference and best hypothesis with a shared vocabulary.
        ref_seq = Sequence(self.ref())
        hyp_seq = Sequence(self.best_hyp())
        vocab = Vocabulary()
        ref_encoded = vocab.encodeSequence(ref_seq)
        hyp_encoded = vocab.encodeSequence(hyp_seq)

        # Global alignment: match = 2, mismatch = -1, gap = -2.
        aligner = StrictGlobalSequenceAligner(SimpleScoring(2, -1), -2)
        _, encoded_alignments = aligner.align(
            ref_encoded, hyp_encoded, backtrace=True)
        best = vocab.decodeSequenceAlignment(encoded_alignments[0])

        # One label per alignment column.
        return [
            'O' if (ref_sym == hyp_sym
                    or (ref_sym == '-' and hyp_sym == '*DELETE*')) else 'X'
            for ref_sym, hyp_sym in zip(best.first, best.second)
        ]
Ejemplo n.º 2
0
def align_sequences(seq_a, seq_b):
    """Globally align ``seq_a`` with ``seq_b`` and return the first alignment.

    Literal '-' items are escaped to '\\-' before encoding, because the
    alignment library reserves '-' as its gap marker. Both sequences are
    encoded with one shared ``Vocabulary`` and aligned with a strict global
    aligner (match = 3, mismatch = -1, gap = -2).

    Returns the first entry of the alignments produced by the backtrace,
    still in encoded form (it is not decoded through the vocabulary here).
    """
    def _escape_gaps(items):
        # Rewrite literal dashes so they cannot be mistaken for gap markers.
        return ['\\-' if item == '-' else item for item in items]

    safe_a = _escape_gaps(seq_a)
    safe_b = _escape_gaps(seq_b)

    vocab = Vocabulary()
    first_encoded = vocab.encodeSequence(Sequence(safe_a))
    second_encoded = vocab.encodeSequence(Sequence(safe_b))

    aligner = StrictGlobalSequenceAligner(
        SimpleScoring(matchScore=3, mismatchScore=-1),
        gapScore=-2,
    )
    alignments = aligner.align(first_encoded, second_encoded, backtrace=True)[1]
    return alignments[0]