예제 #1
0
    def m_step(self, all_word_paths):
        """Re-estimate alignment pseudo-probabilities from scored paths.

        Args:
            all_word_paths: iterable of words; each word is a sized
                collection of ``(path_elements, path_score)`` pairs, and
                ``path_elements`` yields ``(a_element, b_element)`` pairs.

        Returns:
            A dict ``{a_element: {b_element: value}}``.  Each value is the
            pair's accumulated pseudocount minus the ``logSum`` of the
            non-infinite pseudocounts sharing that ``a_element``; when that
            total is infinite the value is ``log(0)`` instead.

        Side effects:
            Appends the un-normalised pseudocount dict to
            ``self.pseudocounts``.
        """
        counts = {}

        for paths in all_word_paths:
            # Words without any path contribute nothing.
            if len(paths) == 0:
                continue

            # Per-word normaliser, built from the non-infinite path scores.
            # NOTE(review): if every score of a word is infinite the list
            # below is empty -- confirm what logSum returns in that case.
            finite_scores = [entry[1] for entry in paths
                             if not isinf(entry[1])]
            normaliser = logSum(finite_scores)

            for pairs, score in paths:
                # Scale the path score by the word total (by subtraction).
                weight = score - normaliser

                for source, target in pairs:
                    # Sources missing from the most recent alignment scores
                    # are pooled under the None key.
                    if source not in self.alignment_scores[-1]:
                        source = None

                    row = counts.setdefault(source, {})

                    if target in row and not isinf(row[target]):
                        row[target] = logAdd(weight, row[target])
                    else:
                        # First sighting, or the stored value is infinite:
                        # start over from this path's weight.
                        row[target] = weight

        self.pseudocounts.append(counts)

        # Rescale every row of pseudocounts by that row's total.
        probs = {}
        for source, row in counts.items():
            row_total = logSum([count for count in row.values()
                                if not isinf(count)])
            scaled = {}
            for target, count in row.items():
                if isinf(row_total):
                    scaled[target] = log(0)
                else:
                    scaled[target] = count - row_total
            probs[source] = scaled

        return probs
예제 #2
0
    def m_step(self, all_word_paths):
        """Re-estimate alignment pseudo-probabilities from scored paths.

        Args:
            all_word_paths: iterable of words; each word is a sized
                collection of ``(path_elements, path_score)`` pairs, and
                ``path_elements`` yields ``(a_element, b_element)`` pairs.

        Returns:
            A dict ``{a_element: {b_element: value}}``.  Each value is the
            pair's accumulated pseudocount minus the ``logSum`` of the
            non-infinite pseudocounts sharing that ``a_element``; when that
            total is infinite the value is ``log(0)`` instead.

        Side effects:
            Appends the un-normalised pseudocount dict to
            ``self.pseudocounts``.
        """
        counts = {}

        for paths in all_word_paths:
            # Words without any path contribute nothing.
            if len(paths) == 0:
                continue

            # Per-word normaliser, built from the non-infinite path scores.
            # NOTE(review): if every score of a word is infinite the list
            # below is empty -- confirm what logSum returns in that case.
            finite_scores = [entry[1] for entry in paths
                             if not isinf(entry[1])]
            normaliser = logSum(finite_scores)

            for pairs, score in paths:
                # Scale the path score by the word total (by subtraction).
                weight = score - normaliser

                for source, target in pairs:
                    # Sources missing from the most recent alignment scores
                    # are pooled under the None key.
                    if source not in self.alignment_scores[-1]:
                        source = None

                    row = counts.setdefault(source, {})

                    if target in row and not isinf(row[target]):
                        row[target] = logAdd(weight, row[target])
                    else:
                        # First sighting, or the stored value is infinite:
                        # start over from this path's weight.
                        row[target] = weight

        self.pseudocounts.append(counts)

        # Rescale every row of pseudocounts by that row's total.
        probs = {}
        for source, row in counts.items():
            row_total = logSum([count for count in row.values()
                                if not isinf(count)])
            scaled = {}
            for target, count in row.items():
                if isinf(row_total):
                    scaled[target] = log(0)
                else:
                    scaled[target] = count - row_total
            probs[source] = scaled

        return probs
예제 #3
0
def convert_allowables(allowables, delete_prob, insert_prob):
    """Convert ``{letter: phones}`` allowables into alignment scores.

    Args:
        allowables: mapping from each letter to the collection of phones it
            may align with.  A ``None`` phone is treated as a deletion and a
            ``None`` letter as an insertion.  The phone collection may be
            any iterable (list, tuple, set, ...).
        delete_prob: probability whose ``log`` is added to the score of a
            ``None`` phone.
        insert_prob: probability whose ``log`` is added to the score of
            every phone listed under the ``None`` letter.

    Returns:
        A dict ``{letter: {phone: score}}`` where each score is the phone's
        raw score minus the ``logSum`` of all raw scores for that letter.
    """
    alignment_scores = {}

    for letter in allowables:
        # Materialise once: the phones are walked twice (scoring, then
        # writing results), and indexing them would fail for sets.
        phones = list(allowables[letter])

        phone_scores = []
        for phone in phones:
            score = 0
            # penalize deletions
            if phone is None:
                score += log(delete_prob)
            # really penalize insertions
            if letter is None:
                score += log(insert_prob)
            phone_scores.append(score)

        # Normalise the scores of this letter by their combined total.
        total_phone_scores = logSum(phone_scores)
        alignment_scores[letter] = {
            phone: score - total_phone_scores
            for phone, score in zip(phones, phone_scores)
        }

    return alignment_scores
예제 #4
0
def convert_allowables(allowables, delete_prob, insert_prob):
    """Convert ``{letter: phones}`` allowables into alignment scores.

    Args:
        allowables: mapping from each letter to the collection of phones it
            may align with.  A ``None`` phone is treated as a deletion and a
            ``None`` letter as an insertion.  The phone collection may be
            any iterable (list, tuple, set, ...).
        delete_prob: probability whose ``log`` is added to the score of a
            ``None`` phone.
        insert_prob: probability whose ``log`` is added to the score of
            every phone listed under the ``None`` letter.

    Returns:
        A dict ``{letter: {phone: score}}`` where each score is the phone's
        raw score minus the ``logSum`` of all raw scores for that letter.
    """
    alignment_scores = {}

    for letter in allowables:
        # Materialise once: the phones are walked twice (scoring, then
        # writing results), and indexing them would fail for sets.
        phones = list(allowables[letter])

        phone_scores = []
        for phone in phones:
            score = 0
            # penalize deletions
            if phone is None:
                score += log(delete_prob)
            # really penalize insertions
            if letter is None:
                score += log(insert_prob)
            phone_scores.append(score)

        # Normalise the scores of this letter by their combined total.
        total_phone_scores = logSum(phone_scores)
        alignment_scores[letter] = {
            phone: score - total_phone_scores
            for phone, score in zip(phones, phone_scores)
        }

    return alignment_scores