예제 #1
0
파일: fast_reassign.py 프로젝트: wpli/ptr
def find_new_partition_assignments(wordids_to_ideas, wordids, partitions_assignments_tuples, \
                                   ptr_data, new_ptr_params, pfst_params, alignment_library={}):
    new_partitions_assignments = {}
    for partition, assignment in partitions_assignments_tuples:
        partition_wordids = wordids[partition[0]:partition[1]]
        unigram_logprob = metrics.get_unigram_logprob(ptr_data, partition_wordids)
        unigram_logprob = metrics.get_unigram_logprob(ptr_data, partition_wordids)
        similar_ideas = _0020_match_ideas.find_similar_ideas(partition_wordids, wordids_to_ideas, new_ptr_params.ideas, num_ideas=5)
        matches = []
        for idx in similar_ideas:
            reference_idea = new_ptr_params.ideas[idx]
            ref_word_set = frozenset([reference_idea, partition_wordids])
            if ref_word_set in alignment_library:
                align_logprob = alignment_library[ref_word_set]
            else:
                alpha_matrix = align.forward_evaluate_log(reference_idea, partition_wordids, pfst_params)
                align_logprob = alpha_matrix[-1,-1]
                alignment_library[ref_word_set] = align_logprob

            if align_logprob > unigram_logprob:
                matches.append((align_logprob, idx))

        if len(matches) > 0:
            max_idea = max(matches, key=lambda x:x[0])[1]
            if new_ptr_params.ideas[max_idea] != partition_wordids:
                print ptr_data.get_string(new_ptr_params.ideas[max_idea])
                print ptr_data.get_string(partition_wordids)
                print
        else:
            max_idea = None
        new_partitions_assignments[partition] = max_idea
    return new_partitions_assignments, alignment_library
예제 #2
0
파일: metrics.py 프로젝트: wpli/ptr
def get_align_logprob(ref_idea, query_idea, align_scorer=None):
    alpha_matrix = align.forward_evaluate_log(ref_idea, query_idea, align_scorer)
    logprob_passage = alpha_matrix[-1,-1]
    return logprob_passage
    
    #logprob_passage = 0.0
    
    #aligned_passages = align.AlignedPassages(ref_idea, query_idea, align_scorer)
    #aligned_passages.global_align()
    #logprob_passage = aligned_passages.final_score
    #ops = aligned_passages.alignment_operations
    #if len(aligned_passages.alignment_operations) != len(aligned_passages.query_passage):
    #    print aligned_passages.alignment_operations
    #    print aligned_passages.aligned_reference_passage
    #    print aligned_passages.aligned_query_passage
    #    print

    
    """