Example #1
0
def make_differing_words_bold(orig_sent, sys_sent, make_bold):
    '''Returns the two sentences with differing words in bold.

    Both sentences are split with `to_words`. The words returned by `get_lcs`
    for the two word lists (presumably their longest common subsequence) are
    left untouched; every run of consecutive other words is joined with single
    spaces and passed through `make_bold` as one string.

    Args:
        orig_sent: First sentence (the source).
        sys_sent: Second sentence, compared against the first.
        make_bold: Callable taking a string and returning its bold rendering.

    Returns:
        A tuple `(formatted_orig_sent, formatted_sys_sent)`.
    '''

    def format_words(words, mutual_words):
        '''Makes all words bold except the mutual ones.

        `mutual_words` must appear in `words` in the same order, otherwise
        `next` runs past the end of `words` and raises StopIteration.
        '''
        words_iterator = iter(words)
        chunks = []
        for mutual_word in mutual_words:
            # Gather every word preceding the next mutual word
            differing_words = []
            word = next(words_iterator)
            while word != mutual_word:
                differing_words.append(word)
                word = next(words_iterator)
            if differing_words:
                chunks.append(make_bold(' '.join(differing_words)))
            chunks.append(word)
        # Add remaining words, skipping the bold wrapper when nothing is left
        # so that no empty bold span is emitted
        remaining_words = list(words_iterator)
        if remaining_words:
            chunks.append(make_bold(' '.join(remaining_words)))
        return ' '.join(chunks).strip()

    orig_words = to_words(orig_sent)
    sys_words = to_words(sys_sent)
    mutual_words = get_lcs(orig_words, sys_words)
    return format_words(orig_words, mutual_words), format_words(sys_words, mutual_words)
Example #2
0
 def truncate(sentence):
     '''Keeps the first 80% of the words (rounded down) and appends a period.'''
     all_words = to_words(sentence)
     n_kept = int(len(all_words) * 0.8)
     kept_words = all_words[:n_kept]
     return ' '.join(kept_words) + '.'
Example #3
0
def get_qualitative_html_examples(orig_sents, sys_sents, refs_sents, n_samples=10):
    '''Returns an HTML fragment with qualitative examples grouped by criterion.

    For every criterion in `title_key_print`, all (source, prediction,
    references) triples are sorted in ascending order of the criterion's sort
    key and the first `n_samples` of them are rendered inside a collapsible
    section.

    Args:
        orig_sents: Source sentences.
        sys_sents: System outputs, aligned with `orig_sents`.
        refs_sents: Sequence of reference sets, each one aligned with
            `orig_sents` (it is transposed with `zip(*refs_sents)` to get the
            references of each sample).
        n_samples: Maximum number of samples displayed per criterion
            (non-negative, defaults to 10).

    Returns:
        The generated HTML as a string.
    '''
    # Each entry is (section title, sort key, sort key formatter).
    # Samples are sorted in ascending order of the sort key, hence the negated
    # keys for the "best" / "most" criteria; the formatter negates them back.
    title_key_print = [
        # NOTE(review): the key is constant and `sorted` is stable, so this
        # section shows the first samples in input order, not a random draw.
        ('Randomly sampled simplifications',
         lambda c, s, refs: 0,
         lambda value: ''),
        ('Best simplifications according to SARI',
         lambda c, s, refs: -corpus_sari([c], [s], [refs]),
         lambda value: f'SARI={-value:.2f}'),
        ('Worst simplifications according to SARI',
         lambda c, s, refs: corpus_sari([c], [s], [refs]),
         lambda value: f'SARI={value:.2f}'),
        # Key is -1 (sorted first) when both sentences have the same word count
        # and share all words but one, 0 otherwise
        ('Simplifications with only one differing word',
         lambda c, s, refs: -(count_words(c) == count_words(s) == len(get_lcs(to_words(c), to_words(s))) + 1),
         lambda value: ''),
        ('Simplifications with the most compression',
         lambda c, s, refs: get_compression_ratio(c, s),
         lambda value: f'compression_ratio={value:.2f}'),
        ('Simplifications that are longer than the source',
         lambda c, s, refs: -get_compression_ratio(c, s),
         lambda value: f'compression_ratio={-value:.2f}'),
        # NOTE(review): the displayed value is the similarity divided by the
        # compression ratio, although it is labelled levenshtein_similarity.
        ('Simplifications that paraphrase the source',
         lambda c, s, refs: get_levenshtein_similarity(c, s) / get_compression_ratio(c, s),
         lambda value: f'levenshtein_similarity={value:.2f}'),
        # Exact matches get a key of 0 and are therefore sorted last
        ('Simplifications that are the most similar to the source (excluding exact matches)',
         lambda c, s, refs: -get_levenshtein_similarity(c, s) * int(c != s),
         lambda value: f'levenshtein_similarity={-value:.2f}'),
        ('Simplifications with the most sentence splits (if there are any)',
         lambda c, s, refs: -count_sentence_splits(c, s),
         lambda value: f'nb_sentences_ratio={-value:.2f}'),
    ]

    def get_one_sample_html(orig_sent, sys_sent, ref_sents, sort_key, print_func):
        '''Returns the HTML of one sample: sort key, source, prediction and references.'''
        doc = Doc()
        with doc.tag('div', klass='mb-2 p-1'):
            # Sort key
            with doc.tag('div', klass='text-muted small'):
                doc.asis(print_func(sort_key(orig_sent, sys_sent, ref_sents)))
            with doc.tag('div', klass='ml-2'):
                orig_sent_bold, sys_sent_bold = make_differing_words_bold(orig_sent, sys_sent, make_text_bold_html)
                # Source
                with doc.tag('div'):
                    doc.asis(orig_sent_bold)
                # Prediction
                with doc.tag('div'):
                    doc.asis(sys_sent_bold)
                # References
                collapse_id = get_random_html_id()
                with doc.tag('div', klass='position-relative'):
                    with doc.tag('a', ('data-toggle', 'collapse'), ('href', f'#{collapse_id}'),
                                 klass='stretched-link small'):
                        doc.text('References')
                    with doc.tag('div', klass='collapse', id=collapse_id):
                        # Iterate over the references given as argument rather
                        # than over a variable captured from the enclosing scope
                        for ref_sent in ref_sents:
                            _, ref_sent_bold = make_differing_words_bold(orig_sent, ref_sent, make_text_bold_html)
                            with doc.tag('div', klass='text-muted'):
                                doc.asis(ref_sent_bold)
        return doc.getvalue()

    doc = Doc()
    for title, sort_key, print_func in title_key_print:
        # stretched-link needs position-relative
        with doc.tag('div', klass='container-fluid mt-4 p-2 position-relative border'):
            doc.line('h3', klass='m-2', text_content=title)
            # Make whole div clickable to collapse / uncollapse examples
            collapse_id = get_random_html_id()
            with doc.tag('a', ('data-toggle', 'collapse'), ('href', f'#{collapse_id}'), klass='stretched-link'):
                pass  # doc.stag and doc.line don't seem to work with stretched-link
            # Now lets print the examples
            sorted_samples = sorted(
                    zip(orig_sents, sys_sents, zip(*refs_sents)),
                    key=lambda args: sort_key(*args),
            )
            # Samples displayed by default
            with doc.tag('div', klass='collapse show', id=collapse_id):
                for orig_sent, sys_sent, ref_sents in sorted_samples[:n_samples]:
                    doc.asis(get_one_sample_html(orig_sent, sys_sent, ref_sents, sort_key, print_func))
    return doc.getvalue()