Example #1
0
def get_pygments_html_exact_dup(text, start, highlight_length, num_symbols_to_print, pygmenttextlexer):
    """Render one exact-duplicate snippet as HTML with its matched part highlighted.

    The first ``highlight_length`` symbols taken from ``text`` at ``start`` are
    rendered through ``get_pygments_and_allow_to_wrap`` and then wrapped by
    ``general.get_HTML_highlight`` over the rendered fragment's full length.
    The remaining ``num_symbols_to_print - highlight_length`` symbols are
    rendered the same way but left unhighlighted.

    Args:
        text: Source the snippet is cut from.
        start: Offset in ``text`` where the matched part begins.
        highlight_length: Number of symbols that belong to the match.
        num_symbols_to_print: Total number of symbols to show (match plus
            trailing context).
        pygmenttextlexer: Lexer handed through to
            ``get_pygments_and_allow_to_wrap``.

    Returns:
        The highlighted fragment, a single space, and the trailing fragment,
        concatenated.
    """
    matched_text = mypatmat.get_this_much_string(text, start, highlight_length)
    matched_html = get_pygments_and_allow_to_wrap(matched_text, pygmenttextlexer, {})

    trailing_length = num_symbols_to_print - highlight_length
    trailing_text = mypatmat.get_this_much_string(text, start + highlight_length, trailing_length)
    trailing_html = get_pygments_and_allow_to_wrap(trailing_text, pygmenttextlexer, {})

    # The whole rendered match is highlighted, hence len(matched_html).
    highlighted_html = general.get_HTML_highlight(matched_html, len(matched_html))
    return highlighted_html + " " + trailing_html
Example #2
0
def get_dup_groups(
    MEAN_FACTOR_GROUP_ONLY_IF,
    mean_lcp,
    sa,
    lcp,
    text,
    pygmenttextlexer,
    special_syms=None,
    symbol_counts_in_string_ith_base_0=None,
    variables=None,
    VARIABLE_PLACEHOLDER=None,
    strings_arr=None,
    STRING_PLACEHOLDER=None,
    digits=None,
    DIGITS_PLACEHOLDER=None,
    types=None,
    TYPES_PLACEHOLDER=None,
    linenos_for_xp_dup=None,
    ssym_dt=None,
):
    """Group duplicated fragments of ``text`` and render them as an HTML table.

    Positions of ``lcp`` are visited in the order produced by
    ``general.get_desc_sorted_and_indexed(lcp)``. From each position the scan
    walks forward through consecutive ``sa`` entries while the ``lcp`` value
    stays positive and at least ``MEAN_FACTOR_GROUP_ONLY_IF * mean_lcp``, the
    fragment does not start on whitespace, and neither end of the fragment
    has already been claimed by an earlier group. Every visited pair
    ``(sa[j], sa[j + 1])`` contributes its occurrences to the current group.

    Args:
        MEAN_FACTOR_GROUP_ONLY_IF: Multiplier applied to ``mean_lcp`` to get
            the minimum ``lcp`` value worth grouping.
        mean_lcp: Reference value the threshold is derived from.
        sa: Sequence of start offsets into ``text`` (presumably a suffix
            array -- confirm against caller).
        lcp: Sequence where ``lcp[j]`` is used as the match length shared by
            the fragments at ``sa[j]`` and ``sa[j + 1]``.
        text: The symbol sequence being analysed.
        pygmenttextlexer: Lexer forwarded to the Pygments-based renderers.
        special_syms: Forwarded to ``general.get_str_ascii_char_and_special``
            in exact-duplicate mode. ``None`` (the default) means an empty
            list.
        symbol_counts_in_string_ith_base_0: When ``None`` the function works
            in exact-duplicate mode; otherwise it works in "xp dup" mode and
            forwards this value, together with the placeholder arguments
            below, to ``get_pygments_html`` / ``get_xp_dup_actual_string``.
        variables, VARIABLE_PLACEHOLDER, strings_arr, STRING_PLACEHOLDER,
        digits, DIGITS_PLACEHOLDER, types, TYPES_PLACEHOLDER, ssym_dt:
            Opaque values forwarded unchanged to the xp-dup renderers.
        linenos_for_xp_dup: Maps a start offset to the line number shown in
            xp-dup mode.

    Returns:
        A string ``"<table>...</table>"`` holding one header row per group
        followed by one row per occurrence.
    """
    if special_syms is None:
        # Fresh list per call instead of a shared mutable default.
        special_syms = []

    working_with_xp_dup = symbol_counts_in_string_ith_base_0 is not None

    lgt = len(lcp)
    cutoff_write = CUTOFF_WRITE_FACTOR * lgt  # to avoid quadratic printing
    wrote_num = 0
    used_i = [0] * lgt  # 1 marks offsets already claimed by some group
    num_groups = 0
    duplications = []

    symbol_line_nos = mypatmat.get_symbol_line_no_for_sym(text)

    def build_entry(start, length, num_symbols_to_print, highlight_whole_string):
        """Return the (line, snippet, match_length) entry for the fragment at start."""
        if not working_with_xp_dup:
            # Exact duplicates are stored raw; highlighting happens once the
            # group is known to be kept.
            snippet = general.get_str_ascii_char_and_special(
                mypatmat.get_this_much_string(text, start, num_symbols_to_print), special_syms
            )
            return (symbol_line_nos[start], snippet, length)

        if DISPLAY_USING_PYGMENTS:
            pygments_html = get_pygments_html(
                text,
                start,
                length,
                num_symbols_to_print,
                symbol_counts_in_string_ith_base_0,
                variables,
                VARIABLE_PLACEHOLDER,
                strings_arr,
                STRING_PLACEHOLDER,
                digits,
                DIGITS_PLACEHOLDER,
                types,
                TYPES_PLACEHOLDER,
                ssym_dt,
                pygmenttextlexer,
            )
            return (linenos_for_xp_dup[start], pygments_html, length)

        (new_string, highlight_prefix_length) = get_xp_dup_actual_string(
            text,
            start,
            length,
            num_symbols_to_print,
            symbol_counts_in_string_ith_base_0,
            variables,
            VARIABLE_PLACEHOLDER,
            strings_arr,
            STRING_PLACEHOLDER,
            digits,
            DIGITS_PLACEHOLDER,
            types,
            TYPES_PLACEHOLDER,
        )
        if highlight_whole_string:
            highlight_prefix_length = len(new_string)
        return (
            linenos_for_xp_dup[start],
            general.get_HTML_highlight(new_string, highlight_prefix_length),
            length,
        )

    for (lcp_pos, _unused_lcp_val) in general.get_desc_sorted_and_indexed(lcp):
        first = lcp_pos
        j = lcp_pos
        strings = []
        another_group = False

        # The explicit "== False" is kept because the helper's return type is
        # not visible here; "not ..." would differ for None or other falsy values.
        while (
            mypatmat.char_is_whitespace(text[sa[j]]) == False
            and j + 1 < lgt
            and lcp[j] > 0
            and lcp[j] >= MEAN_FACTOR_GROUP_ONLY_IF * mean_lcp
            and used_i[sa[j]] == 0
            and used_i[sa[j] + lcp[j] - 1] == 0
        ):
            another_group = True
            length = lcp[j]
            start1 = sa[j]
            start2 = sa[j + 1]

            # Claim the whole matched span; the loop condition has already
            # shown that its last index is inside used_i.
            used_i[start1:start1 + length] = [1] * length

            num_symbols_to_print = int(math.floor(length * FACTOR_TO_STORE_THIS_MUCH_MORE_OF_MATCH))

            if j == first:
                # Only the first pair adds its left-hand fragment; afterwards
                # the left-hand side is the previous pair's right-hand side.
                # NOTE(review): in the non-Pygments xp-dup path this first
                # entry is highlighted over the whole string, while later
                # entries use the prefix length returned by
                # get_xp_dup_actual_string. Kept as found -- confirm whether
                # the asymmetry is intended.
                strings.append(build_entry(start1, length, num_symbols_to_print, True))
            strings.append(build_entry(start2, length, num_symbols_to_print, False))

            wrote_num += length
            if wrote_num > cutoff_write:
                break
            j += 1

        # NOTE(review): strings[0] is the whole (line, snippet, length) tuple,
        # not just the snippet text -- verify that the whitespace helper
        # expects that.
        if (
            len(strings) >= MIN_GROUP_SIZE
            and mypatmat.str_has_at_least_this_much_whitespace(strings[0], MOST_PERCENT_WHITESPACE) == False
        ):
            group = []
            for (loc, curr, match_len) in strings:
                if not working_with_xp_dup:
                    if DISPLAY_USING_PYGMENTS:
                        curr = get_pygments_html_exact_dup(curr, 0, match_len, len(curr), pygmenttextlexer)
                    else:
                        curr = general.get_HTML_highlight(curr, match_len)
                group.append((loc, curr))
            duplications.append(general.sort_tuple_arr(group, 0))

        # A scan counts towards MAX_GROUPS as soon as it matched anything,
        # even if the resulting group was filtered out above.
        if another_group:
            num_groups += 1
        if num_groups >= MAX_GROUPS:
            break

    duplications_html = ["<table>"]
    for (groupnum, group) in enumerate(duplications, 1):
        duplications_html.append(
            '<tr><td colspan="4" style="border-top: 1px solid #000;"><h3><br>Group '
            + str(groupnum)
            + ":</h3><br></td></tr>"
        )
        for (lineno, string) in group:
            duplications_html.append(
                "<tr><td><b>line~"
                + str(lineno)
                + '</b></td><td style="border-right: 1px solid #000;">&nbsp;&nbsp;&nbsp;</td><td>&nbsp;&nbsp;&nbsp;</td><td>'
                + string
                + "</td></tr><tr><td><br><br></td></tr>"
            )
    duplications_html.append("</table>")

    return "".join(duplications_html)