Ejemplo n.º 1
0
def tree_diff_children(list1, list2, hash_func, algorithm):
    """Diff two sibling lists of etree Elements into one merged child list.

    The longest run of children whose hash_func() values agree is located
    with longest_common_substring(). Children inside that run are diffed
    pairwise through tree_diff(); the stretches to the left and right of the
    run are either diffed recursively (when both lists have leftovers there)
    or collapsed into a single MULTITAG_HOLE element (when only one does).

    list1, list2 -- lists of etree Elements.
    hash_func    -- callable applied to each element to obtain its hash.
    algorithm    -- passed through unchanged to tree_diff() and to the
                    recursive calls.

    Returns a list; [] when both inputs are empty lists.
    """
    if list1 == [] and list2 == []:
        return []

    hashes1 = [hash_func(node) for node in list1]
    hashes2 = [hash_func(node) for node in list2]
    run_len, start1, start2 = longest_common_substring(hashes1, hashes2)
    merged = []

    if run_len == 0:
        # Nothing in common under this hash. The strict hash gets a second
        # chance with the loose hash; if that was already the loose hash,
        # the whole span becomes a hole.
        if hash_func == element_hash_strict:
            merged += tree_diff_children(list1, list2, element_hash_loose, algorithm)
        else:
            merged.append(etree.Element('MULTITAG_HOLE'))

    # Leftovers to the left of the common run.
    left_in_1 = start1 > 0
    left_in_2 = start2 > 0
    if left_in_1 and left_in_2:
        # Both lists have something there: diff those prefixes, starting
        # over with the strict hash.
        merged += tree_diff_children(list1[:start1], list2[:start2], element_hash_strict, algorithm)
    elif left_in_1 or left_in_2:
        # Only one list has something there.
        merged.append(etree.Element('MULTITAG_HOLE'))

    if run_len <= 0:
        return merged

    # The common run itself: diff each aligned pair of children.
    merged.extend(
        tree_diff(list1[start1 + step], list2[start2 + step], algorithm)
        for step in range(run_len)
    )

    # Leftovers to the right of the common run.
    end1 = start1 + run_len
    end2 = start2 + run_len
    right_in_1 = end1 < len(list1)
    right_in_2 = end2 < len(list2)
    if right_in_1 and right_in_2:
        merged += tree_diff_children(list1[end1:], list2[end2:], element_hash_strict, algorithm)
    elif right_in_1 or right_in_2:
        merged.append(etree.Element('MULTITAG_HOLE'))
    return merged
Ejemplo n.º 2
0
def tree_extract_children(list1, list2, hash_func, algorithm):
    """Extract data from sample children (list2) using brain children (list1).

    The longest run of children whose hash_func() values agree is located
    with longest_common_substring(). Aligned children in that run are handed
    to tree_extract(); stretches outside the run must either be matched
    recursively or be covered by a lone MULTITAG_HOLE element on the brain
    side, otherwise NoMatch is raised.

    list1, list2 -- lists of etree Elements (brain and sample respectively,
                    per the error messages below).
    hash_func    -- callable applied to each element to obtain its hash.
    algorithm    -- passed through unchanged to tree_extract() and to the
                    recursive calls.

    Returns a list of extracted items; multitag holes contribute dicts of
    the form {'type': 'multitag', 'value': ..., 'tag': None}.
    Raises NoMatch when the brain and sample children cannot be reconciled.
    """
    if list1 == [] and list2 == []:
        return []

    brain_hashes = [hash_func(node) for node in list1]
    sample_hashes = [hash_func(node) for node in list2]
    run_len, start1, start2 = longest_common_substring(brain_hashes, sample_hashes)
    extracted = []

    if run_len == 0:
        if [node.tag for node in list1] == ['MULTITAG_HOLE']:
            # The brain is a single hole: it swallows every sample child,
            # serialized as HTML.
            # NOTE(review): with lxml on Python 3, tostring() presumably
            # returns bytes unless encoding='unicode' is given, which this
            # str join would reject -- confirm the target interpreter.
            markup = ''.join(etree.tostring(node, method='html') for node in list2)
            extracted.append({'type': 'multitag', 'value': markup, 'tag': None})
        elif hash_func == element_hash_strict:
            # Retry the whole span with the loose hash.
            extracted.extend(tree_extract_children(list1, list2, element_hash_loose, algorithm))
        else:
            raise NoMatch('Brain tag had children %r, but sample had %r' % (list1, list2))

    # Leftovers to the left of the common run.
    if start1 > 0 and start2 > 0:
        # Both sides have something there: match those prefixes, starting
        # over with the strict hash.
        extracted.extend(tree_extract_children(list1[:start1], list2[:start2], element_hash_strict, algorithm))
    elif start1 > 0:
        # Only the brain has something there; acceptable only when it is a
        # lone hole, which then matched nothing.
        brain_left = list1[:start1]
        if [node.tag for node in brain_left] != ['MULTITAG_HOLE']:
            raise NoMatch('Brain tag had children %r, but sample had %r' % (brain_left, list2))
        extracted.append({'type': 'multitag', 'value': '', 'tag': None})
    elif start2 > 0:
        # Only the sample has something there: nothing in the brain covers it.
        raise NoMatch('Brain tag had children %r, but sample had %r' % (list1, list2))

    if run_len <= 0:
        return extracted

    # The common run itself: extract from each aligned pair of children.
    for step in range(run_len):
        extracted.extend(tree_extract(list1[start1 + step], list2[start2 + step], algorithm))

    # Leftovers to the right of the common run, on either side.
    end1 = start1 + run_len
    end2 = start2 + run_len
    if end1 < len(list1) or end2 < len(list2):
        extracted.extend(tree_extract_children(list1[end1:], list2[end2:], element_hash_strict, algorithm))
    return extracted
Ejemplo n.º 3
0
def tree_diff_children(list1, list2, hash_func, algorithm):
    """Diff two sibling lists of etree Elements into one merged child list.

    The longest run of children whose hash_func() values agree is located
    with longest_common_substring(). Children inside that run are diffed
    pairwise through tree_diff(); the stretches to the left and right of the
    run are either diffed recursively (when both lists have leftovers there)
    or collapsed into a single MULTITAG_HOLE element (when only one does).

    list1, list2 -- lists of etree Elements.
    hash_func    -- callable applied to each element to obtain its hash.
    algorithm    -- passed through unchanged to tree_diff() and to the
                    recursive calls.

    Returns a list; [] when both inputs are empty lists.
    """
    if list1 == [] and list2 == []:
        return []

    hashes1 = [hash_func(node) for node in list1]
    hashes2 = [hash_func(node) for node in list2]
    run_len, start1, start2 = longest_common_substring(hashes1, hashes2)
    merged = []

    if run_len == 0:
        # Nothing in common under this hash. The strict hash gets a second
        # chance with the loose hash; if that was already the loose hash,
        # the whole span becomes a hole.
        if hash_func == element_hash_strict:
            merged += tree_diff_children(list1, list2, element_hash_loose, algorithm)
        else:
            merged.append(etree.Element('MULTITAG_HOLE'))

    # Leftovers to the left of the common run.
    left_in_1 = start1 > 0
    left_in_2 = start2 > 0
    if left_in_1 and left_in_2:
        # Both lists have something there: diff those prefixes, starting
        # over with the strict hash.
        merged += tree_diff_children(list1[:start1], list2[:start2], element_hash_strict, algorithm)
    elif left_in_1 or left_in_2:
        # Only one list has something there.
        merged.append(etree.Element('MULTITAG_HOLE'))

    if run_len <= 0:
        return merged

    # The common run itself: diff each aligned pair of children.
    merged.extend(
        tree_diff(list1[start1 + step], list2[start2 + step], algorithm)
        for step in range(run_len)
    )

    # Leftovers to the right of the common run.
    end1 = start1 + run_len
    end2 = start2 + run_len
    right_in_1 = end1 < len(list1)
    right_in_2 = end2 < len(list2)
    if right_in_1 and right_in_2:
        merged += tree_diff_children(list1[end1:], list2[end2:], element_hash_strict, algorithm)
    elif right_in_1 or right_in_2:
        merged.append(etree.Element('MULTITAG_HOLE'))
    return merged
Ejemplo n.º 4
0
def tree_extract_children(list1, list2, hash_func, algorithm):
    """Extract data from sample children (list2) using brain children (list1).

    The longest run of children whose hash_func() values agree is located
    with longest_common_substring(). Aligned children in that run are handed
    to tree_extract(); stretches outside the run must either be matched
    recursively or be covered by a lone MULTITAG_HOLE element on the brain
    side, otherwise NoMatch is raised.

    list1, list2 -- lists of etree Elements (brain and sample respectively,
                    per the error messages below).
    hash_func    -- callable applied to each element to obtain its hash.
    algorithm    -- passed through unchanged to tree_extract() and to the
                    recursive calls.

    Returns a list of extracted items; multitag holes contribute dicts of
    the form {'type': 'multitag', 'value': ..., 'tag': None}.
    Raises NoMatch when the brain and sample children cannot be reconciled.
    """
    if list1 == [] and list2 == []:
        return []

    brain_hashes = [hash_func(node) for node in list1]
    sample_hashes = [hash_func(node) for node in list2]
    run_len, start1, start2 = longest_common_substring(brain_hashes, sample_hashes)
    extracted = []

    if run_len == 0:
        if [node.tag for node in list1] == ['MULTITAG_HOLE']:
            # The brain is a single hole: it swallows every sample child,
            # serialized as HTML.
            # NOTE(review): with lxml on Python 3, tostring() presumably
            # returns bytes unless encoding='unicode' is given, which this
            # str join would reject -- confirm the target interpreter.
            markup = ''.join(etree.tostring(node, method='html') for node in list2)
            extracted.append({'type': 'multitag', 'value': markup, 'tag': None})
        elif hash_func == element_hash_strict:
            # Retry the whole span with the loose hash.
            extracted.extend(tree_extract_children(list1, list2, element_hash_loose, algorithm))
        else:
            raise NoMatch('Brain tag had children %r, but sample had %r' % (list1, list2))

    # Leftovers to the left of the common run.
    if start1 > 0 and start2 > 0:
        # Both sides have something there: match those prefixes, starting
        # over with the strict hash.
        extracted.extend(tree_extract_children(list1[:start1], list2[:start2], element_hash_strict, algorithm))
    elif start1 > 0:
        # Only the brain has something there; acceptable only when it is a
        # lone hole, which then matched nothing.
        brain_left = list1[:start1]
        if [node.tag for node in brain_left] != ['MULTITAG_HOLE']:
            raise NoMatch('Brain tag had children %r, but sample had %r' % (brain_left, list2))
        extracted.append({'type': 'multitag', 'value': '', 'tag': None})
    elif start2 > 0:
        # Only the sample has something there: nothing in the brain covers it.
        raise NoMatch('Brain tag had children %r, but sample had %r' % (list1, list2))

    if run_len <= 0:
        return extracted

    # The common run itself: extract from each aligned pair of children.
    for step in range(run_len):
        extracted.extend(tree_extract(list1[start1 + step], list2[start2 + step], algorithm))

    # Leftovers to the right of the common run, on either side.
    end1 = start1 + run_len
    end2 = start2 + run_len
    if end1 < len(list1) or end2 < len(list2):
        extracted.extend(tree_extract_children(list1[end1:], list2[end2:], element_hash_strict, algorithm))
    return extracted
Ejemplo n.º 5
0
 def LCS(self, seq1, seq2):
     """Return the result of longest_common_substring() for seq1 and seq2."""
     # Pure delegation: no instance state is consulted.
     match = longest_common_substring(seq1, seq2)
     return match
Ejemplo n.º 6
0
 def LCS(self, seq1, seq2):
     """Return the result of longest_common_substring() for seq1 and seq2."""
     # Pure delegation: no instance state is consulted.
     match = longest_common_substring(seq1, seq2)
     return match