Beispiel #1
0
def test_common_intervals_len_5_c(alg):
    """Run ``alg`` on two 5-element permutations and compare via ``test_results``.

    The expected intervals are written as plain span tuples (one span per
    permutation) and turned into ``CommonInterval`` objects in listed order.
    """
    first_perm = (0, 1, 2, 3, 4)
    second_perm = (0, 2, 4, 1, 3)

    expected_spans = (
        ((1, 4), (1, 4)),  # four elements
        ((0, 4), (0, 4)),  # all five elements
    )
    expected = [CommonInterval(*spans) for spans in expected_spans]

    test_results(expected, alg, first_perm, second_perm)
Beispiel #2
0
def test_common_intervals_3_strings_len_5(alg):
    """Run ``alg`` on three 5-element permutations and compare via ``test_results``.

    Every expected entry carries one (start, end) span per permutation.
    """
    perm_a = [0, 1, 2, 3, 4]
    perm_b = [1, 0, 3, 4, 2]
    perm_c = [1, 4, 3, 0, 2]

    expected_spans = (
        ((3, 4), (2, 3), (1, 2)),  # two elements
        ((0, 4), (0, 4), (0, 4)),  # all five elements
    )
    expected = [CommonInterval(*spans) for spans in expected_spans]

    test_results(expected, alg, perm_a, perm_b, perm_c)
Beispiel #3
0
def test_common_intervals_len_4(alg):
    """Run ``alg`` on two 4-element permutations and compare via ``test_results``."""
    first_perm = (0, 1, 2, 3)
    second_perm = (3, 0, 2, 1)

    expected_spans = (
        ((1, 2), (2, 3)),  # two elements
        ((0, 2), (1, 3)),  # three elements
        ((0, 3), (0, 3)),  # all four elements
    )
    expected = [CommonInterval(*spans) for spans in expected_spans]

    test_results(expected, alg, first_perm, second_perm)
Beispiel #4
0
def common_k(perm_id: Sequence, perms: Sequence[Sequence], sub_len: int):
    """Yield a ``CommonInterval`` for each ``sub_len``-window of ``perm_id`` found in the others.

    Each window produced by ``window(perm_id, sub_len)`` is turned into a
    frozenset and handed to ``find_in_others`` together with every permutation
    after the first one in ``perms``.  A truthy answer is unpacked as the
    additional spans of the yielded interval; the first span is built from the
    window's first and last elements.
    """
    # Shared across all windows; per the original note it is filled lazily,
    # on request, by the lookup helper.
    lazy_index = {}

    for win in window(perm_id, sub_len):
        matches = find_in_others(lazy_index, frozenset(win), perms[1:])
        if not matches:
            continue
        yield CommonInterval((win[0], win[-1]), *matches)
Beispiel #5
0
def trivial_common(perm_a: Sequence, perm_b: Sequence) -> List[CommonInterval]:
    """Collect the common intervals of two permutations by brute force.

    For every window length from 2 up to ``len(perm_a)`` each window of
    ``perm_a`` is converted to a set and looked up in ``perm_b`` through
    ``find_in_other``.  Whenever that lookup is truthy, a ``CommonInterval`` is
    recorded whose first span is (first element, last element) of the window
    and whose second span is the lookup result.

    Args:
        perm_a: Sequence whose windows are enumerated.
        perm_b: Sequence in which each window's element set is searched.

    Returns:
        The recorded intervals, ordered by window length and then by window
        position.  Empty when ``perm_a`` has fewer than two elements.
    """
    commons = []
    for length in range(2, len(perm_a) + 1):
        for w1 in window(perm_a, length):
            if in_other := find_in_other(set(w1), perm_b):
                commons.append(CommonInterval((w1[0], w1[-1]), in_other))
    # The function is annotated as returning the list; without this return the
    # caller would always receive None.
    return commons
Beispiel #6
0
 def _intersects_with(cls, intervals: CommonIntervals,
                      ci: CommonInterval) -> IntervalList:
     """Return the intervals that intersect ``ci`` and lie 'after' it.

     An interval qualifies when ``CommonInterval.intersect(ci, other)`` is
     truthy and ``ci.first_end <= other.first_end``.  Input order is kept.
     """
     following = []
     for candidate in intervals:
         if not CommonInterval.intersect(ci, candidate):
             continue
         if ci.first_end <= candidate.first_end:
             following.append(candidate)
     return following
Beispiel #7
0
def test_common_intervals_len_5_b(alg):
    """Run ``alg`` on two 5-element permutations whose only listed interval is the full one."""
    first_perm = (0, 1, 2, 3, 4)
    second_perm = (3, 1, 4, 0, 2)

    expected_spans = (
        ((0, 4), (0, 4)),  # all five elements
    )
    expected = [CommonInterval(*spans) for spans in expected_spans]

    test_results(expected, alg, first_perm, second_perm)
Beispiel #8
0
def test_common_intervals_len_5(alg):
    """Run ``alg`` on two 5-element permutations and compare via ``test_results``."""
    first_perm = (0, 1, 2, 3, 4)
    second_perm = (4, 3, 0, 2, 1)

    expected_spans = (
        # two elements
        ((1, 2), (3, 4)),
        ((3, 4), (0, 1)),
        # three elements
        ((0, 2), (2, 4)),
        # four elements
        ((0, 3), (1, 4)),
        # all five elements
        ((0, 4), (0, 4)),
    )
    expected = [CommonInterval(*spans) for spans in expected_spans]

    test_results(expected, alg, first_perm, second_perm)
Beispiel #9
0
    def chain_starting_with(self, ci: CommonInterval) -> List[CommonInterval]:
        """Return the chain of successively intersecting intervals that begins at ``ci``.

        The nesting level of ``ci`` is looked up in ``self.reverse_index``; the
        items following ``ci`` on that level are walked in order, and each one
        is added as long as it intersects the most recently added interval.
        The walk stops at the first item that does not.

        Returns:
            ``[ci, ...]`` when at least one follower was chained, otherwise an
            empty list.
        """
        level_items = self.nesting_levels[self.reverse_index[ci]]
        start = level_items.index(ci)

        links = [ci]
        for candidate in level_items[start + 1:]:
            if not CommonInterval.intersect(links[-1], candidate):
                break
            links.append(candidate)

        # A chain consisting of ci alone does not count as a chain.
        return links if len(links) > 1 else []
Beispiel #10
0
    def process_merged_chars(cls, raw_tree: PQTree,
                             translation: Tuple[Mapping[Tuple[ContextChar, ContextChar], MergedChar]],
                             perms):
        """Expand merged-character leaves of a PQ-tree into two single-character leaves.

        Works on a deep copy of ``raw_tree``.  Every leaf of the copy whose
        ``ci.alt_sign`` is a ``MergedChar`` is replaced in its parent:

        * parent is a ``PNode``: by a new ``QNode`` holding the two leaves;
        * otherwise, if ``cls.possible_raise_merged_char(...)`` is truthy:
          by the two leaves directly;
        * otherwise: by the new ``QNode``.

        Args:
            raw_tree: Tree to expand; only its deep copy is edited here.
            translation: Tuple that must contain exactly one mapping (asserted)
                from ``(ContextChar, ContextChar)`` pairs to ``MergedChar``.
            perms: Forwarded unchanged to ``possible_raise_merged_char``.

        Returns:
            The edited deep copy of ``raw_tree``.
        """

        assert len(translation) == 1

        tree_copy = deepcopy(raw_tree)
        # Inverted mapping: MergedChar -> original key pair(s).
        # NOTE(review): presumably each value is a list of keys, hence the [0]
        # below - confirm against invert_dict_multi_val.
        reverse_translation = invert_dict_multi_val(translation[0])
        # Iterating the tree yields (node, parent) pairs.
        for node, parent in tree_copy:
            # Only leaves that carry a merged character are rewritten.
            if not isinstance(node, LeafNode):
                continue
            if not isinstance(node.ci.alt_sign, MergedChar):
                continue

            mc: MergedChar = node.ci.alt_sign
            # First recorded pair of context chars for this merged char.
            cc1, cc2 = reverse_translation[mc][0]
            first_ci_start = node.ci.first_start
            # The two replacement leaves take positions first_ci_start and
            # first_ci_start + 1, signed with the respective original chars.
            first_leaf_ci = CommonInterval((first_ci_start, first_ci_start))
            first_leaf_ci.sign = cc1.char
            second_leaf_ci = CommonInterval((first_ci_start + 1, first_ci_start + 1))
            second_leaf_ci.sign = cc2.char
            first_leaf = LeafNode(first_leaf_ci)
            second_leaf = LeafNode(second_leaf_ci)
            # Built up front; used by the PNode branch and the final fallback.
            new_qnode = QNode((first_ci_start, first_ci_start + 1)).with_children((first_leaf, second_leaf))
            # Design notes kept from the original author:
            # if parent is a P node just accept children as leaves,
            # same behaviour in case the chars always show up in the same order
            # otherwise add a Q node as a child - and split the node as its leaf children

            # if parent is a P node - transform merged char to child QNode
            # in case the parent is Q - if the merged char is first or last in children order
            # try to lift leaves to parent -
            # otherwise, the merged char is in the middle of its parent - same as P case

            # Disabled earlier variant:
            # if isinstance(parent, PNode) or len(mc.char_orders) == 1:
            #     parent.replace_child(node, first_leaf, second_leaf)
            # else:
            #     parent.replace_child(node,
            #                          QNode((first_ci_start, first_ci_start + 1)).with_children(
            #                              (first_leaf, second_leaf)))
            if isinstance(parent, PNode):
                parent.replace_child(node, new_qnode)
            # Disabled earlier variant of the non-PNode branches:
            # elif node in [parent.children[0], parent.children[-1]] and mc.other_side_same:
            #     parent.replace_child(node, first_leaf, second_leaf)
            # elif mc.context_same:
            #     parent.replace_child(node, first_leaf, second_leaf)
            # else:
            #     parent.replace_child(node, new_qnode)
            elif cls.possible_raise_merged_char(perms, node, first_leaf, second_leaf, parent):
                # The two leaves become direct children of the parent.
                parent.replace_child(node, first_leaf, second_leaf)
            else:
                parent.replace_child(node, new_qnode)

        return tree_copy
Beispiel #11
0
def known_example():
    """Build the PQ-tree for a known 9-element example and assert its parenthesised form.

    The example is the one from the paper
    "Gene Proximity Analysis across Whole Genomes via PQ Trees".
    """
    # One single-position interval per position 0..8.
    singletons = [CommonInterval((pos, pos)) for pos in range(9)]

    multi_spans = (
        # two elements
        ((0, 1), (6, 7), (0, 1)),
        ((1, 2), (7, 8), (1, 2)),
        ((3, 4), (2, 3), (5, 6)),
        ((4, 5), (3, 4), (6, 7)),
        # three elements
        ((0, 2), (6, 8), (0, 2)),
        ((3, 5), (2, 4), (5, 7)),
        # four elements
        ((3, 6), (2, 5), (4, 7)),
        # five elements
        ((3, 7), (1, 5), (3, 7)),
        # six elements
        ((3, 8), (0, 5), (3, 8)),
        # eight elements
        ((0, 7), (1, 8), (0, 7)),
        # all nine elements
        ((0, 8), (0, 8), (0, 8)),
    )
    commons1 = singletons + [CommonInterval(*spans) for spans in multi_spans]

    irreducible = ReduceIntervals.reduce(commons1)
    hierarchy = IntervalHierarchy.from_irreducible_intervals(irreducible)

    tree = PQTreeBuilder._from_s(hierarchy, None)
    assert tree.to_parens() == "[[0 1 2] [[[3 4 5] 6] 7] 8]"
Beispiel #12
0
def test_common_intervals_3_strings_len_9(alg):
    """Run ``alg`` on three 9-element permutations and compare via ``test_results``.

    Every expected entry carries one (start, end) span per permutation.
    """
    perm_a = [0, 1, 2, 3, 4, 5, 6, 7, 8]
    perm_b = [8, 7, 3, 4, 5, 6, 0, 1, 2]
    perm_c = [0, 1, 2, 7, 6, 3, 4, 5, 8]

    expected_spans = (
        # two elements
        ((0, 1), (6, 7), (0, 1)),
        ((1, 2), (7, 8), (1, 2)),
        ((3, 4), (2, 3), (5, 6)),
        ((4, 5), (3, 4), (6, 7)),
        # three elements
        ((0, 2), (6, 8), (0, 2)),
        ((3, 5), (2, 4), (5, 7)),
        # four elements
        ((3, 6), (2, 5), (4, 7)),
        # five elements
        ((3, 7), (1, 5), (3, 7)),
        # six elements
        ((3, 8), (0, 5), (3, 8)),
        # eight elements
        ((0, 7), (1, 8), (0, 7)),
        # all nine elements
        ((0, 8), (0, 8), (0, 8)),
    )
    expected = [CommonInterval(*spans) for spans in expected_spans]

    test_results(expected, alg, perm_a, perm_b, perm_c)
Beispiel #13
0
def test_common_intervals_len_9(alg):
    """Run ``alg`` on two 9-element permutations and compare via ``test_results``."""
    first_perm = (0, 1, 2, 3, 4, 5, 6, 7, 8)
    second_perm = (8, 7, 3, 4, 5, 6, 0, 1, 2)

    expected_spans = (
        # two elements
        ((0, 1), (6, 7)),
        ((1, 2), (7, 8)),
        ((3, 4), (2, 3)),
        ((4, 5), (3, 4)),
        ((5, 6), (4, 5)),
        ((7, 8), (0, 1)),
        # three elements
        ((0, 2), (6, 8)),
        ((3, 5), (2, 4)),
        ((4, 6), (3, 5)),
        # four elements
        ((3, 6), (2, 5)),
        # five elements
        ((3, 7), (1, 5)),
        # six elements
        ((3, 8), (0, 5)),
        # seven elements
        ((0, 6), (2, 8)),
        # eight elements
        ((0, 7), (1, 8)),
        # all nine elements
        ((0, 8), (0, 8)),
    )
    expected = [CommonInterval(*spans) for spans in expected_spans]

    test_results(expected, alg, first_perm, second_perm)
Beispiel #14
0
def test_given_examples():
    """Check ``ReduceIntervals.reduce`` against two hand-written expectations.

    For each example the full list of common intervals is reduced and the
    result is compared with the expected set of irreducible intervals.
    """
    # Reference sets noted with the first example (these appear to be the
    # 1-based form of the first spans below):
    # C = {[1, 2], [2, 3], [1, 3], [4, 5], [5, 6], [4, 6], [4, 7], [4, 8], [4, 9], [1, 8], [1, 9]}
    # I = {[1, 2], [2, 3],         [4, 5], [5, 6],         [4, 7], [4, 8], [4, 9], [1, 8]        }

    # --- first example: three spans per interval ---
    commons1 = [
        CommonInterval(*spans)
        for spans in (
            # two elements
            ((0, 1), (6, 7), (0, 1)),
            ((1, 2), (7, 8), (1, 2)),
            ((3, 4), (2, 3), (5, 6)),
            ((4, 5), (3, 4), (6, 7)),
            # three elements
            ((0, 2), (6, 8), (0, 2)),
            ((3, 5), (2, 4), (5, 7)),
            # four elements
            ((3, 6), (2, 5), (4, 7)),
            # five elements
            ((3, 7), (1, 5), (3, 7)),
            # six elements
            ((3, 8), (0, 5), (3, 8)),
            # eight elements
            ((0, 7), (1, 8), (0, 7)),
            # all nine elements
            ((0, 8), (0, 8), (0, 8)),
        )
    ]

    irreducible1 = {
        CommonInterval(*spans)
        for spans in (
            # two elements
            ((0, 1), (6, 7), (0, 1)),
            ((1, 2), (7, 8), (1, 2)),
            ((3, 4), (2, 3), (5, 6)),
            ((4, 5), (3, 4), (6, 7)),
            # four elements
            ((3, 6), (2, 5), (4, 7)),
            # five elements
            ((3, 7), (1, 5), (3, 7)),
            # six elements
            ((3, 8), (0, 5), (3, 8)),
            # eight elements
            ((0, 7), (1, 8), (0, 7)),
        )
    }

    # --- second example: a single span per interval ---
    commons2 = [
        CommonInterval(span)
        for span in (
            # spans covering two positions
            (0, 1),
            (1, 2),
            # span covering three positions
            (0, 2),
            # spans covering six positions
            (2, 7),
            (3, 8),
            (4, 9),
            # spans covering eight, nine and ten positions
            (0, 7),
            (0, 8),
            (0, 9),
        )
    ]

    irreducible2 = {
        CommonInterval(span)
        for span in (
            # spans covering two positions
            (0, 1),
            (1, 2),
            # spans covering six positions
            (2, 7),
            (3, 8),
            (4, 9),
        )
    }

    assert ReduceIntervals.reduce(commons1) == irreducible1
    assert ReduceIntervals.reduce(commons2) == irreducible2
Beispiel #15
0
def produce_s_for_example():
    """Pretty-print the nesting levels of the interval hierarchy for the 9-element example.

    The common intervals are reduced with ``ReduceIntervals.reduce``, turned
    into an ``IntervalHierarchy`` and its ``nesting_levels`` are printed.
    """
    # One single-position interval per position 0..8.
    singletons = [CommonInterval((pos, pos)) for pos in range(9)]

    multi_spans = (
        # two elements
        ((0, 1), (6, 7), (0, 1)),
        ((1, 2), (7, 8), (1, 2)),
        ((3, 4), (2, 3), (5, 6)),
        ((4, 5), (3, 4), (6, 7)),
        # three elements
        ((0, 2), (6, 8), (0, 2)),
        ((3, 5), (2, 4), (5, 7)),
        # four elements
        ((3, 6), (2, 5), (4, 7)),
        # five elements
        ((3, 7), (1, 5), (3, 7)),
        # six elements
        ((3, 8), (0, 5), (3, 8)),
        # eight elements
        ((0, 7), (1, 8), (0, 7)),
        # all nine elements
        ((0, 8), (0, 8), (0, 8)),
    )
    commons1 = singletons + [CommonInterval(*spans) for spans in multi_spans]

    irreducible = ReduceIntervals.reduce(commons1)
    hierarchy = IntervalHierarchy.from_irreducible_intervals(irreducible)

    pprint(hierarchy.nesting_levels)
Beispiel #16
0
def common_k(perm_id: Sequence, perms: Sequence[Sequence], k: int):
    """Yield a ``CommonInterval`` for each ``k``-window of ``perm_id`` found in the others.

    The element set of every window from ``window(perm_id, k)`` is handed to
    ``find_in_others`` together with all permutations after the first one in
    ``perms``.  A truthy answer is unpacked as the additional spans of the
    yielded interval; the first span is built from the window's first and
    last elements.
    """
    for win in window(perm_id, k):
        matches = find_in_others(set(win), perms[1:])
        if matches:
            yield CommonInterval((win[0], win[-1]), *matches)