Ejemplo n.º 1
0
def test_sequences_without_commonality_have_no_alignment(s1, s2):
    """Check that aligning two disjoint collections yields no alignments.

    The two inputs to `align` are derived from `s1` and `s2`: the elements
    found only in `s1`, and the elements found in both `s1` and `s2`. By
    construction these share no element.
    """
    only_in_first = s1.difference(s2)
    in_both = s1.intersection(s2)

    # Sanity check on the construction: the two sets must not overlap.
    assert only_in_first.isdisjoint(in_both)

    _, alignments = align(
        list(only_in_first), list(in_both), simple_score, simple_gap)
    assert not list(alignments)
Ejemplo n.º 2
0
def test_imperfect_alignment_is_found(chunk):
    """Check that one alignment is found when a foreign element splits a run.

    The first sequence is `chunk` repeated twice; the second is the same two
    copies of `chunk` with one extra element inserted between them.
    """
    # One more than the largest element, so it differs from every element
    # of `chunk`.
    divider = max(chunk) + 1
    items = list(chunk)

    doubled = items * 2
    split = items + [divider] + items

    _, found = align(doubled, split, simple_score, simple_gap)
    found = list(found)

    # Exactly one alignment is expected.
    assert len(found) == 1

    # Its first index pair must point at the start of both sequences.
    first_pair = found[0][0]
    assert first_pair == (0, 0)
Ejemplo n.º 3
0
def update(anchor, handle=None):
    """Update an anchor based on the current contents of its source file.

    The anchor's stored context text is aligned against the current source
    text. The source positions of the first alignment yielded by `align`
    that correspond to the anchor's topic (as decided by `_index_in_topic`)
    are used to build the new anchor.

    Args:
        anchor: The `Anchor` to be updated.
        handle: File-like object containing contents of the anchor's file. If
            `None`, then this function will open the file and read it. When
            provided, it is read from its current position and then rewound
            to the start before being passed on to `make_anchor`.

    Returns: A new `Anchor`, possibly identical to the input.

    Raises:
        AlignmentError: If no alignments could be found between the old
            anchor and the new text, or if the selected alignment does not
            map any part of the topic onto the updated source. The message
            of the exception will say what the problem is.

    """
    if handle is None:
        with anchor.file_path.open(mode='rt') as fp:
            source_text = fp.read()
    else:
        source_text = handle.read()
        # Rewind the handle; it is handed to `make_anchor` below
        # (presumably so it can be read again from the start -- confirm).
        handle.seek(0)

    context = anchor.context

    # Only the alignments are needed; the score returned by `align` is unused.
    _, alignments = align(context.full_text, source_text, score, gap_penalty)

    try:
        alignment = next(alignments)
    except StopIteration:
        # The exhausted-iterator detail is of no use to callers, so do not
        # chain it onto the domain error.
        raise AlignmentError(
            'No alignments for anchor: {}'.format(anchor)) from None

    # Shift added to indices into the context text before they are tested
    # against the topic (presumably converting them to offsets in the
    # original file -- confirm against `_index_in_topic`).
    anchor_offset = context.offset - len(context.before)

    # Keep the source-side index of every aligned pair where both sides are
    # present and the anchor-side index falls inside the topic.
    source_indices = tuple(
        s_idx
        for a_idx, s_idx in alignment
        if a_idx is not None
        and s_idx is not None
        and _index_in_topic(a_idx + anchor_offset, anchor))

    if not source_indices:
        raise AlignmentError(
            "Best alignment does not map topic to updated source.")

    # NOTE(review): `width` is the count of mapped indices, which equals the
    # covered span only if those indices are contiguous -- confirm intended.
    return make_anchor(file_path=anchor.file_path,
                       offset=source_indices[0],
                       width=len(source_indices),
                       context_width=anchor.context.width,
                       metadata=anchor.metadata,
                       handle=handle)
Ejemplo n.º 4
0
def test_alignment_finds_perfect_subset(prefix, match, suffix):
    """Check that `match` is aligned exactly where it sits in a larger input.

    `match` is aligned against `prefix + match + suffix`. Among the
    alignments returned, exactly one must have the same length as `match`
    and begin right after `prefix`, and it must pair each index of `match`
    with the corresponding index of the larger sequence.
    """
    start = len(prefix)
    match_len = len(match)
    haystack = prefix + match + suffix

    _, alignments = align(match, haystack, simple_score, simple_gap)

    candidates = []
    for candidate in alignments:
        # Keep only full-length alignments that start just after `prefix`.
        if len(candidate) == match_len and candidate[0][1] == start:
            candidates.append(candidate)

    assert len(candidates) == 1

    # Index i of `match` should be paired with index start + i of `haystack`.
    expected = tuple((i, start + i) for i in range(match_len))
    assert candidates[0] == expected
Ejemplo n.º 5
0
def test_multiple_alignments(match, size):
    """Check that a pattern repeated `size` times yields `size` alignments.

    `match` is converted to a list and aligned against that list repeated
    `size` times.
    """
    pattern = list(match)
    repeated = pattern * size

    _, alignments = align(pattern, repeated, simple_score, simple_gap)

    found = sum(1 for _ in alignments)
    assert found == size
Ejemplo n.º 6
0
def test_alignments_are_no_longer_than_longest_input(s1, s2):
    """Check that no alignment is longer than the longer of the two inputs."""
    _, alignments = align(s1, s2, simple_score, simple_gap)

    longest = max(map(len, (s1, s2)))
    assert all(len(alignment) <= longest for alignment in alignments)
Ejemplo n.º 7
0
def test_sequences_with_commonality_have_at_least_one_alignment(seqs):
    """Check that aligning `seqs[0]` with `seqs[1]` yields an alignment."""
    first, second = seqs[0], seqs[1]
    _, alignments = align(first, second, simple_score, simple_gap)

    found = list(alignments)
    assert found
Ejemplo n.º 8
0
def test_empty_sequences_have_no_alignment(seq):
    """Check that aligning against an empty list yields no alignments.

    The empty list is tried in both argument positions.
    """
    for left, right in ((seq, []), ([], seq)):
        _, alignments = align(left, right, simple_score, simple_gap)
        assert not list(alignments)