コード例 #1
0
ファイル: test_slice_ql.py プロジェクト: JHP4911/JINA
def test_slice_ql_on_matches_and_chunks():
    """Check SliceQL(start=0, end=2) on the paths 'cc', 'c', 'r', 'mm', 'm'.

    Before the driver runs, every sampled level is expected to hold 10
    documents. Afterwards the test expects 2 roots, 2 chunks and 2 matches
    per root and 2 chunks per chunk, while matches of chunks and chunks of
    matches are expected to still hold 10 entries.
    """
    docs = random_docs_with_chunks_and_matches(10)
    driver = SliceQL(start=0, end=2, traversal_paths=('cc', 'c', 'r', 'mm', 'm'))

    def ends(seq):
        # First and last element: the two positions this test samples.
        return seq[0], seq[-1]

    def check_untouched():
        # Every sampled container starts out with 10 entries.
        assert len(docs) == 10
        for root in ends(docs):
            assert len(root.chunks) == 10
            assert len(root.matches) == 10
            for match in ends(root.matches):
                assert len(match.chunks) == 10
            for chunk in ends(root.chunks):
                assert len(chunk.chunks) == 10
                assert len(chunk.matches) == 10
        # Chunks below matches-of-chunks are only sampled under the first root.
        for chunk in ends(docs[0].chunks):
            for match in ends(chunk.matches):
                assert len(match.chunks) == 10

    def check_sliced():
        assert len(docs) == 2  # roots sliced
        for root in ends(docs):
            assert len(root.chunks) == 2  # chunks of a root sliced
            assert len(root.matches) == 2  # matches of a root sliced
            for chunk in ends(root.chunks):
                assert len(chunk.chunks) == 2  # chunks of chunks sliced
                assert len(chunk.matches) == 10  # matches of chunks untouched
                for match in ends(chunk.matches):
                    assert len(match.chunks) == 10
            # Chunks of the surviving matches are expected to be untouched.
            for match in ends(root.matches):
                assert len(match.chunks) == 10

    check_untouched()
    driver._traverse_apply(docs)
    check_sliced()
コード例 #2
0
ファイル: test_slice_ql.py プロジェクト: tomzhang/jina
def test_slice_ql_on_matches_and_chunks():
    """Check SliceQL(start=0, end=2) with granularity and adjacency range (0, 2).

    Before the driver runs, every sampled level is expected to hold 10
    documents. Afterwards the test expects 2 roots, 2 chunks and 2 matches
    per root and 2 chunks per chunk, while matches of chunks and chunks of
    matches are expected to still hold 10 entries.
    """
    docs = random_docs_with_chunks_and_matches(10)
    driver = SliceQL(
        start=0,
        end=2,
        granularity_range=(0, 2),
        adjacency_range=(0, 2),
    )

    def first_and_last(seq):
        # The test only samples the two ends of each container.
        return seq[0], seq[-1]

    def assert_all_levels_full():
        assert len(docs) == 10
        for root in first_and_last(docs):
            assert len(root.chunks) == 10
            assert len(root.matches) == 10
            for match in first_and_last(root.matches):
                assert len(match.chunks) == 10
            for chunk in first_and_last(root.chunks):
                assert len(chunk.chunks) == 10
                assert len(chunk.matches) == 10
        # Chunks below matches-of-chunks are only sampled under the first root.
        for chunk in first_and_last(docs[0].chunks):
            for match in first_and_last(chunk.matches):
                assert len(match.chunks) == 10

    def assert_sliced():
        assert len(docs) == 2  # roots sliced
        for root in first_and_last(docs):
            assert len(root.chunks) == 2  # chunks of a root sliced
            assert len(root.matches) == 2  # matches of a root sliced
            for chunk in first_and_last(root.chunks):
                assert len(chunk.chunks) == 2  # chunks of chunks sliced
                assert len(chunk.matches) == 10  # matches of chunks untouched
                for match in first_and_last(chunk.matches):
                    assert len(match.chunks) == 10
            # Chunks of the surviving matches are expected to be untouched.
            for match in first_and_last(root.matches):
                assert len(match.chunks) == 10

    assert_all_levels_full()
    driver._traverse_apply(docs)
    assert_sliced()
コード例 #3
0
def test_selection():
    """Slice the 'cmm' and 'mcm' paths down to a single match each.

    For both paths the surviving document is expected to report
    granularity 1 and adjacency 2, and its container to have length 1.
    """
    docs = build_docs()
    slicer = SliceQL(start=0, end=1, traversal_paths=['cmm', 'mcm'])
    slicer._traverse_apply(docs)

    root = docs[0]
    sliced_containers = (
        root.chunks[0].matches[0].matches,  # path 'cmm'
        root.matches[0].chunks[0].matches,  # path 'mcm'
    )
    for container in sliced_containers:
        survivor = container[0]
        assert survivor.granularity == 1
        assert survivor.adjacency == 2
        assert len(container) == 1
コード例 #4
0
def test_match_chunk():
    """Slice only the 'mc' path: chunks of root matches shrink to one.

    Every other sampled container is expected to keep DOCUMENTS_PER_LEVEL
    entries.
    """
    docs = build_docs()
    slicer = SliceQL(start=0, end=1, traversal_paths=['mc'])
    slicer._traverse_apply(docs)

    assert len(docs) == DOCUMENTS_PER_LEVEL
    root = docs[0]
    assert len(root.chunks) == DOCUMENTS_PER_LEVEL
    assert len(root.chunks[0].matches) == DOCUMENTS_PER_LEVEL
    assert len(root.matches) == DOCUMENTS_PER_LEVEL

    first_match = root.matches[0]
    assert len(first_match.chunks) == 1  # the only container on path 'mc'
    assert len(first_match.matches) == DOCUMENTS_PER_LEVEL
    assert len(first_match.matches[0].chunks) == DOCUMENTS_PER_LEVEL
コード例 #5
0
def test_slice_ql_on_chunks():
    """SliceQL(start=0, end=2) recursing on chunks with recur_range (0, 2).

    The test expects 2 roots, 2 chunks per root and 2 chunks per chunk,
    sampled at the first and last position of each container.
    """
    docs = random_docs_with_chunks(10)
    slicer = SliceQL(start=0, end=2, recur_on='chunks', recur_range=(0, 2))
    slicer._traverse_apply(docs)

    assert len(docs) == 2
    for root in (docs[0], docs[-1]):
        assert len(root.chunks) == 2  # first chunk level
        for chunk in (root.chunks[0], root.chunks[-1]):
            assert len(chunk.chunks) == 2  # second chunk level
コード例 #6
0
ファイル: test_slice_ql.py プロジェクト: JHP4911/JINA
def test_slice_ql_on_chunks():
    """SliceQL(start=0, end=2) applied on the paths 'cc', 'c' and 'r'.

    The test expects 2 roots, 2 chunks per root and 2 chunks per chunk,
    sampled at the first and last position of each container.
    """
    docs = random_docs_with_chunks(10)
    slicer = SliceQL(start=0, end=2, traversal_paths=('cc', 'c', 'r'))
    slicer._traverse_apply(docs)

    assert len(docs) == 2
    for root in (docs[0], docs[-1]):
        assert len(root.chunks) == 2  # first chunk level
        for chunk in (root.chunks[0], root.chunks[-1]):
            assert len(chunk.chunks) == 2  # second chunk level
コード例 #7
0
def test_traverse_apply():
    """Apply SliceQL on path 'mcm' to a single root whose chunks were cleared.

    The test first confirms that the targeted match reports granularity 1
    and adjacency 2, then expects exactly one match to remain after slicing.
    """
    all_docs = build_docs()
    root = all_docs[0]
    root.ClearField('chunks')
    docs = [root]

    slicer = SliceQL(start=0, end=1, traversal_paths=['mcm'])

    # Sanity check on the document the 'mcm' path ends at.
    target = docs[0].matches[0].chunks[0].matches[0]
    assert target.granularity == 1
    target = docs[0].matches[0].chunks[0].matches[0]
    assert target.adjacency == 2

    slicer._traverse_apply(docs)
    assert len(docs[0].matches[0].chunks[0].matches) == 1
コード例 #8
0
def test_both_from_0():
    """Slice the paths 'r', 'c', 'm', 'cc' and 'mm' down to one element.

    Containers on those paths are expected to have length 1; the mixed
    containers sampled here (matches of chunks, chunks of matches) are
    expected to keep DOCUMENTS_PER_LEVEL entries.
    """
    docs = build_docs()
    slicer = SliceQL(
        start=0,
        end=1,
        traversal_paths=['r', 'c', 'm', 'cc', 'mm'],
    )
    slicer._traverse_apply(docs)

    assert len(docs) == 1
    root = docs[0]

    # Chunk side of the tree.
    assert len(root.chunks) == 1
    chunk = root.chunks[0]
    assert len(chunk.chunks) == 1
    assert len(chunk.chunks[0].matches) == DOCUMENTS_PER_LEVEL
    assert len(chunk.matches) == DOCUMENTS_PER_LEVEL

    # Match side of the tree.
    assert len(root.matches) == 1
    match = root.matches[0]
    assert len(match.chunks) == DOCUMENTS_PER_LEVEL
    assert len(match.matches) == 1
    assert len(match.matches[0].chunks) == DOCUMENTS_PER_LEVEL
コード例 #9
0
def test_only_adjacency():
    """Recurse on matches only (adjacency 0..2, granularity fixed at 0).

    The test expects roots, matches and matches of matches to be cut to one
    element, while every sampled chunk container keeps DOCUMENTS_PER_LEVEL
    entries.
    """
    docs = build_docs()
    slicer = SliceQL(
        start=0,
        end=1,
        adjacency_range=(0, 2),
        granularity_range=(0, 0),
        recur_on=["matches"],
    )
    slicer._traverse_apply(docs)

    assert len(docs) == 1
    root = docs[0]
    assert len(root.chunks) == DOCUMENTS_PER_LEVEL
    assert len(root.chunks[0].matches) == DOCUMENTS_PER_LEVEL

    assert len(root.matches) == 1
    match = root.matches[0]
    assert len(match.chunks) == DOCUMENTS_PER_LEVEL
    assert len(match.matches) == 1
    assert len(match.matches[0].chunks) == DOCUMENTS_PER_LEVEL
コード例 #10
0
def test_adjacency0_granularity1():
    """Slice the paths 'c', 'cc', 'cm' and 'cmm' down to one element.

    Containers on those paths are expected to have length 1. The root list
    and everything sampled below the root's own matches are expected to
    keep DOCUMENTS_PER_LEVEL entries.
    """
    docs = build_docs()
    slicer = SliceQL(start=0, end=1, traversal_paths=['c', 'cc', 'cm', 'cmm'])
    slicer._traverse_apply(docs)

    assert len(docs) == DOCUMENTS_PER_LEVEL
    root = docs[0]

    # Subtree below the first chunk: sliced along the listed paths.
    assert len(root.chunks) == 1
    chunk = root.chunks[0]
    assert len(chunk.chunks) == 1
    assert len(chunk.chunks[0].matches) == DOCUMENTS_PER_LEVEL
    assert len(chunk.matches) == 1
    chunk_match = chunk.matches[0]
    assert len(chunk_match.chunks) == DOCUMENTS_PER_LEVEL
    assert len(chunk_match.matches) == 1
    assert len(chunk_match.matches[0].chunks) == DOCUMENTS_PER_LEVEL

    # Subtree below the root's matches: not on any listed path.
    assert len(root.matches) == DOCUMENTS_PER_LEVEL
    root_match = root.matches[0]
    assert len(root_match.chunks) == DOCUMENTS_PER_LEVEL
    assert len(root_match.matches) == DOCUMENTS_PER_LEVEL
    assert len(root_match.matches[0].chunks) == DOCUMENTS_PER_LEVEL
コード例 #11
0
 def test_slice_ql_on_matches(self):
     """SliceQL(start=0, end=2) traversing on matches with depth_range (0, 2).

     The root list is expected to keep all 10 documents, while matches and
     matches of matches are expected to be cut to 2 (sampled at the first
     and last position of each container).
     """
     docs = random_docs_with_matches(10)
     slicer = SliceQL(
         start=0, end=2, traverse_on='matches', depth_range=(0, 2))
     slicer._traverse_apply(docs)

     assert len(docs) == 10  # the root list itself is not sliced
     for root in (docs[0], docs[-1]):
         assert len(root.matches) == 2  # first match level
         for match in (root.matches[0], root.matches[-1]):
             assert len(match.matches) == 2  # second match level
コード例 #12
0
def test_traverse_apply():
    """Apply SliceQL restricted to granularity 1 / adjacency 2 on one root.

    The root's chunks are cleared first. The test confirms a match with
    granularity 1 and adjacency 2 exists, then expects exactly one match to
    remain in that container after slicing.
    """
    all_docs = build_docs()
    root = all_docs[0]
    root.ClearField('chunks')
    docs = [root]

    slicer = SliceQL(
        start=0,
        end=1,
        adjacency_range=(2, 2),
        granularity_range=(1, 1),
        recur_on=["matches"],
    )

    # Make sure a match with (granularity=1, adjacency=2) is present.
    target = docs[0].matches[0].chunks[0].matches[0]
    assert target.granularity == 1
    target = docs[0].matches[0].chunks[0].matches[0]
    assert target.adjacency == 2

    # NOTE(review): the original author flagged this call as raising
    # IndexError -- presumably a regression guard; confirm against the driver.
    slicer._traverse_apply(docs)
    assert len(docs[0].matches[0].chunks[0].matches) == 1
コード例 #13
0
def test_selection():
    """Slice matches restricted to granularity 1 and adjacency 2.

    Two containers are inspected: matches of matches of the first chunk,
    and matches of the first chunk of the first match. In both, the first
    element is expected to report granularity 1 / adjacency 2 and the
    container is expected to have length 1.
    """
    docs = build_docs()
    slicer = SliceQL(
        start=0,
        end=1,
        granularity_range=(1, 1),
        adjacency_range=(2, 2),
        recur_on=['matches'],
    )
    slicer._traverse_apply(docs)

    root = docs[0]
    inspected = (
        root.chunks[0].matches[0].matches,  # chunk -> match -> matches
        root.matches[0].chunks[0].matches,  # match -> chunk -> matches
    )
    for container in inspected:
        # Verify the level of the surviving document, then the slice size.
        survivor = container[0]
        assert survivor.granularity == 1
        assert survivor.adjacency == 2
        assert len(container) == 1
コード例 #14
0
def test_adjacency_chunks():
    """Slice with adjacency (0, 1), granularity (0, 0), recursing on chunks.

    With these ranges and ``recur_on`` the driver is only applied to the
    root level, so the test expects the root list to be cut to one document
    while every nested container keeps DOCUMENTS_PER_LEVEL entries. This
    combination is not meant for practical use; it serves to illustrate
    the behaviour.
    """
    docs = build_docs()
    slicer = SliceQL(
        start=0,
        end=1,
        adjacency_range=(0, 1),
        granularity_range=(0, 0),
        recur_on=["chunks"],
    )
    slicer._traverse_apply(docs)

    assert len(docs) == 1
    root = docs[0]
    assert len(root.chunks) == DOCUMENTS_PER_LEVEL
    assert len(root.chunks[0].matches) == DOCUMENTS_PER_LEVEL
    assert len(root.matches) == DOCUMENTS_PER_LEVEL

    match = root.matches[0]
    assert len(match.chunks) == DOCUMENTS_PER_LEVEL
    assert len(match.matches) == DOCUMENTS_PER_LEVEL
    assert len(match.matches[0].chunks) == DOCUMENTS_PER_LEVEL
コード例 #15
0
    def test_slice_ql_on_matches_and_chunks(self):
        """SliceQL(start=0, end=2) traversing chunks and matches, depth (0, 2).

        Before the driver runs, every sampled level is expected to hold 10
        documents. Afterwards the test expects 2 roots, 2 chunks and 2
        matches per root and 2 chunks per chunk, while matches of chunks and
        chunks of matches are expected to still hold 10 entries.
        """
        docs = random_docs_with_chunks_and_matches(10)
        driver = SliceQL(
            start=0,
            end=2,
            traverse_on=['chunks', 'matches'],
            depth_range=(0, 2),
        )

        def ends(seq):
            # First and last element: the two positions this test samples.
            return seq[0], seq[-1]

        def check_untouched():
            assert len(docs) == 10
            for root in ends(docs):
                assert len(root.chunks) == 10
                assert len(root.matches) == 10
                for match in ends(root.matches):
                    assert len(match.chunks) == 10
                for chunk in ends(root.chunks):
                    assert len(chunk.chunks) == 10
                    assert len(chunk.matches) == 10
            # Chunks below matches-of-chunks are only sampled under the
            # first root.
            for chunk in ends(docs[0].chunks):
                for match in ends(chunk.matches):
                    assert len(match.chunks) == 10

        def check_sliced():
            assert len(docs) == 2  # roots sliced
            for root in ends(docs):
                assert len(root.chunks) == 2  # chunks of a root sliced
                assert len(root.matches) == 2  # matches of a root sliced
                for chunk in ends(root.chunks):
                    assert len(chunk.chunks) == 2  # chunks of chunks sliced
                    assert len(chunk.matches) == 10  # matches of chunks kept
                    for match in ends(chunk.matches):
                        assert len(match.chunks) == 10
                # Chunks of the surviving matches are expected to be kept.
                for match in ends(root.matches):
                    assert len(match.chunks) == 10

        check_untouched()
        driver._traverse_apply(docs)
        check_sliced()