def test_multimodal_document_fail_bad_doctype(visual_embedding):
    """A multimodal document without chunks is expected to raise ``BadDocType``.

    The document only receives a tag and an embedding; no chunk is attached
    before ``modality_content_mapping`` is read.

    NOTE(review): a later function in this file carries the same name; if
    both live in one module, the later definition replaces this one and this
    test is never collected -- confirm which should be kept.
    """
    with pytest.raises(BadDocType):
        doc = MultimodalDocument()
        doc.tags['id'] = 1
        doc.embedding = visual_embedding
        # Bare attribute read, kept only for its side effect; presumably this
        # is the statement expected to raise.
        doc.modality_content_mapping
def test_from_content_category_mapping(modality_content_mapping):
    """Build a document from a modality-to-content mapping and check its layout.

    Verifies that the resulting document exposes exactly two modalities
    ('visual' and 'textual'), holds two chunks, and sits one granularity
    level below its first chunk.

    :param modality_content_mapping: fixture passed straight to
        ``MultimodalDocument.from_modality_content_mapping``
    """
    md = MultimodalDocument.from_modality_content_mapping(
        modality_content_mapping=modality_content_mapping
    )
    assert len(md.modalities) == 2
    # Check each modality on its own: ``'a' and 'b' in x`` parses as
    # ``'a' and ('b' in x)`` and would only ever test the second name.
    assert 'visual' in md.modalities
    assert 'textual' in md.modalities
    assert len(md.chunks) == 2
    assert md.granularity == md.chunks[0].granularity - 1
def test_from_chunks_granularity_2(chunk_5, chunk_6):
    """Build a document from two chunks and check modalities and granularity.

    Verifies that the document exposes exactly two modalities ('visual' and
    'textual'), holds two chunks, sits one granularity level below its first
    chunk, and that the first chunk's granularity is 3.

    :param chunk_5: fixture, first chunk passed to the constructor
    :param chunk_6: fixture, second chunk passed to the constructor
    """
    md = MultimodalDocument(chunks=[chunk_5, chunk_6])
    assert len(md.modalities) == 2
    # Check each modality on its own: ``'a' and 'b' in x`` parses as
    # ``'a' and ('b' in x)`` and would only ever test the second name.
    assert 'visual' in md.modalities
    assert 'textual' in md.modalities
    assert len(md.chunks) == 2
    assert md.granularity == md.chunks[0].granularity - 1
    assert md.chunks[0].granularity == 3
def multimodal_generator():
    """Yield five ``MultimodalDocument`` objects, one per index 0..4.

    Each document is built from a ``modality_content_map`` with the keys
    ``'1'`` and ``'2'`` whose values embed the current index.
    """
    for idx in range(5):
        content_by_modality = {'1': f'aaa {idx}', '2': f'bbb {idx}'}
        yield MultimodalDocument(modality_content_map=content_by_modality)
def test_from_chunks_success(chunk_1, chunk_2):
    """Build a document from two chunks and check modalities and granularity.

    Verifies that the document exposes exactly two modalities ('visual' and
    'textual'), holds two chunks, sits one granularity level below its first
    chunk, and that the first chunk's granularity is 1.

    :param chunk_1: fixture, first chunk passed to the constructor
    :param chunk_2: fixture, second chunk passed to the constructor
    """
    md = MultimodalDocument(chunks=[chunk_1, chunk_2])
    assert len(md.modalities) == 2
    # Check each modality on its own: ``'a' and 'b' in x`` parses as
    # ``'a' and ('b' in x)`` and would only ever test the second name.
    assert 'visual' in md.modalities
    assert 'textual' in md.modalities
    assert len(md.chunks) == 2
    assert md.granularity == md.chunks[0].granularity - 1
    assert md.chunks[0].granularity == 1
def query_generator(image_paths, text_queries):
    """Yield one ``MultimodalDocument`` per (image path, text query) pair.

    Pairs are formed with ``zip``, so iteration stops at the shorter input.
    Each image file is read completely in binary mode, and its handle is
    closed before the document is yielded, so no file stays open while the
    generator is suspended or if the consumer abandons it.

    :param image_paths: iterable of paths opened with ``open(path, 'rb')``
    :param text_queries: iterable of values stored under the ``'text'`` key,
        paired positionally with ``image_paths``
    :yield: a document built from
        ``modality_content_map={'image': <file bytes>, 'text': <text>}``
    """
    for image_path, text in zip(image_paths, text_queries):
        with open(image_path, 'rb') as fp:
            image_bytes = fp.read()
        # Yield only after the ``with`` block has released the file handle.
        yield MultimodalDocument(
            modality_content_map={'image': image_bytes, 'text': text}
        )
def multimodal_document(chunk_1, chunk_2):
    """Yield a ``MultimodalDocument`` carrying a tag, text, embedding and two chunks.

    The document is yielded from inside its ``with`` block, so the context
    manager exits only once the consumer resumes or closes the generator.

    :param chunk_1: fixture added as the first chunk
    :param chunk_2: fixture added as the second chunk
    """
    with MultimodalDocument() as doc:
        doc.tags['id'] = 1
        doc.text = b'hello world'
        # NOTE: np.random.randint(0, 1) has an exclusive upper bound, so it
        # always returns 0 and the embedding always has 10 elements.
        embedding_size = 10 + np.random.randint(0, 1)
        doc.embedding = np.random.random([embedding_size])
        # attach a document with embedding and without content
        doc.chunks.add(chunk_1)
        # attach a document with content and without embedding
        doc.chunks.add(chunk_2)
        yield doc
def doc_with_multimodal_chunks(embeddings):
    """Return a ``MultimodalDocument`` holding three chunks with embeddings.

    The chunks get the modalities ``'visual1'``, ``'visual2'`` and
    ``'textual'`` and the embeddings ``embeddings[0]``, ``embeddings[1]``
    and ``embeddings[2]`` respectively. Ids are refreshed on every chunk and
    then on the parent before the chunks are appended to it.

    :param embeddings: indexable collection with at least three entries
    :return: the populated document
    """
    doc = MultimodalDocument()

    chunks = []
    for position, modality in enumerate(('visual1', 'visual2', 'textual')):
        chunk = Document()
        chunk.modality = modality
        chunk.embedding = embeddings[position]
        chunks.append(chunk)

    # Refresh ids in the same order as before: chunks first, then the parent.
    for chunk in chunks:
        chunk.update_id()
    doc.update_id()

    for chunk in chunks:
        doc.chunks.append(chunk)
    return doc
def test_from_documents_set():
    """Every document in a ``MultimodalDocumentSet`` must keep its two chunks.

    Three documents are created from two-entry modality mappings, then
    wrapped once from a plain list and once from a ``DocumentSet``.
    """
    documents = [
        MultimodalDocument.from_modality_content_mapping(
            {'modA': f'textA {idx}', 'modB': f'textB {idx}'}
        )
        for idx in range(3)
    ]

    # Set built directly from the list.
    for item in MultimodalDocumentSet(documents):
        assert len(item.chunks) == 2

    # Set built from a DocumentSet wrapping the same list.
    for item in MultimodalDocumentSet(DocumentSet(documents)):
        assert len(item.chunks) == 2
def test_from_chunks_fail_multiple_granularity(chunk_1, chunk_2, chunk_4):
    """``from_chunks`` is expected to raise ``BadDocType`` for these chunks.

    The three chunk fixtures are presumably defined with differing
    granularity values (their definitions are not visible here), whereas all
    chunks of one document are expected to share the same granularity.
    """
    mixed_chunks = [chunk_1, chunk_2, chunk_4]
    with pytest.raises(BadDocType):
        MultimodalDocument.from_chunks(chunks=mixed_chunks)
def test_from_chunks_fail_length_mismatch(chunk_1, chunk_2, chunk_3):
    """``from_chunks`` is expected to raise ``LengthMismatchException``.

    The document is built from three chunks that presumably cover only two
    modalities (fixture definitions are not visible here).

    NOTE(review): a later function in this file carries the same name; if
    both live in one module, the later definition replaces this one --
    confirm which should be kept.
    """
    mismatched_chunks = [chunk_1, chunk_2, chunk_3]
    with pytest.raises(LengthMismatchException):
        MultimodalDocument.from_chunks(chunks=mismatched_chunks)
def test_from_chunks_fail_length_mismatch(chunk_1, chunk_2, chunk_3):
    """A document built from these three chunks must report itself invalid.

    The chunks presumably cover only two modalities (fixture definitions are
    not visible here); construction itself is expected to succeed and the
    mismatch to surface through ``is_valid``.

    NOTE(review): an earlier function in this file carries the same name; if
    both live in one module, this definition replaces the earlier one --
    confirm which should be kept.
    """
    document = MultimodalDocument(chunks=[chunk_1, chunk_2, chunk_3])
    assert not document.is_valid
def test_assert_granularity(chunk_1, chunk_6):
    """Constructing a document from these two chunks must raise ``BadDocType``.

    Judging by the test name, the two fixtures presumably differ in
    granularity (their definitions are not visible here).
    """
    chunks = [chunk_1, chunk_6]
    with pytest.raises(BadDocType):
        # Only the raised exception matters; the instance is not kept.
        MultimodalDocument(chunks=chunks)
def test_multimodal_document_fail_bad_doctype(visual_embedding):
    """A multimodal document without any chunks must report itself invalid.

    The document only receives a tag and an embedding; no chunk is attached
    before ``is_valid`` is checked.

    NOTE(review): an earlier function in this file carries the same name; if
    both live in one module, this definition replaces the earlier one --
    confirm which should be kept.
    """
    document = MultimodalDocument()
    document.tags['id'] = 1
    document.embedding = visual_embedding
    assert not document.is_valid