def test_search_ids():
    """Searching with an ``ids`` filter yields exactly one result.

    The first fixture document is given the id ``"123"`` before upload and
    the search is limited to that id.
    """
    client = Kiri(local=True)
    documents = get_docs()
    documents[0].id = "123"
    client.upload(documents)

    response = client.search("another", ids=["123"])
    hit_count = len(response.results)

    assert hit_count == 1, "Invalid number of search results"
# Renamed from test_doc_summarise: that name was reused by a later test in
# the module, which shadowed this one so it never ran, and it did not
# describe the classify() call under test.
def test_doc_classify():
    """``Document.classify`` returns a dict for an uploaded document."""
    kiri = Kiri(local=True)
    content = "I am a document."
    doc = Document(content)
    kiri.upload([doc])

    out = doc.classify(["random"])

    assert isinstance(out, dict)
# Renamed from test_doc_summarise: the name was shared with an earlier test
# in the module (shadowing it) and did not describe the emotion() call
# under test.
def test_doc_emotion():
    """``Document.emotion`` returns a string for an uploaded document."""
    kiri = Kiri(local=True)
    content = "I am a document."
    doc = Document(content)
    kiri.upload([doc])

    out = doc.emotion()

    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(out, str)
def test_doc_qa():
    """``Document.qa`` returns a string answer for an uploaded document."""
    kiri = Kiri(local=True)
    content = "I am a document."
    doc = Document(content)
    kiri.upload([doc])

    out = doc.qa("What are you?")

    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(out, str)
def test_upload_dup_id():
    """Uploading documents that all share one id raises ``ValueError``."""
    client = Kiri(local=True)
    duplicates = get_docs()

    # Force every fixture document onto the same id.
    for document in duplicates:
        document.id = "123"

    with pytest.raises(ValueError):
        client.upload(duplicates)
def test_upload():
    """Upload vectorises the first two documents and stores two documents."""
    client = Kiri(local=True)
    documents = get_docs()
    client.upload(documents)

    # Checked by explicit index, in order, exactly as many as are expected.
    for position in (0, 1):
        assert documents[position].vector is not None, "Document not vectorised"

    stored_count = len(client._store.documents)
    assert stored_count == 2, "Incorrect number of documents in mem"
def test_upload_chunked():
    """Upload of chunked documents stores two documents, each with a
    document vector and two chunk vectors."""
    client = Kiri(local=True)
    chunked = get_chunked_docs(chunking_level=1)
    client.upload(chunked)

    stored_count = len(client._store.documents)
    assert stored_count == 2, "Incorrect number of documents in mem"

    for document in chunked:
        assert document.vector is not None, "Document not vectorised"
        chunk_total = len(document.chunk_vectors)
        assert chunk_total == 2, "Invalid number of chunk vectors"
def test_qa_chunk():
    """``Kiri.qa`` over chunked documents returns a list whose items hold a
    string at index 0 and a ``SearchResult`` at index 1."""
    kiri = Kiri(local=True)
    docs = get_chunked_docs()
    kiri.upload(docs)

    results = kiri.qa("another?")

    assert isinstance(results, list)
    for result in results:
        # isinstance for both checks: idiomatic and consistent with the
        # SearchResult assertion.
        assert isinstance(result[0], str)
        assert isinstance(result[1], SearchResult)
- Node dependencies """ elastic = False if elastic: doc_store = ElasticDocStore("http://localhost:9000", doc_class=ElasticChunkedDocument, index="kiri_default") docs = [d["elastic"] for d in tech_docs] else: doc_store = InMemoryDocStore(doc_class=ChunkedDocument) docs = [d["memory"] for d in tech_docs] kiri = Kiri(doc_store, local=True) kiri.upload(docs) query = "" if len(argv) == 1: print("Supply a query when running this script") print("Usage: python doc_search.py \"<your query here>\"") exit(0) elif len(argv) == 2: query = argv[1] else: query = " ".join(argv[1:]) print(f"Query: {query}") results = kiri.search(query, max_results=3, min_score=0.01)
""" elastic = False if elastic: doc_store = ElasticDocStore("http://localhost:9000", doc_class=ElasticChunkedDocument, index="kiri_default") documents = big_n_docs["elastic"] else: doc_store = InMemoryDocStore(doc_class=ChunkedDocument) documents = big_n_docs["memory"] kiri = Kiri(doc_store, local=True) kiri.upload(documents) print("Hello! I'm a Kiri chatbot.") # Hold previous Q/A pairs for additional context session_qa = [] while True: try: question = input() answers = kiri.qa(question, prev_qa=session_qa) # Only showing the top-rated answer print(answers[0][0]) prev_qa = (question, answers[0][0]) session_qa += prev_qa print() except Exception as e: print("Something broke, try again.")
def test_search_max_results_chunk():
    """Searching chunked documents with ``max_results=1`` yields one result."""
    client = Kiri(local=True)
    chunked = get_chunked_docs()
    client.upload(chunked)

    response = client.search("another", max_results=1)
    hit_count = len(response.results)

    assert hit_count == 1, "Invalid number of search results"
def test_search():
    """An unfiltered search over the fixture documents yields two results."""
    client = Kiri(local=True)
    documents = get_docs()
    client.upload(documents)

    response = client.search("another")
    hit_count = len(response.results)

    assert hit_count == 2, "Invalid number of search results"
def test_upload_mixed_type():
    """Uploading a ``Document`` together with a ``ChunkedDocument`` raises
    ``ValueError``."""
    client = Kiri(local=True)
    plain = Document("a")
    chunked = ChunkedDocument("b")
    mixed = [plain, chunked]

    with pytest.raises(ValueError):
        client.upload(mixed)