Beispiel #1
0
def get_document_store(document_store_type, similarity='dot_product'):
    """ TODO This method is taken from test/conftest.py but maybe should be within Haystack.
    Perhaps a class method of DocStore that just takes string for type of DocStore"""
    if document_store_type == "sql":
        if os.path.exists("haystack_test.db"):
            os.remove("haystack_test.db")
        document_store = SQLDocumentStore(url="sqlite:///haystack_test.db")
        assert document_store.get_document_count() == 0
    elif document_store_type == "memory":
        document_store = InMemoryDocumentStore()
    elif document_store_type == "elasticsearch":
        # make sure we start from a fresh index
        client = Elasticsearch()
        client.indices.delete(index='haystack_test*', ignore=[404])
        document_store = ElasticsearchDocumentStore(index="eval_document",
                                                    similarity=similarity,
                                                    timeout=3000)
    elif document_store_type in ("milvus_flat", "milvus_hnsw"):
        if document_store_type == "milvus_flat":
            index_type = IndexType.FLAT
            index_param = None
            search_param = None
        elif document_store_type == "milvus_hnsw":
            index_type = IndexType.HNSW
            index_param = {"M": 64, "efConstruction": 80}
            search_param = {"ef": 20}
        document_store = MilvusDocumentStore(similarity=similarity,
                                             index_type=index_type,
                                             index_param=index_param,
                                             search_param=search_param)
        assert document_store.get_document_count(index="eval_document") == 0
    elif document_store_type in ("faiss_flat", "faiss_hnsw"):
        if document_store_type == "faiss_flat":
            index_type = "Flat"
        elif document_store_type == "faiss_hnsw":
            index_type = "HNSW"
        status = subprocess.run(['docker rm -f haystack-postgres'], shell=True)
        time.sleep(1)
        status = subprocess.run([
            'docker run --name haystack-postgres -p 5432:5432 -e POSTGRES_PASSWORD=password -d postgres'
        ],
                                shell=True)
        time.sleep(6)
        status = subprocess.run([
            'docker exec haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"'
        ],
                                shell=True)
        time.sleep(1)
        document_store = FAISSDocumentStore(
            sql_url="postgresql://*****:*****@localhost:5432/haystack",
            faiss_index_factory_str=index_type,
            similarity=similarity)
        assert document_store.get_document_count() == 0

    else:
        raise Exception(
            f"No document store fixture for '{document_store_type}'")
    return document_store
Beispiel #2
0
def get_document_store(document_store_type, es_similarity='cosine'):
    """ TODO This method is taken from test/conftest.py but maybe should be within Haystack.
    Perhaps a class method of DocStore that just takes string for type of DocStore"""
    if document_store_type == "sql":
        if os.path.exists("haystack_test.db"):
            os.remove("haystack_test.db")
        document_store = SQLDocumentStore(url="sqlite:///haystack_test.db")
    elif document_store_type == "memory":
        document_store = InMemoryDocumentStore()
    elif document_store_type == "elasticsearch":
        # make sure we start from a fresh index
        client = Elasticsearch()
        client.indices.delete(index='haystack_test*', ignore=[404])
        document_store = ElasticsearchDocumentStore(index="eval_document",
                                                    similarity=es_similarity)
    elif document_store_type in ("faiss_flat", "faiss_hnsw"):
        if document_store_type == "faiss_flat":
            index_type = "Flat"
        elif document_store_type == "faiss_hnsw":
            index_type = "HNSW"

        #TEMP FIX for issue with deleting docs
        # status = subprocess.run(
        #     ['docker rm -f haystack-postgres'],
        #     shell=True)
        # time.sleep(3)
        # try:
        #     document_store = FAISSDocumentStore(sql_url="postgresql://*****:*****@localhost:5432/haystack",
        #                                         faiss_index_factory_str=index_type)
        # except:
        # Launch a postgres instance & create empty DB
        # logger.info("Didn't find Postgres. Start a new instance...")
        status = subprocess.run(['docker rm -f haystack-postgres'], shell=True)
        time.sleep(1)
        status = subprocess.run([
            'docker run --name haystack-postgres -p 5432:5432 -e POSTGRES_PASSWORD=password -d postgres'
        ],
                                shell=True)
        time.sleep(3)
        status = subprocess.run([
            'docker exec -it haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"'
        ],
                                shell=True)
        time.sleep(1)
        document_store = FAISSDocumentStore(
            sql_url="postgresql://*****:*****@localhost:5432/haystack",
            faiss_index_factory_str=index_type)

    else:
        raise Exception(
            f"No document store fixture for '{document_store_type}'")
    assert document_store.get_document_count() == 0
    return document_store