Exemple #1
0
def test_get_index_from_index_path():
    """Index paths should resolve to an index class, or to None when nothing matches."""
    anserini_path = "/something/anserini/foo"
    unrecognised_path = "/foo/bar"

    # This path is expected to resolve to the Anserini index class.
    assert Index.get_index_from_index_path(anserini_path) == AnseriniIndex

    # A path that maps to no index class yields None rather than raising.
    assert Index.get_index_from_index_path(unrecognised_path) is None
Exemple #2
0
    def get(self, request, *args, **kwargs):
        """Render ``self.template_name`` with the document selected by the query string.

        Reads the ``target_index`` and ``doc_id`` GET parameters, opens the index
        class resolved from ``target_index`` and exposes the fetched document to
        the template as ``doc``.
        """
        params = request.GET
        index_path = params["target_index"]

        # NOTE(review): get_index_from_index_path presumably returns None for an
        # unrecognised path, in which case the call below fails - confirm.
        index = Index.get_index_from_index_path(index_path)(index_path)

        doc_id = params["doc_id"]
        document = index.getdoc(doc_id)
        return render(request, self.template_name, {"doc": document})
Exemple #3
0
    def get_most_relevant_doc_based_on_config(config, query_string, target_index):
        """
        Instantiate the pipeline, index, tokenizer and embeddings described by ``config``,
        then run ``query_string`` through ``NeuralQueryView.do_query``.

        ``target_index`` is used as the index path; the index class is resolved from it.
        Returns whatever ``NeuralQueryView.do_query`` returns.
        """
        # Work on a copy, since the config ends up being modified below.
        cfg = config.copy()

        # A pipeline is still required: it pre-processes some config params, and the
        # path construction relies on that.
        pipeline = Pipeline(cfg)
        pipeline.initialize(cfg)
        paths = pipeline.get_paths(cfg)

        index_cls = Index.get_index_from_index_path(target_index)
        # TODO: Pass a proper index_key
        index = index_cls(pipeline.collection, target_index, None)

        reranker_cls = Reranker.ALL[cfg["reranker"]]
        tokenizer = NeuralQueryView.get_tokenizer(pipeline, cfg, index_cls.name)
        embeddings = EmbeddingHolder.get_instance(cfg.get("embeddings", "glove6b"))
        weight_path = paths["trained_weight_path"]
        cfg = NeuralQueryView.add_model_required_params_to_config(cfg, embeddings)

        positional = (cfg, query_string, pipeline, index, tokenizer, embeddings, reranker_cls)
        return NeuralQueryView.do_query(*positional, trained_weight_path=weight_path)
Exemple #4
0
    def get(self, request, *args, **kwargs):
        """Render ``self.template_name`` with the document selected by the query string.

        Reads the ``target_index`` and ``doc_id`` GET parameters, opens the index
        class resolved from ``target_index`` and exposes the fetched document to
        the template as ``doc``.
        """
        params = request.GET
        index_path = params["target_index"]
        index_cls = Index.get_index_from_index_path(index_path)

        # Hack - the collection and index key are passed as None since they are
        # not needed for the purpose of the WSDM demo.
        # NOTE(review): index_cls is presumably None for an unrecognised path,
        # in which case this call fails - confirm.
        index = index_cls(None, index_path, None)

        doc_id = params["doc_id"]
        document = index.getdoc(doc_id)
        return render(request, self.template_name, {"doc": document})
Exemple #5
0
    def get_bm25_results(query_string, target_index, b, k1):
        """Run a BM25 query against ``target_index`` and return display-ready result dicts.

        ``b`` and ``k1`` are forwarded to ``BM25View.do_query`` only when they are
        not None. Each returned document is cut to its first 250 items, and every
        relevance is 0 when no collection can be resolved from the index path.
        """
        index = Index.get_index_from_index_path(target_index)(target_index)

        # Leave out parameters the caller did not supply so do_query can apply
        # its own handling for them.
        overrides = {name: value for name, value in (("b", b), ("k1", k1)) if value is not None}
        doc_ids, docs = BM25View.do_query(query_string, index, n=NUM_RESULTS_TO_SHOW, **overrides)
        snippets = [text[:250] for text in docs]

        collection = Collection.get_collection_from_index_path(index.index_path)
        if collection is None:
            relevances = [0] * len(doc_ids)
        else:
            relevances = collection.get_relevance(query_string, doc_ids)

        return NeuralQueryView.construct_result_dicts(doc_ids, snippets, relevances)
Exemple #6
0
def test_create_index(tmpdir_as_cache, index_name):
    """An index obtained from ``Index.create`` exists only after ``create_index`` has run."""
    index = Index.create(index_name, provide={"collection": DummyCollection()})

    # Nothing has been built yet at this point.
    assert not index.exists()

    index.create_index()
    assert index.exists()