Example #1
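    # Assumed imports for this snippet: Index from elasticsearch_dsl, and
    # get_text_analyzer from mainapp.documents.index (see the docstring below)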
    def test_tokenization(self):
        """
        Tests whether the elasticsearch analyzer yields the right tokens for German.

        Check the comments in mainapp.documents.index for more details
        """
        tokenizations = {
            "die": [],
            "hunde": ["hunde", "hund"],
            "wi-fi": ["wi", "fi"],
            "Feuerwehr": ["feuerwehr"],  # Would ideally split the words
            "oktopoden": ["oktopoden", "oktopod"],
            "Äpfel": ["äpfel", "apfel"],
            "ging": ["ging"],
            "schwierigste": ["schwierigste", "schwierig"],
            "1234/89": ["1234", "89"],  # Would be better if it included "1234/89"
        }

        text_analyzer = get_text_analyzer("german")
        elastic_index = Index("mst-test-tokenization")
        if not elastic_index.exists():
            elastic_index.create()
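        # Analysis settings can only be changed while the index is closed,
        # hence the close() / save() / open() sequence below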
        elastic_index.close()
        elastic_index.analyzer(text_analyzer)
        elastic_index.save()
        elastic_index.open()
        elastic_index.flush()

        # Run each word through the analyzer and compare the emitted tokens
        for word, expected_tokens in tokenizations.items():
            analysis = elastic_index.analyze(
                body={"analyzer": "text_analyzer", "text": word}
            )
            actual_tokens = [i["token"] for i in analysis["tokens"]]
            self.assertEqual(expected_tokens, actual_tokens, "Word was {}".format(word))
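For context, here is a minimal sketch of what get_text_analyzer("german") might build with elasticsearch-dsl; a keyword_repeat filter keeps the original token next to its stem, which would explain pairs like ["hunde", "hund"] above. Every name and filter choice here is an assumption, not the project's actual definition.

from elasticsearch_dsl import analyzer, token_filter

text_analyzer = analyzer(
    "text_analyzer",
    tokenizer="standard",
    filter=[
        "lowercase",
        # "die" -> [] suggests a German stopword filter
        token_filter("german_stop", type="stop", stopwords="_german_"),
        # keyword_repeat + stemmer + remove_duplicates would yield the
        # ["hunde", "hund"] / ["ging"] style expectations above
        "keyword_repeat",
        token_filter("german_stemmer", type="stemmer", language="light_german"),
        "remove_duplicates",
    ],
)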
Example #2
from elasticsearch_dsl import Index
from rest_framework.decorators import api_view
from rest_framework.response import Response

@api_view(["GET"])  # assumed decorator so DRF can render the Response below
def rebuild_index_old(request):
    # Drop and recreate the Elasticsearch index named by the "name" query parameter
    index_name = request.GET.get("name", "profils")
    es = Index(index_name, using="default")
    if es.exists():
        es.delete()
    es.create()
    es.save()  # syncs any settings registered on the Index; redundant right after create()
    return Response({"message": "Rebuild of index " + index_name + " complete"})
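To exercise this view it needs a URL route; a minimal sketch, assuming a standard Django URLconf (the path and name are hypothetical):

from django.urls import path

urlpatterns = [
    # GET /rebuild-index/?name=profils drops and recreates the "profils" index
    path("rebuild-index/", rebuild_index_old, name="rebuild-index"),
]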