Exemple #1
0
    def setUp(self):
        """Create the indices, users, project and URLs used by the annotator tests."""
        primary_index = reindex_test_dataset()
        secondary_index = reindex_test_dataset()
        self.test_index_name = primary_index
        self.secondary_index = secondary_index
        # get_or_create returns (object, created); only the object is kept.
        self.index, _created = Index.objects.get_or_create(name=secondary_index)

        # The first user is handed to project_creation; both users are then
        # added to the project's user list.
        self.user = create_test_user('annotator', '*****@*****.**', 'pw')
        self.user2 = create_test_user('annotator2', '*****@*****.**', 'pw2')
        self.project = project_creation(
            "entityTestProject", primary_index, self.user
        )
        self.project.indices.add(self.index)
        for member in (self.user, self.user2):
            self.project.users.add(member)

        self.client.login(username='******', password='******')
        self.ec = ElasticCore()

        project_kwargs = {"project_pk": self.project.pk}
        self.list_view_url = reverse("v2:annotator-list", kwargs=project_kwargs)
        # The list URL is set before the annotator is created.
        self.annotator = self._create_annotator()
        self.pull_document_url = reverse(
            "v2:annotator-pull-document",
            kwargs={**project_kwargs, "pk": self.annotator["id"]},
        )
Exemple #2
0
    def setUp(self):
        """Prepare the project, fact names, test query and evaluator id slots."""
        # Owner of the project
        self.test_index = reindex_test_dataset(
            from_index=TEST_INDEX_ENTITY_EVALUATOR
        )
        self.user = create_test_user("EvaluatorOwner", "*****@*****.**", "pw")
        self.project = project_creation(
            "EvaluatorTestProject", self.test_index, self.user
        )
        self.project.users.add(self.user)

        project_root = f"{TEST_VERSION_PREFIX}/projects/{self.project.id}"
        self.url = f"{project_root}/evaluators/"
        self.project_url = project_root

        # Fact names referenced by the tests.
        self.true_fact_name = "PER"
        self.pred_fact_name = "PER_CRF_30"

        self.true_fact_name_sent_index = "PER_SENT"
        self.pred_fact_name_sent_index = "PER_CRF_31_SENT"

        self.fact_name_no_spans = "PER_FN_REGEX_NO_SPANS"

        self.fact_name_different_doc_paths = "PER_DOUBLE"

        self.core_variables_url = f"{TEST_VERSION_PREFIX}/core_variables/5/"

        # TODO! Construct a test query
        self.fact_names_to_filter = [self.true_fact_name, self.pred_fact_name]
        facts_query = Query()
        facts_query.add_facts_filter(
            self.fact_names_to_filter, [], operator="must"
        )
        # Only the result of the query's __dict__() call is kept.
        self.test_query = facts_query.__dict__()

        self.client.login(username="******", password="******")

        # Evaluator ids start out unset.
        self.token_based_evaluator_id = None
        self.value_based_evaluator_id = None
        self.token_based_sent_index_evaluator_id = None
        self.value_based_sent_index_evaluator_id = None
Exemple #3
0
    def setUp(self):
        """Create two projects and a sample document, log in and run the insert check."""
        index_name = reindex_test_dataset()
        owner = create_test_user('first_user', '*****@*****.**', 'pw')
        self.test_index_name = index_name
        self.user = owner
        self.project = project_creation("DocumentImporterAPI", index_name, owner)

        self.validation_project = project_creation(
            "validation_project", "random_index_name", owner
        )

        # Random id and uuid for the sample document.
        self.document_id = random.randint(10000000, 90000000)
        self.uuid = uuid.uuid1()
        self.source = {"hello": "world", "uuid": self.uuid}
        self.document = {
            "_index": index_name,
            "_id": self.document_id,
            "_source": self.source,
        }

        random_key = uuid.uuid1()
        self.target_field_random_key = random_key
        self.target_field = f"{random_key}_court_case"
        self.ec = ElasticCore()

        self.client.login(username='******', password='******')
        # Called last, after every attribute above has been set.
        self._check_inserting_documents()
Exemple #4
0
    def setUp(self):
        """Create the project and index one document carrying a single fact."""
        index_name = reindex_test_dataset()
        self.test_index_name = index_name
        self.user = create_test_user('first_user', '*****@*****.**', 'pw')
        self.project = project_creation(
            "FactManagementApplicationTests", index_name, self.user
        )

        self.uuid = uuid.uuid1().hex
        self.content = "miks sa oled loll!?"
        org_fact = {
            "str_val": "politsei",
            "fact": "ORG",
            "spans": json.dumps([[0, 0]]),
            "doc_path": "hello",
        }
        self.source = {TEST_FIELD: self.content, TEXTA_TAGS_KEY: [org_fact]}

        self.ec = ElasticCore()
        # The document is indexed under the generated uuid with refresh="wait_for".
        self.ec.es.index(
            index=index_name,
            id=self.uuid,
            body=self.source,
            refresh="wait_for",
        )
        self.kwargs = {"project_pk": self.project.pk}
        self.client.login(username='******', password='******')
Exemple #5
0
 def setUp(self):
     """Create the index, owner user and project, then log the client in."""
     index_name = reindex_test_dataset()
     # Owner of the project
     owner = create_test_user('taggerOwner', '*****@*****.**', 'pw')
     project = project_creation("taggerTestProject", index_name, owner)
     project.users.add(owner)

     self.test_index_name = index_name
     self.user = owner
     self.project = project
     self.client.login(username='******', password='******')
 def setUp(self) -> None:
     """Create a project user, a user outside the project, and the list URL."""
     self.test_index_name = reindex_test_dataset()
     self.user = create_test_user('langDetectUser', '*****@*****.**', 'pw')
     # This user is created but not added to the project below.
     self.non_project_user = create_test_user(
         'langDetectUserThatIsNotInProject', '*****@*****.**', 'pw'
     )
     self.project = project_creation(
         "langDetectProject", self.test_index_name, self.user
     )
     self.project.users.add(self.user)
     self.client.login(username='******', password='******')
     project_kwargs = {"project_pk": self.project.pk}
     self.url = reverse("v2:lang_index-list", kwargs=project_kwargs)
Exemple #7
0
    @classmethod
    def setUpTestData(cls):
        """Create the class-level index, user, project and URL for the anonymizer tests.

        Django's TestCase calls this hook on the class itself, so it has to
        be a classmethod; as a plain function the call would fail for lack
        of the ``cls`` argument.
        """
        cls.test_index_name = reindex_test_dataset()
        cls.user = create_test_user('user', '*****@*****.**', 'pw')
        cls.project = project_creation("AnonymizerTestProject",
                                       cls.test_index_name, cls.user)
        cls.project.users.add(cls.user)
        cls.url = f'{TEST_VERSION_PREFIX}/projects/{cls.project.id}/anonymizers/'

        # Starts out unset.
        cls.anonymizer_id = None
Exemple #8
0
 def setUp(self):
     """Create the index, user and project, log in and build the MLP list URL."""
     index_name = reindex_test_dataset()
     self.test_index_name = index_name
     self.ec = ElasticCore()
     owner = create_test_user('mlpUser', '*****@*****.**', 'pw')
     self.user = owner
     self.project = project_creation("mlpTestProject", index_name, owner)
     self.project.users.add(owner)
     self.client.login(username='******', password='******')
     route_name = f"{VERSION_NAMESPACE}:mlp_index-list"
     self.url = reverse(route_name, kwargs={"project_pk": self.project.pk})
Exemple #9
0
    def setUp(self):
        """Create the project and the embedding endpoint URLs, then log in."""
        self.test_index_name = reindex_test_dataset()
        self.user = create_test_user('embeddingOwner', '*****@*****.**', 'pw')
        self.project = project_creation(
            "embeddingTestProject", self.test_index_name, self.user
        )
        self.project.users.add(self.user)

        # Both URLs share the project root.
        project_root = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}'
        self.url = f'{project_root}/embeddings/'
        self.project_url = project_root
        self.test_embedding_id = None
        self.client.login(username='******', password='******')
Exemple #10
0
    def setUp(self):
        """Prepare the project, evaluator lookup tables, fact names and test query."""
        # Owner of the project
        self.test_index = reindex_test_dataset(from_index=TEST_INDEX_EVALUATOR)
        self.user = create_test_user("EvaluatorOwner", "*****@*****.**", "pw")
        self.project = project_creation(
            "EvaluatorTestProject", self.test_index, self.user
        )
        self.project.users.add(self.user)

        project_root = f"{TEST_VERSION_PREFIX}/projects/{self.project.id}"
        self.url = f"{project_root}/evaluators/"
        self.project_url = project_root

        self.multilabel_avg_functions = choices.MULTILABEL_AVG_FUNCTIONS
        self.binary_avg_functions = choices.BINARY_AVG_FUNCTIONS

        # One entry per averaging function, each initialised to None.
        self.multilabel_evaluators = dict.fromkeys(self.multilabel_avg_functions)
        self.binary_evaluators = dict.fromkeys(self.binary_avg_functions)

        self.memory_optimized_multilabel_evaluators = dict.fromkeys(
            self.multilabel_avg_functions
        )
        self.memory_optimized_binary_evaluators = dict.fromkeys(
            self.binary_avg_functions
        )

        self.true_fact_name = "TRUE_TAG"
        self.pred_fact_name = "PREDICTED_TAG"

        self.true_fact_value = "650 kapital"
        self.pred_fact_value = "650 kuvand"

        self.core_variables_url = f"{TEST_VERSION_PREFIX}/core_variables/5/"

        # Construct a test query
        self.fact_names_to_filter = [self.true_fact_name, self.pred_fact_name]
        self.fact_values_to_filter = [
            "650 bioeetika",
            "650 rahvusbibliograafiad",
        ]
        facts_query = Query()
        facts_query.add_facts_filter(
            self.fact_names_to_filter,
            self.fact_values_to_filter,
            operator="must",
        )
        # Only the result of the query's __dict__() call is kept.
        self.test_query = facts_query.__dict__()

        self.client.login(username="******", password="******")
Exemple #11
0
    def setUp(self):
        """Prepare index names, the project and the CRF extractor URLs."""
        self.test_incorrect_index_name = TEST_INDEX
        self.test_index_name = CRF_TEST_INDEX
        self.test_index_copy = reindex_test_dataset(from_index=CRF_TEST_INDEX)
        self.user = create_test_user('crfOwner', '*****@*****.**', 'pw')
        # The project is created with CRF_TEST_INDEX itself, not the copy.
        self.project = project_creation(
            "crfTestProject", self.test_index_name, self.user
        )
        self.project.users.add(self.user)

        project_root = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}'
        self.url = f'{project_root}/crf_extractors/'
        self.project_url = project_root

        self.embedding_ids = [None]
        self.test_crf_ids = []
        self.client.login(username='******', password='******')
Exemple #12
0
    def setUp(self) -> None:
        """Create users, two index-less projects and an upload, then run the pipeline helpers."""
        self.test_index_name = reindex_test_dataset()
        self.user = create_test_user('Owner', '*****@*****.**', 'pw')
        self.unauthorized_user = create_test_user(
            'unauthorized', '*****@*****.**', 'pw'
        )
        self.file_name = "d41d8cd98f00b204e9800998ecf8427e.txt"

        # Both projects are created with index_title=None; only the first
        # one gets the user added to its user list.
        self.project = project_creation(
            "test_doc_parser", index_title=None, author=self.user
        )
        self.project.users.add(self.user)
        self.unauth_project = project_creation(
            "unauth_project", index_title=None, author=self.user
        )

        self.file = SimpleUploadedFile(
            "text.txt", b"file_content", content_type="text/html"
        )
        self.client.login(username='******', password='******')
        # Helper order is kept exactly as is.
        self._basic_pipeline_functionality()
        self.file_path = self._get_file_path()
        self.ec = ElasticCore()
Exemple #13
0
    def setUp(self):
        """Create a normal, an admin and an unauthorized user plus the list URL."""
        self.test_index_name = reindex_test_dataset()

        normal = create_test_user('user', '*****@*****.**', 'pw')
        self.normal_user = normal
        self.project = project_creation(
            "applyAnalyzers", self.test_index_name, normal
        )
        self.project.users.add(normal)

        admin = create_test_user('admin', '*****@*****.**', 'pw')
        self.admin_user = admin
        self.project.users.add(admin)

        # This user is created but never added to the project.
        self.unauthorized_user = create_test_user(
            'unauthorized', '*****@*****.**', 'pw'
        )

        route_name = f"{VERSION_NAMESPACE}:apply_analyzers-list"
        self.list_url = reverse(
            route_name, kwargs={"project_pk": self.project.pk}
        )
        self.client.login(username='******', password='******')
Exemple #14
0
    def setUp(self):
        """Create the tagger project, post a reindex request and import a test model."""
        # Owner of the project
        self.test_index_name = reindex_test_dataset()
        self.user = create_test_user('taggerOwner', '*****@*****.**', 'pw')
        self.project = project_creation(
            "taggerTestProject", self.test_index_name, self.user
        )
        self.project.users.add(self.user)

        project_root = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}'
        self.url = f'{project_root}/taggers/'
        self.project_url = project_root
        self.multitag_text_url = f'{project_root}/taggers/multitag_text/'

        # set vectorizer & classifier options
        self.vectorizer_opts = ('TfIdf Vectorizer',)
        self.classifier_opts = ('LinearSVC',)

        # list tagger_ids for testing. is populated during training test
        self.test_tagger_ids = []
        self.client.login(username='******', password='******')

        # new fact name and value used when applying tagger to index
        self.new_fact_name = "TEST_TAGGER_NAME"
        self.new_fact_value = "TEST_TAGGER_VALUE"

        # Create copy of test index
        self.reindex_url = f'{project_root}/elastic/reindexer/'
        # Generate name for new index containing random id to make sure it doesn't already exist
        self.test_index_copy = f"test_apply_tagger_{uuid.uuid4().hex}"

        self.reindex_payload = {
            "description": "test index for applying taggers",
            "indices": [self.test_index_name],
            "query": json.dumps(TEST_QUERY),
            "new_index": self.test_index_copy,
            "fields": [TEST_FIELD],
        }
        reindex_response = self.client.post(
            self.reindex_url, self.reindex_payload, format='json'
        )
        print_output(
            "reindex test index for applying tagger:response.data:",
            reindex_response.json(),
        )
        reindexer_pk = reindex_response.json()["id"]
        self.reindexer_object = Reindexer.objects.get(pk=reindexer_pk)

        self.test_imported_binary_tagger_id = self.import_test_model(
            TEST_TAGGER_BINARY
        )
Exemple #15
0
    def setUp(self):
        """Create the project, three regex taggers and a reindex request.

        The ids of the created taggers are stored, in payload order, as
        ``self.police``, ``self.medic`` and ``self.firefighter``.
        """
        self.test_index_name = reindex_test_dataset()
        self.user = create_test_user('user', '*****@*****.**', 'pw')
        self.project = project_creation("RegexTaggerTestProject", self.test_index_name, self.user)
        self.project.users.add(self.user)
        self.url = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}/regex_taggers/'

        self.group_url = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}/regex_tagger_groups/'

        self.tagger_id = None
        self.client.login(username='******', password='******')

        payloads = [
            {"description": "politsei", "lexicon": ["varas", "röövel", "vägivald", "pettus"]},
            {"description": "kiirabi", "lexicon": ["haav", "vigastus", "trauma"]},
            {"description": "tuletõrje", "lexicon": ["põleng", "õnnetus"]}
        ]

        tagger_url = reverse(f"{VERSION_NAMESPACE}:regex_tagger-list", kwargs={"project_pk": self.project.pk})
        ids = []
        for payload in payloads:
            response = self.client.post(tagger_url, payload)
            # assertEqual reports the actual status code on failure, which
            # assertTrue(a == b) would hide.
            self.assertEqual(response.status_code, status.HTTP_201_CREATED)
            ids.append(int(response.data["id"]))

        self.police, self.medic, self.firefighter = ids

        # Create copy of test index
        self.reindex_url = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}/elastic/reindexer/'
        # Generate name for new index containing random id to make sure it doesn't already exist
        self.test_index_copy = f"test_apply_regex_tagger_{uuid.uuid4().hex}"

        self.reindex_payload = {
            "description": "test index for applying taggers",
            "indices": [self.test_index_name],
            "query": json.dumps(TEST_QUERY),
            "new_index": self.test_index_copy,
            "fields": [TEST_FIELD]
        }
        resp = self.client.post(self.reindex_url, self.reindex_payload, format='json')
        print_output("reindex test index for applying tagger:response.data:", resp.json())
        self.reindexer_object = Reindexer.objects.get(pk=resp.json()["id"])
Exemple #16
0
    def setUp(self):
        """Create a default user, a superuser and the reindexer test project.

        The user needs to be admin because of changed indices permissions.
        """
        self.test_index_name = reindex_test_dataset()
        self.default_password = '******'
        self.default_username = '******'
        self.user = create_test_user(
            self.default_username, '*****@*****.**', self.default_password
        )

        # create admin to test indices removal from project
        admin = create_test_user(name='admin', password='******')
        admin.is_superuser = True
        admin.save()
        self.admin = admin

        self.project = project_creation(
            "ReindexerTestProject", self.test_index_name, self.user
        )
        self.project.users.add(self.user)
        self.ec = ElasticCore()
        # The client logs in as the default user, not the superuser.
        self.client.login(
            username=self.default_username, password=self.default_password
        )

        self.new_index_name = f"{TEST_FIELD}_2"
Exemple #17
0
    def setUp(self):
        """Create the torch tagger project and post a reindex request for it."""
        # Owner of the project
        self.test_index_name = reindex_test_dataset()
        self.user = create_test_user('torchTaggerOwner', '*****@*****.**', 'pw')
        self.project = project_creation(
            'torchTaggerTestProject', self.test_index_name, self.user
        )
        self.project.users.add(self.user)

        project_root = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}'
        self.url = f'{project_root}/torchtaggers/'
        self.project_url = project_root
        self.test_embedding_id = None
        self.torch_models = list(TORCH_MODELS.keys())
        self.test_tagger_id = None
        self.test_multiclass_tagger_id = None

        self.client.login(username='******', password='******')

        # new fact name and value used when applying tagger to index
        self.new_fact_name = "TEST_TORCH_TAGGER_NAME"
        self.new_multiclass_fact_name = "TEST_TORCH_TAGGER_NAME_MC"
        self.new_fact_value = "TEST_TORCH_TAGGER_VALUE"

        # Create copy of test index
        self.reindex_url = f'{project_root}/elastic/reindexer/'
        # Generate name for new index containing random id to make sure it doesn't already exist
        self.test_index_copy = f"test_apply_torch_tagger_{uuid.uuid4().hex}"

        self.reindex_payload = {
            "description": "test index for applying taggers",
            "indices": [self.test_index_name],
            "query": json.dumps(TEST_QUERY),
            "new_index": self.test_index_copy,
            "fields": TEST_FIELD_CHOICE,
        }
        reindex_response = self.client.post(
            self.reindex_url, self.reindex_payload, format='json'
        )
        print_output(
            "reindex test index for applying torch tagger:response.data:",
            reindex_response.json(),
        )
        reindexer_pk = reindex_response.json()["id"]
        self.reindexer_object = Reindexer.objects.get(pk=reindexer_pk)
        self.ec = ElasticCore()
Exemple #18
0
    def setUp(self):
        """Create a default user, a superuser and the index splitter project.

        The user needs to be admin because of changed indices permissions.
        """
        self.test_index_name = reindex_test_dataset()
        self.default_password = '******'
        self.default_username = '******'
        self.user = create_test_user(
            self.default_username, '*****@*****.**', self.default_password
        )

        admin = create_test_user(name='admin', password='******')
        admin.is_superuser = True
        admin.save()
        self.admin = admin

        self.project = project_creation(
            "IndexSplittingTestProject", self.test_index_name, self.user
        )
        self.project.users.add(self.user)

        project_root = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}'
        self.url = f'{project_root}/elastic/index_splitter/'

        # The client logs in as the default user, not the superuser.
        self.client.login(
            username=self.default_username, password=self.default_password
        )
        self.ec = ElasticCore()
        self.FACT = "TEEMA"
Exemple #19
0
    def setUp(self):
        """Create the tagger project, its URLs and the training option tuples."""
        # Owner of the project
        self.test_index_name = reindex_test_dataset()
        self.user = create_test_user('taggerOwner', '*****@*****.**', 'pw')
        self.project = project_creation(
            "taggerTestProject", self.test_index_name, self.user
        )
        self.project.users.add(self.user)

        project_root = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}'
        self.url = f'{project_root}/taggers/'
        self.project_url = project_root
        self.multitag_text_url = f'{project_root}/taggers/multitag_text/'

        # set vectorizer & classifier options
        self.vectorizer_opts = ('Count Vectorizer',)
        self.classifier_opts = ('Logistic Regression',)

        # Only 'estonian' is active; 'english' and 'finnish' are disabled.
        self.snowball_languages = ('estonian',)

        # list tagger_ids for testing. is populated during training test
        self.test_tagger_ids = []
        self.client.login(username='******', password='******')
Exemple #20
0
    def setUp(self):
        """Create the project and index one document carrying a single fact."""
        index_name = reindex_test_dataset()
        self.test_index_name = index_name
        self.user = create_test_user('first_user', '*****@*****.**', 'pw')
        self.project = project_creation(
            "FactManagementTestCase", index_name, self.user
        )

        self.uuid = uuid.uuid1().hex
        org_fact = {
            "str_val": "politsei",
            "fact": "ORG",
            "spans": json.dumps([[0, 0]]),
            "doc_path": "hello",
        }
        self.source = {"hello": "world", TEXTA_TAGS_KEY: [org_fact]}

        self.ec = ElasticCore()
        # The document is indexed under the generated uuid with refresh="wait_for".
        self.ec.es.index(
            index=index_name,
            id=self.uuid,
            body=self.source,
            refresh="wait_for",
        )
        self.client.login(username='******', password='******')
    def setUp(self):
        """Create the tagger group project, post a reindex request and import a test model."""
        # Owner of the project
        self.test_index_name = reindex_test_dataset()
        self.user = create_test_user('taggerOwner', '*****@*****.**', 'pw')
        self.project = project_creation(
            "taggerGroupTestProject", self.test_index_name, self.user
        )
        self.project.users.add(self.user)

        project_root = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}'
        self.url = f'{project_root}/tagger_groups/'
        self.test_tagger_group_id = None

        self.client.login(username='******', password='******')
        # new fact name and value used when applying tagger to index
        self.new_fact_name = "TEST_TAGGER_GROUP_NAME"
        self.new_fact_name_tag_limit = "TEST_TAGGER_GROUP_NAME_LIMITED_TAGS"

        # Create copy of test index
        self.reindex_url = f'{project_root}/elastic/reindexer/'
        # Generate name for new index containing random id to make sure it doesn't already exist
        self.test_index_copy = f"test_apply_tagger_group_{uuid.uuid4().hex}"

        self.reindex_payload = {
            "description": "test index for applying tagger group",
            "indices": [self.test_index_name],
            "query": json.dumps(TEST_QUERY),
            "new_index": self.test_index_copy,
            "fields": [TEST_FIELD],
        }
        reindex_response = self.client.post(
            self.reindex_url, self.reindex_payload, format='json'
        )
        print_output(
            "reindex test index for applying tagger group:response.data:",
            reindex_response.json(),
        )
        reindexer_pk = reindex_response.json()["id"]
        self.reindexer_object = Reindexer.objects.get(pk=reindexer_pk)

        self.test_imported_tagger_group_id = self.import_test_model(
            TEST_TAGGER_GROUP
        )
    def setUp(self) -> None:
        """Set up the project, the list URLs and two regex tagger groups."""
        self.test_index_name = reindex_test_dataset()
        self._set_up_project()

        project_kwargs = {"project_pk": self.project.pk}
        self.tagger_group_list_url = reverse(
            f"{VERSION_NAMESPACE}:regex_tagger_group-list",
            kwargs=project_kwargs,
        )
        self.tagger_list_url = reverse(
            f"{VERSION_NAMESPACE}:regex_tagger-list",
            kwargs=project_kwargs,
        )

        emergency_payloads = [
            {
                "description": "politsei",
                "lexicon": ["varas", "röövel", "vägivald", "pettus"],
            },
            {
                "description": "kiirabi",
                "lexicon": ["haav", "vigastus", "trauma"],
            },
            {
                "description": "tuletõrje",
                "lexicon": ["põleng", "õnnetus"],
            },
        ]
        (
            self.police_id,
            self.medic_id,
            self.firefighter_id,
            self.emergency_tagger_group_id,
        ) = self._set_up_tagger_group(emergency_payloads, "Hädaabi")

        pain_payloads = [
            {
                "description": "peavalu",
                "lexicon": [
                    "migreen", "migreenid", "migreeni", "peavalu",
                    "pea valutab", "valutab pea",
                ],
            },
            {
                "description": "kõhuvalu",
                "lexicon": ["kõht valutab", "kõhuvalu", "valutab kõht"],
            },
        ]
        # NOTE(review): payload order is peavalu, kõhuvalu while the targets
        # are stomach_pain_id, headache_id - confirm against the return
        # order of _set_up_tagger_group.
        (
            self.stomach_pain_id,
            self.headache_id,
            self.pain_tagger_group_id,
        ) = self._set_up_tagger_group(pain_payloads, "Valu")
Exemple #23
0
 def __reindex_test_index(self):
     """Store a fresh reindex_test_dataset() result and add it to the project."""
     fresh_index = reindex_test_dataset()
     self.test_index_name = fresh_index
     self.__add_indices_to_project([fresh_index])
Exemple #24
0
    def setUp(self):
        """Create the project, a FastText embedding and two Rakun extractors.

        Posts a FastText embedding and asserts that it was created and that
        its task exists and is completed. It then posts two Rakun extractor
        payloads referencing that embedding and stores the created ids in
        ``self.ids``.
        """
        self.test_index_name = reindex_test_dataset()
        self.user = create_test_user('user', '*****@*****.**', 'pw')
        self.project = project_creation("RakunExtractorTestProject", self.test_index_name, self.user)
        self.project.users.add(self.user)
        self.url = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}/rakun_extractors/'
        self.embedding_url = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}/embeddings/'

        self.rakun_id = None
        self.client.login(username='******', password='******')

        self.new_stopwords = ["New_word1", "New_word2"]

        # Create FastText Embedding, which will save facebook_model
        fasttext_payload = {
            "description": "TestEmbedding",
            "query": json.dumps(TEST_QUERY),
            "indices": [{"name": self.test_index_name}],
            "fields": TEST_FIELD_CHOICE,
            "embedding_type": "FastTextEmbedding"
        }
        print_output("Staring Rakun fasttext embedding", "post")

        response = self.client.post(self.embedding_url, json.dumps(fasttext_payload), content_type='application/json')
        print_output('test_create_rakun_fasttext_embedding_training_and_task_signal:response.data', response.data)
        # Check if Embedding gets created
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
        created_embedding = Embedding.objects.get(id=response.data['id'])
        # Keep the embedding id; the Rakun payloads below reference it.
        self.test_embedding_id = created_embedding.id
        # Check if Task gets created via a signal. This has to happen before
        # task.status is read, otherwise a missing task would surface as an
        # AttributeError instead of a failed assertion.
        self.assertIsNotNone(created_embedding.task)
        print_output("created Rakun fasttext embedding task status", created_embedding.task.status)
        # Check if Embedding gets trained and completed
        self.assertEqual(created_embedding.task.status, Task.STATUS_COMPLETED)

        self.ids = []
        self.payloads = [
            {
                "description": "test_all",
                "distance_method": "fasttext",
                "distance_threshold": 1,
                "num_keywords": 1,
                "pair_diff_length": 1,
                "stopwords": ["word1", "word2"],
                "bigram_count_threshold": 2,
                "min_tokens": 1,
                "max_tokens": 2,
                "max_similar": 2,
                "max_occurrence": 2,
                "fasttext_embedding": self.test_embedding_id
            },
            {
                "description": "rakun_test_new",
                "distance_method": "fasttext",
                "distance_threshold": 1.0,
                "min_tokens": 1,
                "max_tokens": 2,
                "fasttext_embedding": self.test_embedding_id
            }
        ]

        rakun_url = reverse(f"{VERSION_NAMESPACE}:rakun_extractor-list", kwargs={"project_pk": self.project.pk})
        for payload in self.payloads:
            response = self.client.post(rakun_url, payload)
            # assertEqual reports the actual status code on failure.
            self.assertEqual(response.status_code, status.HTTP_201_CREATED)
            self.ids.append(int(response.data["id"]))
Exemple #25
0
    def setUp(self):
        """Create the BERT tagger project, fetch the test model, reindex and import test models."""
        # Owner of the project
        self.test_index_name = reindex_test_dataset()
        self.user = create_test_user('BertTaggerOwner', '*****@*****.**', 'pw')
        self.admin_user = create_test_user(
            "AdminBertUser", '*****@*****.**', 'pw', superuser=True
        )
        self.project = project_creation(
            "BertTaggerTestProject", self.test_index_name, self.user
        )
        for member in (self.user, self.admin_user):
            self.project.users.add(member)

        project_root = f'{TEST_VERSION_PREFIX}/projects/{self.project.id}'
        self.url = f'{project_root}/bert_taggers/'
        self.project_url = project_root

        self.test_tagger_id = None
        self.test_multiclass_tagger_id = None

        self.client.login(username='******', password='******')

        # Check if TEST_BERT_MODEL is already downloaded
        downloaded_models = get_downloaded_bert_models(
            BERT_PRETRAINED_MODEL_DIRECTORY
        )
        self.test_model_existed = TEST_BERT_MODEL in downloaded_models
        download_bert_requirements(
            BERT_PRETRAINED_MODEL_DIRECTORY,
            [TEST_BERT_MODEL],
            cache_directory=BERT_CACHE_DIR,
            num_labels=2,
        )

        # new fact name and value used when applying tagger to index
        self.new_fact_name = "TEST_BERT_TAGGER_NAME"
        self.new_multiclass_fact_name = "TEST_BERT_TAGGER_NAME_MC"
        self.new_fact_value = "TEST_BERT_TAGGER_VALUE"

        # Create copy of test index
        self.reindex_url = f'{project_root}/elastic/reindexer/'
        # Generate name for new index containing random id to make sure it doesn't already exist
        self.test_index_copy = f"test_apply_bert_tagger_{uuid.uuid4().hex}"

        self.reindex_payload = {
            "description": "test index for applying taggers",
            "indices": [self.test_index_name],
            "query": json.dumps(TEST_QUERY),
            "new_index": self.test_index_copy,
            "fields": TEST_FIELD_CHOICE,
        }
        reindex_response = self.client.post(
            self.reindex_url, self.reindex_payload, format='json'
        )
        print_output(
            "reindex test index for applying bert tagger:response.data:",
            reindex_response.json(),
        )
        reindexer_pk = reindex_response.json()["id"]
        self.reindexer_object = Reindexer.objects.get(pk=reindexer_pk)

        # Import order is kept: binary GPU, multiclass GPU, then binary CPU.
        self.test_imported_binary_gpu_tagger_id = self.import_test_model(
            TEST_BERT_TAGGER_BINARY_GPU
        )
        self.test_imported_multiclass_gpu_tagger_id = self.import_test_model(
            TEST_BERT_TAGGER_MULTICLASS_GPU
        )

        self.test_imported_binary_cpu_tagger_id = self.import_test_model(
            TEST_BERT_TAGGER_BINARY_CPU
        )
        self.ec = ElasticCore()