def check_for_existence(value):
    """
    Verify that the index referenced by ``value`` exists in Elasticsearch.

    The index name is obtained from ``parse_index_input(value)``. When the
    index exists, a matching Index record is created if it is missing so the
    database stays in sync with Elasticsearch. When it does not exist, any
    leftover Index record with that name is deleted before raising.

    Raises:
        NoIndexExists: if Elasticsearch does not report the index as existing.
    """
    elastic = ElasticCore()
    index_name = parse_index_input(value)

    if not elastic.check_if_indices_exist(indices=[index_name]):
        # Remove a loose Index record, just in case one was left behind.
        Index.objects.filter(name=index_name).delete()
        raise NoIndexExists(f"Could not access index '{index_name}'")

    # Keeps the database and Elasticsearch in sync.
    Index.objects.get_or_create(name=index_name)
def create(self, request, **kwargs):
    """
    Create an Elasticsearch index together with its Index database record.

    The request payload is validated with ``IndexSerializer``. If the index
    is not yet present in Elasticsearch it is created there (and closed right
    away when ``is_open`` is false). In both cases an Index record is fetched
    or created; only a newly created record is populated with the submitted
    metadata and the creation date reported by Elasticsearch.

    Returns:
        Response: HTTP 201 with a confirmation message when the index was
        newly created in Elasticsearch.

    Raises:
        ElasticIndexAlreadyExists: if the index already existed in
            Elasticsearch. It is raised only after the database record has
            been synced.
    """
    serializer = IndexSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    validated = serializer.validated_data

    es = ElasticCore()
    # Keep the plain name separate from the ORM object so that every
    # Elasticsearch call receives a string.
    index_name = validated["name"]
    metadata = {
        field: validated[field]
        for field in ("is_open", "description", "added_by", "test", "source", "client", "domain")
    }

    already_in_elastic = es.check_if_indices_exist([index_name])
    if not already_in_elastic:
        es.create_index(index=index_name)
        if not metadata["is_open"]:
            es.close_index(index_name)

    # Using get_or_create to avoid unique name constraints on creation.
    # Even if the index already exists in Elasticsearch, make sure the
    # database record exists as well.
    index_obj, is_created = Index.objects.get_or_create(name=index_name)
    if is_created:
        utc_time = es.get_index_creation_date(index_name)
        for field, field_value in metadata.items():
            setattr(index_obj, field, field_value)
        index_obj.created_at = utc_time
        index_obj.save()

    if already_in_elastic:
        raise ElasticIndexAlreadyExists()

    return Response(
        {"message": f"Added index {index_name} into Elasticsearch!"},
        status=status.HTTP_201_CREATED,
    )
def add_indices(self, request, pk=None, project_pk=None):
    """
    Attach the indices named in the request payload to the current project.

    The payload is validated with the view's serializer. Every index must be
    reported as existing by Elasticsearch; an Index record is then fetched or
    created for each name and added to ``project.indices``.

    Args:
        request: incoming request whose ``data`` carries the ``indices``.
        pk: unused here; accepted for the route signature.
        project_pk: unused here; accepted for the route signature.

    Returns:
        Response: a ``detail`` message listing the added indices.

    Raises:
        ValidationError: if no indices were given or Elasticsearch does not
            confirm that they exist.
    """
    project: Project = self.get_object()
    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    indices = [index.name for index in serializer.validated_data["indices"]]

    # Reject empty input before querying Elasticsearch with an empty list.
    if not indices or not ElasticCore().check_if_indices_exist(indices):
        raise ValidationError(f"Could not validate indices '{indices}'")

    for index_name in indices:
        index, _ = Index.objects.get_or_create(name=index_name)
        project.indices.add(index)

    return Response({"detail": f"Added indices '{indices}' to the project!"})
def check_and_create(indices):
    """
    Sync Index database records with Elasticsearch for the given names.

    For each index name: when Elasticsearch reports the index as existing, an
    Index record is created if missing; otherwise any Index record with that
    name is deleted.

    Args:
        indices: either a list of index names or a single string of
            comma-separated index names.
    """
    from texta_elastic.core import ElasticCore

    ec = ElasticCore()

    if isinstance(indices, str):
        # "a, b," -> ["a", "b"]: drop padding and empty fragments so that
        # they are not looked up (and deleted from the database) as names.
        indices = [name.strip() for name in indices.split(",") if name.strip()]

    for index in indices:
        if ec.check_if_indices_exist([index]):
            Index.objects.get_or_create(name=index)
        else:
            Index.objects.filter(name=index).delete()
class Feedback:
    """
    Store, list, update and delete model feedback documents in Elasticsearch.

    Feedback for a project lives in a single index named
    ``{es_prefix}texta-{deploy_key}-feedback-project-{project_pk}``. When a
    ``model_object`` is given, searches and deletions are restricted to the
    documents matching its ``MODEL_TYPE`` and primary key.
    """

    # NOTE(review): the ``es_prefix`` and ``deploy_key`` defaults are evaluated
    # once, when the class is defined, not on every instantiation.
    def __init__(self, project_pk, model_object=None, text_processor=None, callback_progress=None,
                 prediction_to_match=None, es_prefix=get_core_setting("TEXTA_ES_PREFIX"),
                 deploy_key=getattr(settings, "DEPLOY_KEY")):
        self.es_core = ElasticCore()
        self.project_pk = project_pk
        self.feedback_index = f"{es_prefix}texta-{deploy_key}-feedback-project-{project_pk}"
        self.model_object = model_object
        self.es_doc, self.es_search, self.query = self._initialize_es(
            project_pk, text_processor, callback_progress, prediction_to_match)

    def __iter__(self):
        """
        Iterate through the scroll of feedback documents for the given model.

        Yields nothing when no searcher is available or the feedback index
        does not exist.
        """
        # es_search is None when there is no model object or when the index
        # did not exist at construction time; scrolling it would fail.
        if self.es_search is None or not self.check_index_exists():
            return iter(())
        return self.es_search.scroll()

    def check_index_exists(self):
        """Return whether Elasticsearch reports the feedback index as existing."""
        return self.es_core.check_if_indices_exist([self.feedback_index])

    def _initialize_es(self, project_pk, text_processor, callback_progress, prediction_to_match):
        """
        Build the Elasticsearch helpers used by this instance.

        Returns:
            A ``(es_doc, es_search, query)`` tuple. ``es_search`` and
            ``query`` are None without a model object; ``es_search`` is also
            None when the feedback index does not exist yet.
        """
        # create es doc
        es_doc = ElasticDocument(self.feedback_index)

        # if no model object, return Nones for query and search
        if not self.model_object:
            return es_doc, None, None

        # create matching query
        query = Query()
        query.add_string_filter(query_string=self.model_object.MODEL_TYPE, fields=["model_type"])
        query.add_string_filter(query_string=str(self.model_object.pk), fields=["model_id"])
        if prediction_to_match:
            query.add_string_filter(query_string=prediction_to_match, fields=["correct_result"])

        # if no index, don't create searcher object
        if not self.check_index_exists():
            return es_doc, None, query.query

        # create es search
        es_search = ElasticSearcher(
            indices=self.feedback_index,
            query=query.query,
            text_processor=text_processor,
            output=ElasticSearcher.OUT_DOC_WITH_ID,
            callback_progress=callback_progress,
        )
        return es_doc, es_search, query.query

    def list(self):
        """
        List feedback for the given model.

        Returns an empty list when no searcher was created, which happens
        when there is no model object or the index did not exist yet (see
        ``_initialize_es``).
        """
        if not self.es_search:
            return []
        return self.es_search.search()['hits']['hits']

    def _text_to_doc(self, text):
        """
        Generate a document dict mapping every model field to the input text.
        """
        # retrieve list of fields the model was trained on
        model_fields = json.loads(self.model_object.fields)
        return {field_path: text for field_path in model_fields}

    def store(self, content, prediction):
        """
        Store a document with its initial prediction in Elasticsearch.

        Returns:
            The ``_id`` of the indexed feedback document, or None when
            indexing failed (the failure is logged).
        """
        # if predicted on text, generate doc
        if isinstance(content, str):
            content = self._text_to_doc(content)

        # generate feedback doc wrapping predicted doc
        feedback_doc = {
            "model_id": str(self.model_object.pk),
            "model_type": self.model_object.MODEL_TYPE,
            "content": json.dumps(content),
            "original_prediction": str(prediction),
            "prediction_time": datetime.now()
        }

        try:
            # add document and return id
            return self.es_doc.add(feedback_doc)["_id"]
        except Exception as e:
            # Best effort: a failed feedback write is logged, not raised.
            Logger().error("Failed indexing model feedback", exc_info=e)
            return None

    def add(self, feedback_id, correct_result):
        """
        Add the correct prediction to an already indexed feedback document.

        Returns:
            A dict with a ``success`` message, or with an ``error`` message
            containing the exception text when the update failed.
        """
        try:
            document = self.es_doc.get(feedback_id)
            document["_source"]["correct_result"] = json.dumps(correct_result)
            document["_source"]["feedback_time"] = datetime.now()
            doc_type = document.get("_type", "_doc")
            self.es_doc.update(
                index=document["_index"],
                doc_id=feedback_id,
                doc=document["_source"],
                doc_type=doc_type,
            )
            return {"success": "Tagger feedback updated."}
        except Exception as e:
            error_msg = "Failed changing model feedback."
            Logger().error(error_msg, exc_info=e)
            return {"error": f"{error_msg}: {e}"}

    def delete(self):
        """
        Delete the feedback documents matching this instance's query.

        Returns:
            A dict with a ``success`` message including the deleted count, or
            with an ``error`` message containing the exception text.
        """
        try:
            deleted = self.es_doc.delete_by_query(self.query)["deleted"]
            return {"success": f"deleted {deleted} feedback item(s)."}
        except Exception as e:
            error_msg = "Feedback document delete failed."
            Logger().error(error_msg, exc_info=e)
            return {"error": f"{error_msg}: {e}"}

    def delete_index(self):
        """
        Delete the whole feedback index of the project.

        Returns:
            A dict with a ``success`` message naming the index, or with an
            ``error`` message containing the exception text.
        """
        try:
            self.es_doc.core.delete_index(self.feedback_index)
            return {
                "success": f"deleted feedback index: {self.feedback_index}."
            }
        except Exception as e:
            error_msg = "Feedback index delete failed."
            Logger().error(error_msg, exc_info=e)
            return {"error": f"{error_msg}: {e}"}