def __init__(self):
    """
    Initialize the DataStore from the configuration held in environment
    variables (CHATBOT_NER_DATASTORE) and open a connection to the engine.

    Raises:
        DataStoreSettingsImproperlyConfiguredException: when the engine name
            or the engine's connection settings are missing from the config
    """
    # Engine name (e.g. elasticsearch) must be configured, else we bail out.
    self._engine = CHATBOT_NER_DATASTORE.get(ENGINE)
    if self._engine is None:
        raise DataStoreSettingsImproperlyConfiguredException()

    # Per-engine connection settings live under the engine's own key.
    self._connection_settings = CHATBOT_NER_DATASTORE.get(self._engine)
    if self._connection_settings is None:
        raise DataStoreSettingsImproperlyConfiguredException()

    # Name of the backing store — index name for Elasticsearch,
    # table name for SQL. Populated later by the engine-specific setup.
    self._store_name = None
    self._client_or_connection = None
    self._connect()
# Example #2
    def _check_doc_type_for_elasticsearch(self):
        """
        Verify that a doc_type entry exists in the connection settings.

        Raises:
            DataStoreSettingsImproperlyConfiguredException: when doc_type is
                absent from the connection settings
        """
        # Guard clause: nothing to do when doc_type is configured.
        if ELASTICSEARCH_DOC_TYPE in self._connection_settings:
            return
        raise DataStoreSettingsImproperlyConfiguredException(
            'Elasticsearch needs doc_type. Please configure ES_DOC_TYPE in your environment')
    def _check_doc_type_for_crf_data_elasticsearch(self):
        """
        Verify that the CRF training-data doc_type entry exists in the
        connection settings.

        Raises:
            DataStoreSettingsImproperlyConfiguredException: when the CRF data
                doc_type is absent from the connection settings
        """
        # TODO: This check should be during init or boot
        # Guard clause: settings are fine when the key is present.
        if ELASTICSEARCH_CRF_DATA_DOC_TYPE in self._connection_settings:
            return
        raise DataStoreSettingsImproperlyConfiguredException(
            'Elasticsearch training data needs doc_type. Please configure '
            'ES_TRAINING_DATA_DOC_TYPE in your environment')
# Example #4
 def _configure_store(self):
     """
     Load the connection settings for the configured engine and cache the
     Elasticsearch index name and doc type on the instance.

     Raises:
         DataStoreSettingsImproperlyConfiguredException: when no settings
             exist for the configured engine
     """
     self._connection_settings = CHATBOT_NER_DATASTORE.get(self._engine_name)
     if self._connection_settings is None:
         raise DataStoreSettingsImproperlyConfiguredException()
     self._check_doc_type_for_elasticsearch()
     # Local alias keeps the subscript lines short.
     settings = self._connection_settings
     self._index_name = settings[constants.ELASTICSEARCH_ALIAS]
     self._doc_type = settings[constants.ELASTICSEARCH_DOC_TYPE]
# Example #5
    def _check_doc_type_for_elasticsearch(self):
        """
        Verify that a doc_type entry exists in the connection settings.

        Raises:
            DataStoreSettingsImproperlyConfiguredException: when doc_type is
                absent from the connection settings
        """
        # TODO: This check should be during init or boot
        # Guard clause: configured correctly — nothing to do.
        if constants.ELASTICSEARCH_DOC_TYPE in self._connection_settings:
            return
        ner_logger.debug("No doc type is present")
        raise DataStoreSettingsImproperlyConfiguredException(
            'Elasticsearch needs doc_type. Please configure ES_DOC_TYPE in your environment'
        )
# Example #6
    def _check_doc_type_for_elasticsearch(self):
        """
        Verify that a doc_type entry exists in the connection settings.

        Raises:
            DataStoreSettingsImproperlyConfiguredException: when doc_type is
                absent from the connection settings
        """
        # Guard clause: configured correctly — nothing to do.
        if constants.ELASTICSEARCH_DOC_TYPE in self._connection_settings:
            return
        ner_logger.debug(
            "No doc type is present in chatbot_ner.config.CHATBOT_NER_DATASTORE"
        )
        raise DataStoreSettingsImproperlyConfiguredException(
            'Elasticsearch needs doc_type. Please configure ES_DOC_TYPE in your environment'
        )
# Example #7
    def _configure_store(self, **kwargs):
        """
        Load the connection settings for the configured engine, open the
        connection, and register it in the connection registry under the
        alias `default`.

        Raises:
            DataStoreSettingsImproperlyConfiguredException: when no settings
                exist for the configured engine
        """
        self._connection_settings = CHATBOT_NER_DATASTORE.get(self._engine_name)
        if self._connection_settings is None:
            raise DataStoreSettingsImproperlyConfiguredException()

        self._index_name = self._connection_settings[constants.ELASTICSEARCH_ALIAS]
        self._connection = self.connect(**self._connection_settings)

        # Make this connection available to the rest of the app as `default`.
        self._conns['default'] = self._connection
 def transfer_entities_elastic_search(self, entity_list):
     """
     Transfer the given entities from one environment to the other.
     Supported for the elasticsearch engine only.

     Args:
         entity_list (list): List of entities that have to be transferred

     Raises:
         NonESEngineTransferException: when the configured engine is not elasticsearch
         DataStoreSettingsImproperlyConfiguredException: when no source ES URL can be resolved
     """
     if self._engine != ELASTICSEARCH:
         raise NonESEngineTransferException
     # Both URLs come out of the same settings dict — look it up once.
     engine_settings = CHATBOT_NER_DATASTORE.get(self._engine)
     es_url = engine_settings.get('connection_url')
     if es_url is None:
         # Fall back to the URL assembled from individual env variables.
         es_url = elastic_search.connect.get_es_url()
     if es_url is None:
         raise DataStoreSettingsImproperlyConfiguredException()
     destination = engine_settings.get('destination_url')
     es_object = elastic_search.transfer.ESTransfer(source=es_url, destination=destination)
     es_object.transfer_specific_entities(list_of_entities=entity_list)
    def create(self, err_if_exists=True, **kwargs):
        """
        Creates the schema/structure for the datastore depending on the engine configured in the environment.

        Args:
            err_if_exists (bool): if to throw error when index already exists, default True
            kwargs:
                For Elasticsearch:
                    master_timeout: Specify timeout for connection to master
                    timeout: Explicit operation timeout
                    update_all_types: Whether to update the mapping for all fields with the same name across all types
                                      or not
                    wait_for_active_shards: Set the number of active shards to wait for before the operation returns.
                    doc_type: The name of the document type
                    allow_no_indices: Whether to ignore if a wildcard indices expression resolves into no concrete
                                      indices. (This includes _all string or when no indices have been specified)
                    expand_wildcards: Whether to expand wildcard expression to concrete indices that are open, closed
                                      or both., default 'open', valid choices are: 'open', 'closed', 'none', 'all'
                    ignore_unavailable: Whether specified concrete indices should be ignored when unavailable
                                        (missing or closed)

        Refer--
           https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.IndicesClient.create
           https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.IndicesClient.put_mapping

        Raises:
            DataStoreSettingsImproperlyConfiguredException if connection settings are invalid or missing
            All other exceptions raised by elasticsearch-py library
        """
        # Lazily (re)establish the connection if it hasn't been made yet.
        if self._client_or_connection is None:
            self._connect()

        if self._engine == ELASTICSEARCH:
            # Each entry: (required, settings key for the index name, settings key for
            # the doc type, alias to point at the index, doc_type validator, creator fn)
            create_map = [  # TODO: use namedtuples
                (True, ELASTICSEARCH_INDEX_1, ELASTICSEARCH_DOC_TYPE, self._store_name,
                 self._check_doc_type_for_elasticsearch, elastic_search.create.create_entity_index),
                (False, ELASTICSEARCH_INDEX_2, ELASTICSEARCH_DOC_TYPE, self._store_name,
                 self._check_doc_type_for_elasticsearch, elastic_search.create.create_entity_index),
                (False, ELASTICSEARCH_CRF_DATA_INDEX_NAME, ELASTICSEARCH_CRF_DATA_DOC_TYPE, None,
                 self._check_doc_type_for_crf_data_elasticsearch, elastic_search.create.create_crf_index),
            ]
            for (required, index_name_key, doc_type_key, alias_name, doc_type_checker, create_fn) in create_map:
                index_name = self._connection_settings.get(index_name_key)
                doc_type = self._connection_settings.get(doc_type_key)
                if not index_name:
                    if required:
                        # Fill in the key name so the error actually says which
                        # setting is missing (the placeholder was previously
                        # left unformatted).
                        raise DataStoreSettingsImproperlyConfiguredException(
                            '{} key is required in datastore settings for elastic_search'.format(index_name_key))
                    else:
                        continue

                # Validate doc_type presence before attempting index creation.
                doc_type_checker()
                create_fn(
                    connection=self._client_or_connection,
                    index_name=index_name,
                    doc_type=doc_type,
                    logger=ner_logger,
                    err_if_exists=err_if_exists,
                    **kwargs
                )
                # Optionally point an alias at the freshly created index.
                if alias_name:
                    elastic_search.create.create_alias(connection=self._client_or_connection,
                                                       index_list=[index_name],
                                                       alias_name=alias_name,
                                                       logger=ner_logger)