def train_model_from_es_data(self):
        """
        Train the crf model for this entity using training data pulled from ES.

        The sentence list and the entity list stored for self.entity_name are fetched through
        the DataStore and handed over to train_crf_model_from_list.

        Returns:
            Whatever train_crf_model_from_list returns for the fetched data
            (presumably the path of the trained model - confirm against that method).

        Raises:
            ESCrfTrainingTextListNotFoundException: if no sentences were fetched for the entity
            ESCrfTrainingEntityListNotFoundException: if no entities were fetched for the entity
        """
        es_store = DataStore()
        ner_logger.debug('Fetch of data from ES for ENTITY: %s started' % self.entity_name)
        training_data = es_store.get_crf_data_for_entity_name(entity_name=self.entity_name)

        texts = training_data.get(SENTENCE_LIST, [])
        entities = training_data.get(ENTITY_LIST, [])

        # Both lists are mandatory; a missing or empty one aborts the training.
        if not texts:
            raise ESCrfTrainingTextListNotFoundException()
        if not entities:
            raise ESCrfTrainingEntityListNotFoundException()

        ner_logger.debug('Fetch of data from ES for ENTITY: %s completed' % self.entity_name)
        ner_logger.debug('Length of text_list %s' % len(texts))

        return self.train_crf_model_from_list(entity_list=entities, sentence_list=texts)
# Example #2
0
def get_crf_training_data(request):
    """
    Return the crf training data stored for an entity as a JSON response.

    Args:
        request (HttpRequest): GET request carrying the entity name under the
            ENTITY_NAME query parameter.

    Returns:
        HttpResponse: JSON body with the keys "success", "error" and "result".
            On success the status is 200, "success" is True and "result" holds the value
            returned by DataStore.get_crf_data_for_entity_name (a dictionary consisting of
            sentence_list and entity_list). On any failure the status is 500, "success" is
            False and "error" holds the error message.

    Examples:
        get request params
        key: "entity_name"
        value: "city"
    """
    response = {"success": False, "error": "", "result": []}
    status_code = 200
    try:
        entity_name = request.GET.get(ENTITY_NAME)
        datastore_obj = DataStore()
        response['result'] = datastore_obj.get_crf_data_for_entity_name(entity_name=entity_name)
        response['success'] = True

    except (DataStoreSettingsImproperlyConfiguredException,
            EngineNotImplementedException,
            EngineConnectionException, FetchIndexForAliasException) as error:
        # Known datastore configuration / connectivity failures.
        response['error'] = str(error)
        ner_logger.exception('Error: %s', error)
        status_code = 500

    except Exception as error:
        # Any other unexpected failure. Exception (not BaseException) is caught on purpose so
        # that KeyboardInterrupt / SystemExit keep propagating instead of becoming a 500.
        response['error'] = str(error)
        ner_logger.exception('Error: %s', error)
        status_code = 500

    return HttpResponse(json.dumps(response), content_type='application/json', status=status_code)