예제 #1
0
def location(request):
    """Detect location entities for an API request by delegating to get_location().

    Args:
        request: incoming request; its parameters are extracted with
            parse_parameters_from_request()

    Returns:
        HttpResponse: JSON body of the form {"data": <entity_output>}. If a TypeError is
        raised while parsing or detecting, it is logged and an empty response with
        status 500 is returned instead.
    """
    try:
        params = parse_parameters_from_request(request)
        message = params[PARAMETER_MESSAGE]
        entity_name = params[PARAMETER_ENTITY_NAME]
        entity_output = get_location(
            message,
            entity_name,
            params[PARAMETER_STRUCTURED_VALUE],
            params[PARAMETER_FALLBACK_VALUE],
            params[PARAMETER_BOT_MESSAGE],
            predetected_values=params[PARAMETER_PRIOR_RESULTS]
        )
        ner_logger.debug('Finished %s : %s ' % (entity_name, entity_output))
    except TypeError as error:
        ner_logger.exception('Exception for location: %s ' % error)
        return HttpResponse(status=500)

    payload = json.dumps({'data': entity_output})
    return HttpResponse(payload, content_type='application/json')
예제 #2
0
파일: api.py 프로젝트: yespon/chatbot_ner
def shopping_size(request):
    """Detect shopping-size entities for an API request by delegating to get_shopping_size().

    Args:
        request: incoming request; its parameters are extracted with
            get_parameters_dictionary()

    Returns:
        HttpResponse: JSON body of the form {"data": <entity_output>}. If a TypeError is
        raised while parsing or detecting, it is logged and an empty response with
        status 500 is returned instead.
    """
    try:
        params = get_parameters_dictionary(request)
        entity_name = params[PARAMETER_ENTITY_NAME]
        ner_logger.debug('Start: %s ' % entity_name)
        message = params[PARAMETER_MESSAGE]
        entity_output = get_shopping_size(
            message,
            entity_name,
            params[PARAMETER_STRUCTURED_VALUE],
            params[PARAMETER_FALLBACK_VALUE],
            params[PARAMETER_BOT_MESSAGE]
        )
        ner_logger.debug('Finished %s : %s ' % (entity_name, entity_output))
    except TypeError as error:
        ner_logger.exception('Exception for shopping_size: %s ' % error)
        return HttpResponse(status=500)

    payload = json.dumps({'data': entity_output})
    return HttpResponse(payload, content_type='application/json')
예제 #3
0
def time_with_range(request):
    """Detect time ranges for an API request by delegating to get_time_with_range().

    Args:
        request (django.http.request.HttpRequest): HttpRequest object

    Returns:
        response (django.http.response.HttpResponse): JSON body of the form
        {"data": <entity_output>}. Any Exception raised while parsing or detecting is
        logged and answered with an empty response with status 500.
    """
    try:
        params = parse_parameters_from_request(request)
        message = params[PARAMETER_MESSAGE]
        entity_name = params[PARAMETER_ENTITY_NAME]
        entity_output = get_time_with_range(
            message,
            entity_name,
            params[PARAMETER_STRUCTURED_VALUE],
            params[PARAMETER_FALLBACK_VALUE],
            params[PARAMETER_BOT_MESSAGE],
            params[PARAMETER_TIMEZONE]
        )
        ner_logger.debug('Finished %s : %s ' % (entity_name, entity_output))
    except Exception as error:
        ner_logger.exception('Exception for time_with_range: %s ' % error)
        return HttpResponse(status=500)

    payload = json.dumps({'data': entity_output})
    return HttpResponse(payload, content_type='application/json')
예제 #4
0
def transfer_entities(request):
    """
    Transfer entities from the source to the destination.

    The JSON document stored under the EXTERNAL_API_DATA key of the POST data is decoded,
    and the value found under its ENTITY_LIST key is handed to
    DataStore.transfer_entities_elastic_search().

    Args:
        request (HttpRequest): request whose POST data carries the JSON payload

    Returns:
        HttpResponse: JSON body with the keys "success", "error" and "result".
        Status 200 on success; status 500 with "error" set to the exception text
        when anything fails.
    """
    response = {"success": False, "error": "", "result": []}
    status = 200
    try:
        external_api_data = json.loads(request.POST.get(EXTERNAL_API_DATA))
        entity_list = external_api_data.get(ENTITY_LIST)

        datastore_object = DataStore()
        datastore_object.transfer_entities_elastic_search(entity_list=entity_list)
        response['success'] = True

    # The datastore errors are still listed explicitly; the generic fallback is
    # Exception rather than BaseException so that SystemExit, KeyboardInterrupt and
    # GeneratorExit are no longer swallowed and turned into a 500 response.
    except (IndexNotFoundException, InvalidESURLException,
            SourceDestinationSimilarException, InternalBackupException,
            AliasNotFoundException, PointIndexToAliasException,
            FetchIndexForAliasException, DeleteIndexFromAliasException,
            AliasForTransferException, IndexForTransferException,
            NonESEngineTransferException, Exception) as error_message:
        response['error'] = str(error_message)
        ner_logger.exception('Error: %s' % error_message)
        status = 500

    return HttpResponse(json.dumps(response),
                        content_type='application/json',
                        status=status)
예제 #5
0
파일: api.py 프로젝트: Ahanmr/chatbot_ner
def text(request):
    """Detect textual entities for an API request using TextDetector.

    The optional fuzziness and minimum-token-length parameters are applied to the
    detector only when they are truthy.

    Args:
        request: incoming request; its parameters are extracted with
            get_parameters_dictionary()

    Returns:
        HttpResponse: JSON body of the form {"data": <entity_output>}. If a TypeError is
        raised while parsing or detecting, it is logged and an empty response with
        status 500 is returned instead.
    """
    try:
        params = get_parameters_dictionary(request)
        entity_name = params[PARAMETER_ENTITY_NAME]
        ner_logger.debug('Start: %s ' % entity_name)

        fuzziness = params[PARAMETER_FUZZINESS]
        min_token_len_fuzziness = params[PARAMETER_MIN_TOKEN_LEN_FUZZINESS]
        detector = TextDetector(entity_name=entity_name,
                                source_language_script=params[PARAMETER_LANGUAGE_SCRIPT])
        ner_logger.debug('fuzziness: %s min_token_len_fuzziness %s' %
                         (str(fuzziness), str(min_token_len_fuzziness)))

        if fuzziness:
            detector.set_fuzziness_threshold(parse_fuzziness_parameter(fuzziness))
        if min_token_len_fuzziness:
            detector.set_min_token_size_for_levenshtein(min_size=int(min_token_len_fuzziness))

        entity_output = detector.detect(message=params[PARAMETER_MESSAGE],
                                        structured_value=params[PARAMETER_STRUCTURED_VALUE],
                                        fallback_value=params[PARAMETER_FALLBACK_VALUE],
                                        bot_message=params[PARAMETER_BOT_MESSAGE])
        ner_logger.debug('Finished %s : %s ' % (entity_name, entity_output))
    except TypeError as error:
        ner_logger.exception('Exception for text_synonym: %s ' % error)
        return HttpResponse(status=500)

    payload = json.dumps({'data': entity_output})
    return HttpResponse(payload, content_type='application/json')
예제 #6
0
def update_crf_training_data(request):
    """
    Update the CRF training data of an entity.

    The JSON document stored under the EXTERNAL_API_DATA key of the POST data is decoded;
    the values found under its SENTENCES and ENTITY_NAME keys are passed to
    DataStore.update_entity_crf_data().

    Args:
        request (HttpRequest): request whose POST data carries the JSON payload

    Returns:
        HttpResponse: JSON body with the keys "success", "error" and "result".
        Status 200 on success; status 500 with "error" set to the exception text
        when anything fails.

    Example for data present in the POST request body
        key: "external_api_data"
        value: {"sentence_list":["hello pratik","hello hardik"], "entity_list":[["pratik"], ["hardik"]],
        "entity_name":"training_try3", "language_script": "en"}
        NOTE(review): this view itself only reads the SENTENCES and ENTITY_NAME keys of
        the payload; confirm whether the other keys of the example are still expected.
    """
    response = {"success": False, "error": "", "result": []}
    status = 200
    try:
        external_api_data = json.loads(request.POST.get(EXTERNAL_API_DATA))
        sentences = external_api_data.get(SENTENCES)
        entity_name = external_api_data.get(ENTITY_NAME)
        DataStore().update_entity_crf_data(entity_name=entity_name,
                                           sentences=sentences)
        response['success'] = True

    # The datastore errors are still listed explicitly; the generic fallback is
    # Exception rather than BaseException so that SystemExit, KeyboardInterrupt and
    # GeneratorExit are no longer swallowed and turned into a 500 response.
    except (DataStoreSettingsImproperlyConfiguredException,
            EngineNotImplementedException, EngineConnectionException,
            FetchIndexForAliasException, Exception) as error_message:
        response['error'] = str(error_message)
        ner_logger.exception('Error: %s' % error_message)
        status = 500

    return HttpResponse(json.dumps(response),
                        content_type='application/json',
                        status=status)
예제 #7
0
def update_dictionary(request):
    """
    Update the dictionary data of an entity.

    The JSON document stored under the EXTERNAL_API_DATA key of the POST data is decoded;
    the values found under its ENTITY_NAME, ENTITY_DATA and LANGUAGE_SCRIPT keys are
    passed to DataStore.update_entity_data().

    Args:
        request (HttpRequest): request whose POST data carries the JSON payload

    Returns:
        HttpResponse: JSON body with the keys "success", "error" and "result".
        Status 200 on success; status 500 with "error" set to the exception text
        when anything fails.
    """
    response = {"success": False, "error": "", "result": []}
    status = 200
    try:
        external_api_data = json.loads(request.POST.get(EXTERNAL_API_DATA))
        entity_name = external_api_data.get(ENTITY_NAME)
        entity_data = external_api_data.get(ENTITY_DATA)
        language_script = external_api_data.get(LANGUAGE_SCRIPT)
        datastore_obj = DataStore()
        datastore_obj.update_entity_data(entity_name=entity_name,
                                         entity_data=entity_data,
                                         language_script=language_script)
        response['success'] = True

    # The datastore errors are still listed explicitly; the generic fallback is
    # Exception rather than BaseException so that SystemExit, KeyboardInterrupt and
    # GeneratorExit are no longer swallowed and turned into a 500 response.
    except (DataStoreSettingsImproperlyConfiguredException,
            EngineNotImplementedException, EngineConnectionException,
            FetchIndexForAliasException, Exception) as error_message:
        response['error'] = str(error_message)
        ner_logger.exception('Error: %s' % error_message)
        status = 500

    return HttpResponse(json.dumps(response),
                        content_type='application/json',
                        status=status)
예제 #8
0
def get_entity_word_variants(request):
    """
    Return the dictionary of an entity as a list of values with their variants.

    The entity name is read from the ENTITY_NAME GET parameter and looked up with
    DataStore.get_entity_dictionary(). The resulting mapping is flattened into a list of
    {"value": <key>, "variants": <mapped value>} items ordered by key.

    Args:
        request (HttpRequest): request carrying the entity name as a GET parameter

    Returns:
        HttpResponse: JSON body with the keys "success", "error" and "result", where
        "result" holds the list described above. Status 200 on success; status 500
        with "error" set to the exception text when anything fails.
    """
    response = {"success": False, "error": "", "result": []}
    status = 200
    try:
        entity_name = request.GET.get(ENTITY_NAME)
        datastore_obj = DataStore()
        result = datastore_obj.get_entity_dictionary(entity_name=entity_name)

        # sorted() over keys() works the same on python2 and python3
        response['result'] = [{'value': value, 'variants': result[value]}
                              for value in sorted(result.keys())]
        response['success'] = True

    # The datastore errors are still listed explicitly; the generic fallback is
    # Exception rather than BaseException so that SystemExit, KeyboardInterrupt and
    # GeneratorExit are no longer swallowed and turned into a 500 response.
    except (DataStoreSettingsImproperlyConfiguredException,
            EngineNotImplementedException,
            EngineConnectionException, FetchIndexForAliasException,
            Exception) as error_message:
        response['error'] = str(error_message)
        ner_logger.exception('Error: %s' % error_message)
        status = 500

    return HttpResponse(json.dumps(response), content_type='application/json', status=status)
예제 #9
0
def get_crf_training_data(request):
    """
    Return the CRF training data stored for an entity.

    The entity name is read from the ENTITY_NAME GET parameter. The optional LANGUAGES
    GET parameter is a comma separated string; it is split into a list (an empty list
    when the parameter is missing or empty) and both are passed to
    DataStore.get_crf_data_for_entity_name().

    Args:
        request (HttpRequest): request carrying the GET parameters

    Returns:
        HttpResponse: JSON body with the keys "success", "error" and "result", where
        "result" is whatever the datastore returned (per the original documentation, a
        dictionary consisting of sentence_list and entity_list). Status 200 on
        success; status 500 with "error" set to the exception text when anything fails.

    Examples:
        get request params
        key: "entity_name"
        value: "city"
    """
    response = {"success": False, "error": "", "result": []}
    status = 200
    try:
        entity_name = request.GET.get(ENTITY_NAME)
        languages = request.GET.get(LANGUAGES, '')
        languages = languages.split(',') if languages else []

        response['result'] = DataStore().get_crf_data_for_entity_name(entity_name=entity_name,
                                                                      languages=languages)
        response['success'] = True

    # The datastore errors are still listed explicitly; the generic fallback is
    # Exception rather than BaseException so that SystemExit, KeyboardInterrupt and
    # GeneratorExit are no longer swallowed and turned into a 500 response.
    except (DataStoreSettingsImproperlyConfiguredException,
            EngineNotImplementedException,
            EngineConnectionException, FetchIndexForAliasException,
            Exception) as error_message:
        response['error'] = str(error_message)
        ner_logger.exception('Error: %s' % error_message)
        status = 500

    return HttpResponse(json.dumps(response), content_type='application/json', status=status)
예제 #10
0
def text(request):
    """Detect textual entities for an API request using TextModelDetector.

    The read_model_from_s3 and read_embeddings_from_remote_url parameters are lower-cased
    and decoded with json.loads() before being handed to the detector. The optional
    fuzziness and minimum-token-length parameters are applied only when they are truthy.

    Args:
        request: incoming request; its parameters are extracted with
            get_parameters_dictionary()

    Returns:
        HttpResponse: JSON body of the form {"data": <entity_output>}. If a TypeError is
        raised while parsing or detecting, it is logged and an empty response with
        status 500 is returned instead.
    """
    try:
        params = get_parameters_dictionary(request)
        entity_name = params[PARAMETER_ENTITY_NAME]
        ner_logger.debug('Start: %s ' % entity_name)

        fuzziness = params[PARAMETER_FUZZINESS]
        min_token_len_fuzziness = params[PARAMETER_MIN_TOKEN_LEN_FUZZINESS]
        use_s3_model = json.loads(params[PARAMETER_READ_MODEL_FROM_S3].lower())
        use_remote_embeddings = json.loads(params[PARAMETER_READ_EMBEDDINGS_FROM_REMOTE_URL].lower())

        detector = TextModelDetector(
            entity_name=entity_name,
            source_language_script=params[PARAMETER_LANGUAGE_SCRIPT],
            read_model_from_s3=use_s3_model,
            read_embeddings_from_remote_url=use_remote_embeddings,
            live_crf_model_path=params[PARAMETER_LIVE_CRF_MODEL_PATH]
        )
        ner_logger.debug('fuzziness: %s min_token_len_fuzziness %s' % (str(fuzziness), str(min_token_len_fuzziness)))

        if fuzziness:
            detector.set_fuzziness_threshold(parse_fuzziness_parameter(fuzziness))
        if min_token_len_fuzziness:
            detector.set_min_token_size_for_levenshtein(min_size=int(min_token_len_fuzziness))

        entity_output = detector.detect(
            message=params[PARAMETER_MESSAGE],
            structured_value=params[PARAMETER_STRUCTURED_VALUE],
            fallback_value=params[PARAMETER_FALLBACK_VALUE],
            bot_message=params[PARAMETER_BOT_MESSAGE]
        )
        ner_logger.debug('Finished %s : %s ' % (entity_name, entity_output))
    except TypeError as error:
        ner_logger.exception('Exception for text_synonym: %s ' % error)
        return HttpResponse(status=500)

    payload = json.dumps({'data': entity_output})
    return HttpResponse(payload, content_type='application/json')
예제 #11
0
파일: api.py 프로젝트: yespon/chatbot_ner
def person_name(request):
    """Detect person names for an API request by delegating to get_person_name().

    Args:
        request: incoming request; its parameters are extracted with
            get_parameters_dictionary()

    Returns:
        HttpResponse: JSON body of the form {"data": <entity_output>}. If a TypeError is
        raised while parsing or detecting, it is logged and an empty response with
        status 500 is returned instead.
    """
    try:
        params = get_parameters_dictionary(request)
        entity_name = params[PARAMETER_ENTITY_NAME]
        ner_logger.debug('Start: %s ' % entity_name)
        entity_output = get_person_name(
            message=params[PARAMETER_MESSAGE],
            entity_name=entity_name,
            structured_value=params[PARAMETER_STRUCTURED_VALUE],
            fallback_value=params[PARAMETER_FALLBACK_VALUE],
            bot_message=params[PARAMETER_BOT_MESSAGE],
            language=params[PARAMETER_SOURCE_LANGUAGE],
            predetected_values=params[PARAMETER_PRIOR_RESULTS]
        )
        ner_logger.debug('Finished %s : %s ' % (entity_name, entity_output))
    except TypeError as error:
        ner_logger.exception('Exception for person_name: %s ' % error)
        return HttpResponse(status=500)

    payload = json.dumps({'data': entity_output})
    return HttpResponse(payload, content_type='application/json')
예제 #12
0
def translate_text(text,
                   source_language_code,
                   target_language_code=ENGLISH_LANG):
    """
    Translate a text snippet by calling the service behind TRANSLATE_URL.

    The call is best effort: it uses a 2 second timeout, and any exception is logged
    and reported through the 'status' flag instead of being raised. A response with a
    status code other than 200 also leaves 'status' as False.

    Args:
       text (str): Text snippet which needs to be translated
       source_language_code (str): ISO-639-1 code for language script corresponding to text
       target_language_code (str): ISO-639-1 code for target language script
    Return:
       dict: Dictionary with two keys: 'status' (bool) and the TRANSLATED_TEXT key holding
       the translated text (unicode), or None when the translation did not succeed.
       For example: Consider following example
                    text: 'नमस्ते आप कैसे हैं'
                    'source_language_code': 'hi'
                    'target_language_code': 'en'

                    translate_text(text, 'hi', 'en')
                    >> {'status': True,
                       'translated_text': 'Hello how are you'}
    """
    result = {TRANSLATED_TEXT: None, 'status': False}
    try:
        query_params = {
            "q": text,
            "format": "text",
            "source": source_language_code,
            "target": target_language_code
        }
        # TRANSLATE_URL is extended with "&", so it presumably already carries a
        # query string -- verify against its definition
        url = TRANSLATE_URL + "&" + unicode_urlencode(query_params)
        api_response = requests.get(url, timeout=2)
        if api_response.status_code == 200:
            translations = api_response.json()["data"]["translations"]
            result[TRANSLATED_TEXT] = translations[0]["translatedText"]
            result['status'] = True
    except Exception as error:
        ner_logger.exception('Exception while translation: %s ' % error)
    return result
예제 #13
0
파일: api.py 프로젝트: wanaxe/chatbot_ner
def text(request):
    """
    Run text detection on the 'message' passed in the request by delegating to get_text()

    Args:
        request (django.http.HttpRequest): request whose parameters are extracted with
            get_parameters_dictionary()

    Returns:
        django.http.HttpResponse: JSON body of the form {"data": <entity_output>}. If a
        TypeError is raised while parsing or detecting, it is logged and an empty
        response with status 500 is returned instead.
    """
    try:
        params = get_parameters_dictionary(request)
        entity_name = params[PARAMETER_ENTITY_NAME]
        ner_logger.debug('Start: %s ' % entity_name)
        entity_output = get_text(
            message=params[PARAMETER_MESSAGE],
            entity_name=entity_name,
            structured_value=params[PARAMETER_STRUCTURED_VALUE],
            fallback_value=params[PARAMETER_FALLBACK_VALUE],
            bot_message=params[PARAMETER_BOT_MESSAGE],
            language=params[PARAMETER_SOURCE_LANGUAGE],
            fuzziness=params[PARAMETER_FUZZINESS],
            min_token_len_fuzziness=params[PARAMETER_MIN_TOKEN_LEN_FUZZINESS],
            live_crf_model_path=params[PARAMETER_LIVE_CRF_MODEL_PATH],
            read_model_from_s3=params[PARAMETER_READ_MODEL_FROM_S3],
            read_embeddings_from_remote_url=params[PARAMETER_READ_EMBEDDINGS_FROM_REMOTE_URL],
        )
        ner_logger.debug('Finished %s : %s ' % (entity_name, entity_output))
    except TypeError as error:
        ner_logger.exception('Exception for text_synonym: %s ' % error)
        return HttpResponse(status=500)

    payload = json.dumps({'data': entity_output})
    return HttpResponse(payload, content_type='application/json')
예제 #14
0
def read_model_dict_from_s3(bucket_name,
                            bucket_region,
                            model_path_location=None):
    """
    Read the content of a model file stored in an S3 bucket.

    The call is best effort: any exception (including the one raised when
    model_path_location is left as None) is logged and None is returned.

    Args:
        bucket_name (str): name of the bucket to read the file from
        bucket_region (str): region of the s3 bucket
        model_path_location (str): key of the object inside the bucket; leading '/'
            characters are stripped before the lookup

    Returns:
        The content returned by read() on the object's 'Body', or None if reading failed.
        NOTE(review): the original comment mentions cPickle.loads, so callers presumably
        unpickle this payload -- only do that for trusted buckets.
    """
    model_dict = None
    try:
        bucket = boto3.resource('s3', region_name=bucket_region).Bucket(bucket_name)
        object_key = model_path_location.lstrip('/')
        s3_object = bucket.Object(object_key)
        # read() hands back the stored payload as-is; no deserialization happens here
        model_dict = s3_object.get()['Body'].read()
        ner_logger.debug("Model Read Successfully From s3")
    except Exception as error:
        ner_logger.exception("Error Reading model from s3 for domain %s " % error)
    return model_dict
예제 #15
0
def phone_number(request):
    """Uses PhoneDetector to detect phone numbers

    GET requests are parsed with get_parameters_dictionary() and POST requests with
    parse_post_request(). Any other HTTP method is answered with status 405.

    A string message is run through PhoneDetector.detect(); a list or tuple of messages
    is run through PhoneDetector.detect_bulk(). For any other message type the returned
    "data" is None.

    request params:
        message (list or str): string for get request and list of text for bulk call through
                               post request on which detection logic is to be run
        entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                          if entity uses elastic-search lookup
        structured_value (str): Value obtained from any structured elements. Note if structured value is
                               present, detection is run on structured value instead of message
                               (For example, UI elements like form, payload, etc)
        fallback_value (str): If the detection logic fails to detect any value either from structured_value
                         or message then we return a fallback_value as an output.
        bot_message (str): previous message from a bot/agent.
        source_language (str): language for which the phone numbers have to be detected

    Returns:
        response (django.http.response.HttpResponse): JsonResponse of the form
        {"data": <entity_output>}; an empty response with status 500 when a TypeError is
        raised during detection; an empty response with status 405 (and an Allow header)
        for methods other than GET and POST.

    Examples:

    message = "Call 02226129857 and message +1(408) 92-124 and send 100rs to 91 9820334416 9920441344"
    entity_name = 'phone_number'
    structured_value = None
    fallback_value = None
    bot_message = None
    source_language = 'en'

    entity_output:

     [
    {
        "detection": "message",
        "original_text": "91 9820334416",
        "entity_value": {
            "value": "919820334416"
        },
        "language": "en"
    },
    {
        "detection": "message",
        "original_text": "9920441344",
        "entity_value": {
            "value": "9920441344"
        },
        "language": "en"
    },
    {
        "detection": "message",
        "original_text": "02226129857",
        "entity_value": {
            "value": "02226129857"
        },
        "language": "en"
    },
    {
        "detection": "message",
        "original_text": "+1(408) 92-124",
        "entity_value": {
            "value": "140892124"
        },
        "language": "en"
    }
    ]
    message = ['Call 02226129857', 'message +1(408) 92-124', 'send 100rs to 91 9820334416 9920441344']
    entity_name = 'phone_number'
    source_language = 'en'

    entity_output:
    [
       [{
                "detection": "message",
                "original_text": "02226129857",
                "entity_value": {
                    "value": "02226129857"
                },
                "language": "en"
            }

        ],
        [
            {
                "detection": "message",
                "original_text": "+1(408) 92-124",
                "entity_value": {
                    "value": "140892124"
                },
                "language": "en"
            }
        ],
        [
            {
                "detection": "message",
                "original_text": "91 9820334416",
                "entity_value": {
                    "value": "919820334416"
                },
                "language": "en"
            },
            {
                "detection": "message",
                "original_text": "9920441344",
                "entity_value": {
                    "value": "9920441344"
                },
                "language": "en"
            }

        ]
    ]
    """
    # Previously an unsupported method left the parameters empty, so the first lookup
    # raised a KeyError that the TypeError handler below does not catch.
    if request.method not in ("GET", "POST"):
        not_allowed = HttpResponse(status=405)
        not_allowed['Allow'] = 'GET, POST'
        return not_allowed

    try:
        if request.method == "POST":
            parameters_dict = parse_post_request(request)
            ner_logger.debug('Start Bulk Detection: %s ' %
                             parameters_dict[PARAMETER_ENTITY_NAME])
        else:
            parameters_dict = get_parameters_dictionary(request)
            ner_logger.debug('Start: %s ' %
                             parameters_dict[PARAMETER_ENTITY_NAME])
        entity_name = parameters_dict[PARAMETER_ENTITY_NAME]
        language = parameters_dict[PARAMETER_SOURCE_LANGUAGE]

        ner_logger.debug('Entity Name %s' % entity_name)
        ner_logger.debug('Source Language %s' % language)

        phone_number_detection = PhoneDetector(
            entity_name=entity_name,
            language=language,
            locale=parameters_dict[PARAMETER_LOCALE])
        message = parameters_dict[PARAMETER_MESSAGE]
        # stays None when the message is neither a string nor a list/tuple
        entity_output = None
        ner_logger.debug(parameters_dict)
        if isinstance(message, six.string_types):
            entity_output = phone_number_detection.detect(
                message=message,
                structured_value=parameters_dict[PARAMETER_STRUCTURED_VALUE],
                fallback_value=parameters_dict[PARAMETER_FALLBACK_VALUE],
                bot_message=parameters_dict[PARAMETER_BOT_MESSAGE])
        elif isinstance(message, (list, tuple)):
            entity_output = phone_number_detection.detect_bulk(
                messages=message)
        ner_logger.debug(
            'Finished %s : %s ' %
            (parameters_dict[PARAMETER_ENTITY_NAME], entity_output))
    except TypeError as e:
        ner_logger.exception('Exception for phone_number: %s ' % e)
        return HttpResponse(status=500)

    return JsonResponse({'data': entity_output})
예제 #16
0
파일: api.py 프로젝트: yespon/chatbot_ner
def text(request):
    """
    Run text detector with crf model on the 'message or list of messages' passed in the request

    GET requests are parsed with get_parameters_dictionary() and POST requests with
    parse_post_request(); the parsed parameters are forwarded to get_text(). Any other
    HTTP method is answered with status 405.

    Args:
        request (django.http.HttpRequest): HTTP request carrying the detection parameters

    Returns:
        response (django.http.HttpResponse): HttpResponse object containing "entity_output"
        as JSON of the form {"data": <entity_output>}; an empty response with status 500
        when a TypeError is raised during detection; an empty response with status 405
        (and an Allow header) for methods other than GET and POST.

        where "entity_output" is :
            list of dict: containing dict of detected entities with their original texts for a message
                OR
            list of lists: containing dict of detected entities with their original texts for each message in the list

        EXAMPLES:
        --- Single message
            >>> message = u'i want to order chinese from  mainland china and pizza from domminos'
            >>> entity_name = 'restaurant'
            >>> structured_value = None
            >>> fallback_value = None
            >>> bot_message = None
            >>> entity_output = get_text(message=message,
            >>>                   entity_name=entity_name,
            >>>                   structured_value=structured_value,
            >>>                   fallback_value=fallback_value,
            >>>                   bot_message=bot_message)
            >>> print(entity_output)

            [
                {
                    'detection': 'message',
                    'original_text': 'mainland china',
                    'entity_value': {'value': u'Mainland China'}
                },
                {
                    'detection': 'message',
                    'original_text': 'domminos',
                    'entity_value': {'value': u"Domino's Pizza"}
                }
            ]



            >>> message = u'i wanted to watch movie'
            >>> entity_name = 'movie'
            >>> structured_value = u'inferno'
            >>> fallback_value = None
            >>> bot_message = None
            >>> entity_output = get_text(message=message,
            >>>                   entity_name=entity_name,
            >>>                   structured_value=structured_value,
            >>>                   fallback_value=fallback_value,
            >>>                   bot_message=bot_message)
            >>> print(entity_output)

            [
                {
                    'detection': 'structure_value_verified',
                    'original_text': 'inferno',
                    'entity_value': {'value': u'Inferno'}
                }
            ]

            >>> message = u'i wanted to watch inferno'
            >>> entity_name = 'movie'
            >>> structured_value = u'delhi'
            >>> fallback_value = None
            >>> bot_message = None
            >>> entity_output = get_text(message=message,
            >>>                   entity_name=entity_name,
            >>>                   structured_value=structured_value,
            >>>                   fallback_value=fallback_value,
            >>>                   bot_message=bot_message)
            >>> print(entity_output)

            [
                {
                    'detection': 'message',
                    'original_text': 'inferno',
                    'entity_value': {'value': u'Inferno'}
                }
            ]

        --- Bulk detection
            >>> message = [u'book a flight to mumbai',
                            u'i want to go to delhi from mumbai']
            >>> entity_name = u'city'
            >>> entity_output = get_text(message=message,
            >>>                   entity_name=entity_name,
            >>>                   structured_value=structured_value,
            >>>                   fallback_value=fallback_value,
            >>>                   bot_message=bot_message)
            >>> print(entity_output)

            [
                [
                    {
                        'detection': 'message',
                        'entity_value': {'value': u'mumbai'},
                        'original_text': u'mumbai'
                    }
                ],
                [
                    {
                        'detection': 'message',
                        'entity_value': {'value': u'New Delhi'},
                        'original_text': u'delhi'
                    },
                    {
                        'detection': 'message',
                        'entity_value': {'value': u'mumbai'},
                        'original_text': u'mumbai'
                    }
                ]
            ]
    """
    # Previously an unsupported method left the parameters empty, so the first lookup
    # raised a KeyError that the TypeError handler below does not catch.
    if request.method not in ("GET", "POST"):
        not_allowed = HttpResponse(status=405)
        not_allowed['Allow'] = 'GET, POST'
        return not_allowed

    try:
        if request.method == "POST":
            parameters_dict = parse_post_request(request)
            ner_logger.debug('Start Bulk Detection: %s ' % parameters_dict[PARAMETER_ENTITY_NAME])
        else:
            parameters_dict = get_parameters_dictionary(request)
            ner_logger.debug('Start: %s ' % parameters_dict[PARAMETER_ENTITY_NAME])
        entity_output = get_text(
            message=parameters_dict[PARAMETER_MESSAGE],
            entity_name=parameters_dict[PARAMETER_ENTITY_NAME],
            structured_value=parameters_dict[PARAMETER_STRUCTURED_VALUE],
            fallback_value=parameters_dict[PARAMETER_FALLBACK_VALUE],
            bot_message=parameters_dict[PARAMETER_BOT_MESSAGE],
            language=parameters_dict[PARAMETER_SOURCE_LANGUAGE],
            fuzziness=parameters_dict[PARAMETER_FUZZINESS],
            min_token_len_fuzziness=parameters_dict[PARAMETER_MIN_TOKEN_LEN_FUZZINESS],
            live_crf_model_path=parameters_dict[PARAMETER_LIVE_CRF_MODEL_PATH],
            read_model_from_s3=parameters_dict[PARAMETER_READ_MODEL_FROM_S3],
            read_embeddings_from_remote_url=parameters_dict[PARAMETER_READ_EMBEDDINGS_FROM_REMOTE_URL],
            predetected_values=parameters_dict[PARAMETER_PRIOR_RESULTS]
        )
        ner_logger.debug('Finished %s : %s ' % (parameters_dict[PARAMETER_ENTITY_NAME], entity_output))
    except TypeError as e:
        ner_logger.exception('Exception for text_synonym: %s ' % e)
        return HttpResponse(status=500)
    return HttpResponse(json.dumps({'data': entity_output}), content_type='application/json')
예제 #17
0
def text(request):
    """
    Run text entity detection for several entities over one or more messages.

    The request is validated with verify_text_request() and the detection
    results are produced by get_text_entity_detection_data(). The same endpoint
    serves a single message and a list of messages.

    Only the POST method is supported; every other HTTP method is answered with
    a 405 response.

    Args:
        request: request for text detection

    Request parameters

        message (list of str): list of message strings on which the detection logic is run.

        source_language (str): language in which the text entities have to be detected

        bot_message (str): previous message from a bot/agent.

        entities (dict): dictionary of entities to be detected, each entity dict will contain
            the following details:

            entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                              if entity uses elastic-search lookup
            structured_value (str): [Optional] Value obtained from any structured elements
                              (for example, UI elements like form, payload, etc).
                              Note: if a structured value is present, detection is run on the
                              structured value instead of the message.

            fallback_value (str): [Optional] If the detection logic fails to detect any value
                  either from structured_value or message then we return a fallback_value as an output.

            use_fallback (bool): Defaults to False; if this is set for a single message the
                                fallback value will be used.

            fuzziness (int): [Optional] Fuzziness value for each entity

            min_token_size_for_fuzziness (int): [Optional] minimum size for token match

    Returns:
         response (django.http.response.HttpResponse): JSON response with the keys
            "success", "error" and (on success) "data". Status codes:
                200 - detection produced a non-empty result
                400 - KeyError / TypeError while verifying or processing the request
                405 - request method is not POST
                500 - any other exception, or an empty detection result

    Examples:

        1) For single message:
            input request:
                {
                    "message": ["I want to go to Jabalpur"],
                    "bot_message": null,
                    "language_script": "en",
                    "source_language": "en",
                    "entities": {
                        "city": {
                            "structured_value": "Delhi",
                            "fallback_value": null,
                            "predetected_values": ["Mumbai"],
                            "fuzziness": null,
                            "min_token_len_fuzziness": null,
                            "use_fallback": false
                        },
                        "restaurant": {
                            "structured_value": null,
                            "fallback_value": null,
                            "predetected_values": null,
                            "fuzziness": null,
                            "min_token_len_fuzziness": null,
                            "use_fallback": false
                        }
                    }
                }
            output response:
                {
                    "success": true,
                    "error": null,
                    "data": [
                        {
                            "entities": {
                                "restaurant": [],
                                "city": [
                                    {
                                        "entity_value": {
                                            "value": "New Delhi",
                                            "datastore_verified": true,
                                            "model_verified": false
                                        },
                                        "detection": "structure_value_verified",
                                        "original_text": "delhi",
                                        "language": "en"
                                    },
                                    {
                                        "entity_value": {
                                            "value": "Mumbai",
                                            "datastore_verified": false,
                                            "model_verified": true
                                        },
                                        "detection": "structure_value_verified",
                                        "original_text": "Mumbai",
                                        "language": "en"
                                    }
                                ]
                            },
                            "language": "en"
                        }
                    ]
                }
    """
    if request.method != "POST":
        # Reject every unsupported method up front. Previously only GET was
        # rejected and other methods fell through to a misleading 500
        # "Some error while parsing". For GET the message is unchanged
        # ("Get method is not allowed").
        response = {"success": False,
                    "error": "%s method is not allowed" % request.method.capitalize()}
        return JsonResponse(response, status=405)

    ner_logger.debug("Fetching result")

    try:
        verify_text_request(request)
        # if verify success get detection data
        data = get_text_entity_detection_data(request)
    except (KeyError, TypeError) as err:
        # Missing or badly typed request fields are reported as a client error.
        response = {"success": False, "error": str(err)}
        # TODO: move to ner_logger.error
        ner_logger.exception(response)
        return JsonResponse(response, status=400)
    except Exception as err:
        response = {"success": False, "error": str(err)}
        ner_logger.exception(response)
        return JsonResponse(response, status=500)

    if data:
        response = {"success": True, "error": None, "data": data}
        return JsonResponse(response, status=200)

    # An empty result is treated as a failure, exactly as before.
    response = {"success": False, "error": "Some error while parsing"}
    return JsonResponse(response, status=500)
예제 #18
0
def train_crf_model(request):
    """
    Train a CRF model for an entity and report where the model was stored.

    The training configuration is read as a JSON string from the POST field
    named by EXTERNAL_API_DATA. When "es_config" is truthy the model is trained
    via CrfTrain.train_model_from_es_data(); otherwise it is trained from the
    sentence list and entity list supplied in the same payload.

    Args:
        request (django.http.HttpRequest): incoming HTTP request; only
            request.POST is read.

    Returns:
        HttpResponse: JSON body {"success": bool, "error": str, "result": dict}.
            Status 200 with result {LIVE_CRF_MODEL_PATH: model_path} on success,
            status 500 with the error message filled in on any failure.

    Post Request Body:
    key: "external_api_data"
    value: {
    "entity_name": "crf_test",
    "read_model_from_s3": true,
    "es_config": true,
    "read_embeddings_from_remote_url": true
    }
    """
    response = {"success": False, "error": "", "result": {}}
    status = 200
    try:
        external_api_data = json.loads(request.POST.get(EXTERNAL_API_DATA))
        entity_name = external_api_data.get(ENTITY_NAME)
        read_model_from_s3 = external_api_data.get(READ_MODEL_FROM_S3)
        es_config = external_api_data.get(ES_CONFIG)
        read_embeddings_from_remote_url = external_api_data.get(
            READ_EMBEDDINGS_FROM_REMOTE_URL)
        crf_model = CrfTrain(
            entity_name=entity_name,
            read_model_from_s3=read_model_from_s3,
            read_embeddings_from_remote_url=read_embeddings_from_remote_url)

        if es_config:
            model_path = crf_model.train_model_from_es_data()
        else:
            sentence_list = external_api_data.get(SENTENCE_LIST)
            entity_list = external_api_data.get(ENTITY_LIST)
            model_path = crf_model.train_crf_model_from_list(
                sentence_list=sentence_list, entity_list=entity_list)

        response['result'] = {LIVE_CRF_MODEL_PATH: model_path}
        response['success'] = True

    except (IndexNotFoundException, InvalidESURLException,
            SourceDestinationSimilarException, InternalBackupException,
            AliasNotFoundException, PointIndexToAliasException,
            FetchIndexForAliasException, DeleteIndexFromAliasException,
            AliasForTransferException, IndexForTransferException,
            NonESEngineTransferException) as error_message:
        response['error'] = str(error_message)
        ner_logger.exception('Error: %s' % error_message)
        status = 500

    except Exception as e:
        # Deliberately Exception, not BaseException: KeyboardInterrupt,
        # SystemExit and GeneratorExit must propagate instead of being
        # converted into an HTTP 500 response.
        response['error'] = str(e)
        ner_logger.exception('Error: %s' % e)
        status = 500

    return HttpResponse(json.dumps(response),
                        content_type='application/json',
                        status=status)
예제 #19
0
    def _get_substring_from_processed_text(text, matched_tokens):
        """
        Get part of original text that was detected as some entity value.

        This method was written to tackle cases when original text contains special characters which are dropped
        during tokenization

        Args:
            matched_tokens (list): list of tokens (usually tokens from fuzzy match results from ES)
                                   to find as a contiguous substring in the processed sentence considering the effects
                                   of tokenizer
            text (string or unicode): sentence from self.processed_text  from where indices of given token will be
                                            given

        Returns:
            str or unicode: part of original text that corresponds to given tokens

        E.g.
        self.processed_text = u'i want to order 1 pc hot & crispy'
        tokens = [u'i', u'want', u'to', u'order', u'1', u'pc', u'hot', u'crispy']
        indices = [(1, 2), (3, 7), (8, 10), (11, 16), (17, 18), (19, 21), (22, 25), (28, 34)])

        In: matched_tokens = [u'1', u'pc', u'hot', u'crispy']
        Out: 1 pc hot & crispy

        Notice that & is dropped during tokenization but when finding original text,
        we recover it from processed text
        """
        def _get_tokens_and_indices(txt):
            """
            Args:
                txt (str or unicode): text to get tokens from and indicies of those tokens in the given text

            Returns:
                tuple:
                    list: containing tokens, direct results from tokenizer.tokenize
                    list: containing (int, int) indicating start and end position of ith token (of first list)
                          in given text

            E.g.
            In: text = u'i want to order 1 pc hot & crispy'
            Out: ([u'i', u'want', u'to', u'order', u'1', u'pc', u'hot', u'crispy'],
                  [(1, 2), (3, 7), (8, 10), (11, 16), (17, 18), (19, 21), (22, 25), (28, 34)])

            """
            txt = txt.rstrip() + ' __eos__'
            processed_text_tokens = TOKENIZER.tokenize(txt)
            processed_text_tokens_indices = []

            offset = 0
            for token in processed_text_tokens:
                st = txt.index(token)
                en = st + len(token)

                # Small block to handle tricky cases like '(A B) C'
                # It extends the previous token's end boundary if there are special characters except whitespace
                # towards the end of previous token
                prefix = txt[:en]
                prefix_tokens = whitespace_tokenizer.tokenize(prefix)
                if prefix and len(prefix_tokens) > 1 and prefix_tokens[0]:
                    if processed_text_tokens_indices:
                        s, e = processed_text_tokens_indices.pop()
                        e += len(prefix_tokens[0])
                        processed_text_tokens_indices.append((s, e))

                txt = txt[en:]
                processed_text_tokens_indices.append(
                    (offset + st, offset + en))
                offset += en

            # remove eos parts
            processed_text_tokens.pop()
            processed_text_tokens_indices.pop()

            return processed_text_tokens, processed_text_tokens_indices

        try:
            n = len(matched_tokens)
            tokens, indices = _get_tokens_and_indices(text)
            for i in range(len(tokens) - n + 1):
                if tokens[i:i + n] == matched_tokens:
                    start = indices[i][0]
                    end = indices[i + n - 1][1]
                    return text[start:end]
        except (ValueError, IndexError):
            ner_logger.exception('Error getting original text (%s, %s)' %
                                 (matched_tokens, text))

        return u' '.join(matched_tokens)
예제 #20
0
def number_range(request):
    """Use NumberRangeDetector to detect number ranges

        Args:
            request: url parameters:

            request params:
                message (str): natural text on which detection logic is to be run. Note if structured value is
                                       present, detection is run on structured value instead of message
                entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                                  if entity uses elastic-search lookup
                structured_value (str): Value obtained from any structured elements. Note if structured value is
                                       present, detection is run on structured value instead of message
                                       (For example, UI elements like form, payload, etc)
                fallback_value (str): If the detection logic fails to detect any value either from structured_value
                                 or message then we return a fallback_value as an output.
                bot_message (str): previous message from a bot/agent.
                source_language (str): language passed to the detector
                unit_type(str): restrict number range to detect for some unit types like 'currency', 'temperature'


       Returns:
           HttpResponse: JSON response {'data': entity_output} where entity_output is a list of dicts
                         containing entity_value, original_text and detection; entity_value is in itself
                         a dict with its keys varying from entity to entity.
                         An empty response with status 500 is returned when a TypeError is raised.

       Examples:
           message = "we expect 200-300 people in room"
           entity_name = 'people_range'
           structured_value = None
           fallback_value = None
           bot_message = None
           unit_type=None
           output = number_range(request)
           print(output)

           >> [{'detection': 'message', 'original_text': '200-300', 'entity_value': {'min_value': '200',
                'max_value': '300', 'unit': None}}]
       """
    try:
        parameters_dict = get_parameters_dictionary(request)
        ner_logger.debug('Start: %s ' % parameters_dict[PARAMETER_ENTITY_NAME])

        number_range_detector = NumberRangeDetector(
            entity_name=parameters_dict[PARAMETER_ENTITY_NAME],
            language=parameters_dict[PARAMETER_SOURCE_LANGUAGE],
            unit_type=parameters_dict[PARAMETER_NUMBER_UNIT_TYPE])

        entity_output = number_range_detector.detect(
            message=parameters_dict[PARAMETER_MESSAGE],
            structured_value=parameters_dict[PARAMETER_STRUCTURED_VALUE],
            fallback_value=parameters_dict[PARAMETER_FALLBACK_VALUE],
            bot_message=parameters_dict[PARAMETER_BOT_MESSAGE])

        ner_logger.debug(
            'Finished %s : %s ' %
            (parameters_dict[PARAMETER_ENTITY_NAME], entity_output))

    except TypeError as e:
        # Name this endpoint in the log; the earlier copy-pasted "numeric" label made
        # failures here indistinguishable from those of the `number` endpoint.
        ner_logger.exception('Exception for number_range: %s ' % e)
        return HttpResponse(status=500)

    return HttpResponse(json.dumps({'data': entity_output}),
                        content_type='application/json')
예제 #21
0
파일: api.py 프로젝트: wanaxe/chatbot_ner
def date(request):
    """Detect dates in the request's message using DateAdvancedDetector.

    Args:
        request (django.http.request.HttpRequest): HttpRequest object

        request params:
            message (str): natural text on which detection logic is to be run. Note if structured value is
                           present detection is run on structured value instead of message
            entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                               if entity uses elastic-search lookup
            structured_value (str): Value obtained from any structured elements (for example, UI elements
                                    like form, payload, etc). When present, detection is run on it instead
                                    of the message
            fallback_value (str): value returned as output if nothing could be detected from either
                                  structured_value or message
            bot_message (str): previous message from a bot/agent.
            timezone (str): timezone of the user; 'UTC' is used when it is empty
            source_language (str): source language code (ISO 639-1)
            language_script (str): language code of script (ISO 639-1)

        The value stored under PARAMETER_PAST_DATE_REFERENCED switches on past-date referencing only when
        it is exactly 'true' or 'True'; when the key is missing it is treated as "false".

    Returns:
        response (django.http.response.HttpResponse): JSON response {'data': entity_output}, or an empty
        response with status 500 if a TypeError was raised during detection

    Example:

           message = "agle mahine k 5 tarikh ko mera birthday hai"
           entity_name = 'time'
           structured_value = None
           fallback_value = None
           bot_message = None
           timezone = 'UTC'
           source_language = 'hi'
           language_script = 'en'
           output = date(request)
           print(output)

           >>  [{'detection': 'message', 'original_text': 'agle mahine k 5 tarikh',
                 'entity_value': {'value': {'mm': 12, 'yy': 2018, 'dd': 5, 'type': 'date'}}}]
    """
    try:
        params = get_parameters_dictionary(request)
        user_timezone = params[PARAMETER_TIMEZONE] or 'UTC'
        ner_logger.debug('Start: %s ' % params[PARAMETER_ENTITY_NAME])

        # Only the literal strings 'true' / 'True' enable past-date referencing.
        past_flag = params.get(PARAMETER_PAST_DATE_REFERENCED, "false")
        refers_to_past = past_flag in ('true', 'True')

        detector = DateAdvancedDetector(
            entity_name=params[PARAMETER_ENTITY_NAME],
            language=params[PARAMETER_SOURCE_LANGUAGE],
            timezone=user_timezone,
            past_date_referenced=refers_to_past,
        )
        detector.set_bot_message(bot_message=params[PARAMETER_BOT_MESSAGE])

        entity_output = detector.detect(
            message=params[PARAMETER_MESSAGE],
            structured_value=params[PARAMETER_STRUCTURED_VALUE],
            fallback_value=params[PARAMETER_FALLBACK_VALUE],
        )

        ner_logger.debug('Finished %s : %s ' % (params[PARAMETER_ENTITY_NAME], entity_output))
    except TypeError as err:
        ner_logger.exception('Exception for date: %s ' % err)
        return HttpResponse(status=500)

    body = json.dumps({'data': entity_output})
    return HttpResponse(body, content_type='application/json')
예제 #22
0
파일: api.py 프로젝트: wanaxe/chatbot_ner
def phone_number(request):
    """Detect phone numbers in the request's message using PhoneDetector.

        request params:
            message (str): natural text on which detection logic is to be run. Note if structured value is
                           present, detection is run on structured value instead of message
            entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                               if entity uses elastic-search lookup
            structured_value (str): Value obtained from any structured elements (for example, UI elements
                                    like form, payload, etc). When present, detection is run on it instead
                                    of the message
            fallback_value (str): value returned as output if nothing could be detected from either
                                  structured_value or message
            bot_message (str): previous message from a bot/agent.
            source_language (str): language for which the phone numbers have to be detected

        Returns:
            response (django.http.response.HttpResponse): JSON response {'data': entity_output}, or an
            empty response with status 500 if a TypeError was raised during detection

        Examples:

        message = "Call 02226129857 and message +1(408) 92-124 and send 100rs to 91 9820334416 9920441344"
        entity_name = 'phone_number'
        structured_value = None
        fallback_value = None
        bot_message = None
        source_language = 'en'

        entity_output:

        [
            {
                "detection": "message",
                "original_text": "91 9820334416",
                "entity_value": {
                    "value": "919820334416"
                },
                "language": "en"
            },
            {
                "detection": "message",
                "original_text": "9920441344",
                "entity_value": {
                    "value": "9920441344"
                },
                "language": "en"
            },
            {
                "detection": "message",
                "original_text": "02226129857",
                "entity_value": {
                    "value": "02226129857"
                },
                "language": "en"
            },
            {
                "detection": "message",
                "original_text": "+1(408) 92-124",
                "entity_value": {
                    "value": "140892124"
                },
                "language": "en"
            }
        ]

        """
    try:
        params = get_parameters_dictionary(request)
        ner_logger.debug('Start: %s ' % params[PARAMETER_ENTITY_NAME])

        entity_name = params[PARAMETER_ENTITY_NAME]
        language = params[PARAMETER_SOURCE_LANGUAGE]
        ner_logger.debug('Entity Name %s' % entity_name)
        ner_logger.debug('Source Language %s' % language)

        detector = PhoneDetector(entity_name=entity_name, language=language)

        # Collected after the detector is built so lookups happen in the same order as before.
        detect_kwargs = {
            'message': params[PARAMETER_MESSAGE],
            'structured_value': params[PARAMETER_STRUCTURED_VALUE],
            'fallback_value': params[PARAMETER_FALLBACK_VALUE],
            'bot_message': params[PARAMETER_BOT_MESSAGE],
        }
        entity_output = detector.detect(**detect_kwargs)

        ner_logger.debug('Finished %s : %s ' % (params[PARAMETER_ENTITY_NAME], entity_output))
    except TypeError as err:
        ner_logger.exception('Exception for phone_number: %s ' % err)
        return HttpResponse(status=500)

    body = json.dumps({'data': entity_output})
    return HttpResponse(body, content_type='application/json')
예제 #23
0
파일: api.py 프로젝트: wanaxe/chatbot_ner
def number(request):
    """Detect numerals in the request's message using NumberDetector.

       Args:
        request: url parameters:

        request params:
           message (str): natural text on which detection logic is to be run. Note if structured value is
                          present, detection is run on structured value instead of message
           entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                              if entity uses elastic-search lookup
           structured_value (str): Value obtained from any structured elements (for example, UI elements
                                   like form, payload, etc). When present, detection is run on it instead
                                   of the message
           fallback_value (str): value returned as output if nothing could be detected from either
                                 structured_value or message
           bot_message (str): previous message from a bot/agent.
           source_language (str): language passed to the detector
           unit_type(str): restrict number range to detect for some unit types like 'currency', 'temperature'

           min_digit (str): min digit
           max_digit (str): max digit
               The digit limits are applied to the detector only when both values are truthy; each is
               converted with int() first.


       Returns:
           HttpResponse: JSON response {'data': entity_output} where entity_output is a list of dicts
                         containing entity_value, original_text and detection; entity_value is in itself
                         a dict with its keys varying from entity to entity.
                         An empty response with status 500 is returned when a TypeError is raised.

       Example:

           message = "I want to purchase 30 units of mobile and 40 units of Television"
           entity_name = 'number_of_unit'
           structured_value = None
           fallback_value = None
           bot_message = None
           unit_type = None
           min_digit=1
           max_digit=2
           output = number(request)
           print(output)

               >> [{'detection': 'message', 'original_text': '30', 'entity_value': {'value': '30', 'unit': None}},
                   {'detection': 'message', 'original_text': '40', 'entity_value': {'value': '40', 'unit': None}}]


           message = "I want to reserve a table for 3 people"
           entity_name = 'number_of_people'
           structured_value = None
           fallback_value = None
           bot_message = None
           unit_type = None
           min_digit=1
           max_digit=6
           output = number(request)
           print(output)

               >> [{'detection': 'message', 'original_text': 'for 3 people', 'entity_value':
                                                                        {'value': '3', 'unit': 'people'}}]

       """
    try:
        params = get_parameters_dictionary(request)
        ner_logger.debug('Start: %s ' % params[PARAMETER_ENTITY_NAME])

        detector = NumberDetector(
            entity_name=params[PARAMETER_ENTITY_NAME],
            language=params[PARAMETER_SOURCE_LANGUAGE],
            unit_type=params[PARAMETER_NUMBER_UNIT_TYPE],
        )

        # Digit limits are optional and only honoured when both ends are supplied.
        if params[PARAMETER_MIN_DIGITS] and params[PARAMETER_MAX_DIGITS]:
            detector.set_min_max_digits(
                min_digit=int(params[PARAMETER_MIN_DIGITS]),
                max_digit=int(params[PARAMETER_MAX_DIGITS]),
            )

        entity_output = detector.detect(
            message=params[PARAMETER_MESSAGE],
            structured_value=params[PARAMETER_STRUCTURED_VALUE],
            fallback_value=params[PARAMETER_FALLBACK_VALUE],
            bot_message=params[PARAMETER_BOT_MESSAGE],
        )
        ner_logger.debug('Finished %s : %s ' % (params[PARAMETER_ENTITY_NAME], entity_output))

    except TypeError as err:
        ner_logger.exception('Exception for numeric: %s ' % err)
        return HttpResponse(status=500)

    body = json.dumps({'data': entity_output})
    return HttpResponse(body, content_type='application/json')
예제 #24
0
파일: api.py 프로젝트: wanaxe/chatbot_ner
def time(request):
    """Detect time expressions in the request's message using TimeDetector.

    Args:
        request (django.http.request.HttpRequest): HttpRequest object

        request params:
            message (str): natural text on which detection logic is to be run. Note if structured value is
                           present detection is run on structured value instead of message
            entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                               if entity uses elastic-search lookup
            structured_value (str): Value obtained from any structured elements (for example, UI elements
                                    like form, payload, etc). When present, detection is run on it instead
                                    of the message; its truthiness is also passed to the detector as
                                    form_check
            fallback_value (str): value returned as output if nothing could be detected from either
                                  structured_value or message
            bot_message (str): previous message from a bot/agent.
            timezone (str): timezone of the user; 'UTC' is used when it is empty
            source_language (str): source language code (ISO 639-1)
            language_script (str): language code of script (ISO 639-1)

    Returns:
        response (django.http.response.HttpResponse): JSON response {'data': entity_output}, or an empty
        response with status 500 if a TypeError was raised during detection

    Example:

           message = "kal subah 5 baje mujhe jaga dena"
           entity_name = 'time'
           structured_value = None
           fallback_value = None
           bot_message = None
           timezone = 'UTC'
           source_language = 'hi'
           language_script = 'en'
           output = time(request)
           print(output)

           >>  [{'detection': 'message', 'original_text': '12:30 pm',
                'entity_value': {'mm': 30, 'hh': 12, 'nn': 'pm'}}]
    """
    try:
        params = get_parameters_dictionary(request)
        user_timezone = params[PARAMETER_TIMEZONE] or 'UTC'
        # form_check tells the detector whether a structured (form) value was supplied.
        has_structured_value = bool(params[PARAMETER_STRUCTURED_VALUE])
        ner_logger.debug('Start: %s ' % params[PARAMETER_ENTITY_NAME])

        detector = TimeDetector(
            entity_name=params[PARAMETER_ENTITY_NAME],
            language=params[PARAMETER_SOURCE_LANGUAGE],
            timezone=user_timezone,
            form_check=has_structured_value,
        )
        detector.set_bot_message(bot_message=params[PARAMETER_BOT_MESSAGE])

        entity_output = detector.detect(
            message=params[PARAMETER_MESSAGE],
            structured_value=params[PARAMETER_STRUCTURED_VALUE],
            fallback_value=params[PARAMETER_FALLBACK_VALUE],
        )

        ner_logger.debug('Finished %s : %s ' % (params[PARAMETER_ENTITY_NAME], entity_output))
    except TypeError as err:
        ner_logger.exception('Exception for time: %s ' % err)
        return HttpResponse(status=500)

    body = json.dumps({'data': entity_output})
    return HttpResponse(body, content_type='application/json')
예제 #25
0
def text(request):
    """
    Detect text entities in the message (or list of messages) carried by the request.

    The request parameters are parsed with parse_parameters_from_request and handed to get_text.

    Args:
        request (django.http.HttpRequest): incoming HTTP request carrying the detection parameters

    Returns:
        django.http.HttpResponse: on success, a JSON response of the form {"data": entity_output};
            on any of the handled errors (datastore request failure, Elasticsearch connection
            timeout / connection error, TypeError or KeyError) an empty response with status 500.

        "entity_output" is either
            list of dict: detected entities with their original texts, for a single message
                OR
            list of lists: one such list of dicts per message, when a list of messages is passed

    Examples:
        --- Single message
            >>> message = u'i want to order chinese from  mainland china and pizza from domminos'
            >>> entity_output = get_text(message=message,
            ...                          entity_name='restaurant',
            ...                          structured_value=None,
            ...                          fallback_value=None,
            ...                          bot_message=None)
            >>> print(entity_output)

            [
                {
                    'detection': 'message',
                    'original_text': 'mainland china',
                    'entity_value': {'value': u'Mainland China'}
                },
                {
                    'detection': 'message',
                    'original_text': 'domminos',
                    'entity_value': {'value': u"Domino's Pizza"}
                }
            ]

        --- Structured value that can be verified
            >>> message = u'i wanted to watch movie'
            >>> entity_output = get_text(message=message,
            ...                          entity_name='movie',
            ...                          structured_value=u'inferno',
            ...                          fallback_value=None,
            ...                          bot_message=None)
            >>> print(entity_output)

            [
                {
                    'detection': 'structure_value_verified',
                    'original_text': 'inferno',
                    'entity_value': {'value': u'Inferno'}
                }
            ]

        --- Structured value that cannot be verified, entity found in the message instead
            >>> message = u'i wanted to watch inferno'
            >>> entity_output = get_text(message=message,
            ...                          entity_name='movie',
            ...                          structured_value=u'delhi',
            ...                          fallback_value=None,
            ...                          bot_message=None)
            >>> print(entity_output)

            [
                {
                    'detection': 'message',
                    'original_text': 'inferno',
                    'entity_value': {'value': u'Inferno'}
                }
            ]

        --- Bulk detection
            >>> message = [u'book a flight to mumbai',
            ...            u'i want to go to delhi from mumbai']
            >>> entity_output = get_text(message=message,
            ...                          entity_name=u'city',
            ...                          structured_value=None,
            ...                          fallback_value=None,
            ...                          bot_message=None)
            >>> print(entity_output)

            [
                [
                    {
                        'detection': 'message',
                        'entity_value': {'value': u'mumbai'},
                        'original_text': u'mumbai'
                    }
                ],
                [
                    {
                        'detection': 'message',
                        'entity_value': {'value': u'New Delhi'},
                        'original_text': u'delhi'
                    },
                    {
                        'detection': 'message',
                        'entity_value': {'value': u'mumbai'},
                        'original_text': u'mumbai'
                    }
                ]
            ]
    """
    try:
        params = parse_parameters_from_request(request)
        # Keys are read in the same order in which they are passed to get_text.
        detection_kwargs = {
            'message': params[PARAMETER_MESSAGE],
            'entity_name': params[PARAMETER_ENTITY_NAME],
            'structured_value': params[PARAMETER_STRUCTURED_VALUE],
            'fallback_value': params[PARAMETER_FALLBACK_VALUE],
            'bot_message': params[PARAMETER_BOT_MESSAGE],
            'language': params[PARAMETER_SOURCE_LANGUAGE],
            'fuzziness': params[PARAMETER_FUZZINESS],
            'min_token_len_fuzziness': params[PARAMETER_MIN_TOKEN_LEN_FUZZINESS],
            'predetected_values': params[PARAMETER_PRIOR_RESULTS],
        }
        entity_output = get_text(**detection_kwargs)
        ner_logger.debug('Finished %s : %s ' % (params[PARAMETER_ENTITY_NAME], entity_output))
    except DataStoreRequestException as ds_error:
        ner_logger.exception(
            f"Error in requesting ES {request.path}, error: {ds_error}, query: {ds_error.request},"
            f" response: {ds_error.response}"
        )
        return HttpResponse(status=500)
    except es_exceptions.ConnectionTimeout as timeout_error:
        # Handled before ConnectionError so that timeouts get their own log line.
        ner_logger.exception(f"Error in text_synonym for: {request.path}, error: {timeout_error}")
        return HttpResponse(status=500)
    except es_exceptions.ConnectionError as connection_error:
        ner_logger.exception(f"Error in text_synonym for:  {request.path}, error: {connection_error}")
        return HttpResponse(status=500)
    except (TypeError, KeyError) as bad_input_error:
        ner_logger.exception(f"Error in text_synonym for: {request.path}, error: {bad_input_error}")
        return HttpResponse(status=500)
    else:
        # Serialisation happens outside the guarded section, so its errors are not swallowed above.
        payload = json.dumps({'data': entity_output})
        return HttpResponse(payload, content_type='application/json')
예제 #26
0
def verify_text_request(request):
    """
    Validate the JSON body of a text detection request.

    The body is parsed with json.loads and the following checks are applied, in this order:

    1. "messages" and "entities" are present and non-empty
    2. "messages" is a list and "entities" is a dict
    3. the number of messages and the number of entities are within the allowed limits
       (MAX_NUMBER_BULK_MESSAGE and MAX_NUMBER_MULTI_ENTITIES respectively)

    Args:
        request: API request object; its `body` attribute must contain a JSON document

    Returns:
        None: when the request passes every check

    Raises:
        KeyError: if "messages" or "entities" is missing or empty
        TypeError: if "messages" is not a list or "entities" is not a dict
        ValueError: if there are more than MAX_NUMBER_BULK_MESSAGE messages or more than
            MAX_NUMBER_MULTI_ENTITIES entities (json.loads also raises a ValueError subclass
            when the body is not valid JSON)
    """

    request_data = json.loads(request.body)
    messages = request_data.get("messages")
    entities = request_data.get("entities")

    # NOTE: failures are logged with ner_logger.error rather than ner_logger.exception because no
    # exception is being handled at these points; logger.exception would attach an empty
    # "NoneType: None" traceback to every record.
    if not messages:
        ner_logger.error("messages param is not passed")
        raise KeyError("key messages is required")

    if not entities:
        ner_logger.error("Entities param is not passed")
        raise KeyError("Entities dict is required")

    if not isinstance(messages, list):
        ner_logger.error("messages param is not in correct format")
        raise TypeError("messages should be in format of list of string")

    if not isinstance(entities, dict):
        ner_logger.error("Entities param is not in correct format")
        raise TypeError("Entities should be dict of entity details")

    if len(messages) > MAX_NUMBER_BULK_MESSAGE:
        ner_logger.error(f"Maximum number of message can be {MAX_NUMBER_BULK_MESSAGE} for "
                         "bulk detection")
        raise ValueError(f"Maximum number of message can be {MAX_NUMBER_BULK_MESSAGE} for "
                         "bulk detection")

    # len() of a dict is its number of keys; no need to copy the keys into a list first
    if len(entities) > MAX_NUMBER_MULTI_ENTITIES:
        ner_logger.error(f"Maximum number of entities can be {MAX_NUMBER_MULTI_ENTITIES} for "
                         " detection")
        raise ValueError(f"Maximum number of entities can be {MAX_NUMBER_MULTI_ENTITIES} for "
                         "bulk detection")
예제 #27
0
    def _get_substring_from_processed_text(self, matched_tokens):
        """
        Get part of original text that was detected as some entity value.

        This method was written to tackle cases when original text contains special characters which are dropped
        during tokenization

        Args:
            matched_tokens (list): list of tokens (usually tokens from fuzzy match results from ES)
                                   to find as a contiguous substring in the processed text considering the effects
                                   of tokenizer

        Returns:
            str or unicode: part of original text that corresponds to given tokens

        E.g.
        self.processed_text = u'i want to order 1 pc hot & crispy'
        tokens = [u'i', u'want', u'to', u'order', u'1', u'pc', u'hot', u'crispy']
        indices = [(1, 2), (3, 7), (8, 10), (11, 16), (17, 18), (19, 21), (22, 25), (28, 34)])

        In: matched_tokens = [u'1', u'pc', u'hot', u'crispy']
        Out: 1 pc hot & crispy

        Notice that & is dropped during tokenization but when finding original text, we recover it from processed text
        """

        def _get_tokens_and_indices(text):
            """
            Args:
                text (str or unicode): text to get tokens from and indicies of those tokens in the given text

            Returns:
                tuple:
                    list: containing tokens, direct results from tokenizer.tokenize
                    list: containing (int, int) indicating start and end position of ith token (of first list)
                          in given text

            E.g.
            In: text = u'i want to order 1 pc hot & crispy'
            Out: ([u'i', u'want', u'to', u'order', u'1', u'pc', u'hot', u'crispy'],
                  [(1, 2), (3, 7), (8, 10), (11, 16), (17, 18), (19, 21), (22, 25), (28, 34)])

            """
            processed_text_tokens = TOKENIZER.tokenize(text)
            processed_text_tokens_indices = []

            offset = 0
            txt = text
            for token in processed_text_tokens:
                st = txt.index(token)
                en = st + len(token)
                txt = txt[en:]
                processed_text_tokens_indices.append((offset + st, offset + en))
                offset += en

            return processed_text_tokens, processed_text_tokens_indices

        try:
            n = len(matched_tokens)
            tokens, indices = _get_tokens_and_indices(self.processed_text)
            for i in range(len(tokens) - n + 1):
                if tokens[i:i + n] == matched_tokens:
                    start = indices[i][0]
                    end = indices[i + n - 1][1]
                    return self.processed_text[start:end]
        except (ValueError, IndexError):
            ner_logger.exception('Error getting original text (%s, %s)' % (matched_tokens, self.processed_text))

        return u' '.join(matched_tokens)