예제 #1
0
def suggest_locations():
    """
    Suggest locations in a text string. These might be useful keywords for
    annotators to geolocate.

    input: JSON request body with a "text" key holding the full message's
        text [string]. A missing/empty body or a missing "text" key is
        rejected with HTTP 400.
    output: JSON dict with outer key "locations". Inner keys are the keys
        returned by Machine.guess_locations (presumably entity types --
        confirm against Machine), each mapped to a list of entities.

        "locations": {
          "GSP": [
            "Congo"
          ]
        }

    # [TODO]
    # output: list. each item is a python dictionary:
    #     - text : the text for the specific entity [string]
    #     - indices : tuple of (start [int], end [int]) offset where entity is
    #       located in given full message
    #     - confidence : probability from 0-to-1 [float]
    """
    # Must be `or`: with `and`, a missing body raised TypeError on the
    # membership test, and a body lacking "text" slipped through to a
    # KeyError below (HTTP 500 instead of 400).
    if not request.json or 'text' not in request.json:
        abort(400)

    found = Machine.guess_locations(request.json['text'])

    # Convert each value to a plain list so jsonify can serialise it; build a
    # new dict rather than rewriting the one being iterated. `.items()` works
    # on both Python 2 and Python 3 (`.iteritems()` is Python 2 only).
    locations = {label: list(names) for label, names in found.items()}

    return jsonify({'locations': locations})
예제 #2
0
def suggest_locations():
    """
    Suggest locations in a text string. These might be useful keywords for
    annotators to geolocate.

    input: JSON request body with a "text" key holding the full message's
        text [string]. A missing/empty body or a missing "text" key is
        rejected with HTTP 400.
    output: JSON dict with outer key "locations". Inner keys are the keys
        returned by Machine.guess_locations (presumably entity types --
        confirm against Machine), each mapped to a list of entities.

        "locations": {
          "GSP": [
            "Congo"
          ]
        }

    # [TODO]
    # output: list. each item is a python dictionary:
    #     - text : the text for the specific entity [string]
    #     - indices : tuple of (start [int], end [int]) offset where entity is
    #       located in given full message
    #     - confidence : probability from 0-to-1 [float]
    """
    # Must be `or`: with `and`, a missing body raised TypeError on the
    # membership test, and a body lacking "text" slipped through to a
    # KeyError below (HTTP 500 instead of 400).
    if not request.json or 'text' not in request.json:
        abort(400)

    found = Machine.guess_locations(request.json['text'])

    # Convert each value to a plain list so jsonify can serialise it; build a
    # new dict rather than rewriting the one being iterated. `.items()` works
    # on both Python 2 and Python 3 (`.iteritems()` is Python 2 only).
    locations = {label: list(names) for label, names in found.items()}

    return jsonify({'locations': locations})
예제 #3
0
def detect_language():
    """Report the best natural-language guess for the submitted text.

    Input parameters (JSON request body):
        text: string

    Responds with HTTP 400 when the body is missing/empty or has no "text"
    key. Otherwise responds with JSON holding "language" and "confidence",
    taken from positions 0 and 1 of the first item returned by
    Machine.guess_language.
    """
    payload = request.json
    if not payload or 'text' not in payload:
        abort(400)

    # Only the first entry of the guesses is reported.
    top_guess = Machine.guess_language(payload['text'])[0]
    response = {'language': top_guess[0], "confidence": top_guess[1]}

    return jsonify(response)
예제 #4
0
def detect_language():
    """Report the best natural-language guess for the submitted text.

    Input parameters (JSON request body):
        text: string

    Responds with HTTP 400 when the body is missing/empty or has no "text"
    key. Otherwise responds with JSON holding "language" and "confidence",
    taken from positions 0 and 1 of the first item returned by
    Machine.guess_language.
    """
    payload = request.json
    if not payload or 'text' not in payload:
        abort(400)

    # Only the first entry of the guesses is reported.
    top_guess = Machine.guess_language(payload['text'])[0]
    response = {'language': top_guess[0], "confidence": top_guess[1]}

    return jsonify(response)
예제 #5
0
def extract_entities():
    """Given some text input, identify - besides location - people,
    organisations and other types of entities within the text.

    Input parameters (JSON request body):
        text: string

    Responds with HTTP 400 when the body is missing/empty or has no "text"
    key. Otherwise responds with JSON of the form {"entities": {...}}, where
    each key returned by Machine.guess_entities is lower-cased and mapped to
    a list of its values.
    """
    # Must be `or`: with `and`, a missing body raised TypeError on the
    # membership test, and a body lacking "text" slipped through to a
    # KeyError below (HTTP 500 instead of 400).
    if not request.json or 'text' not in request.json:
        abort(400)

    result = Machine.guess_entities(request.json['text'])

    # Lower-case the keys and convert each value to a plain list so jsonify
    # can serialise it. `.items()` works on both Python 2 and Python 3
    # (`.iteritems()` is Python 2 only).
    entities = {key.lower(): list(value) for key, value in result.items()}

    return jsonify({'entities': entities})
예제 #6
0
def extract_entities():
    """Given some text input, identify - besides location - people,
    organisations and other types of entities within the text.

    Input parameters (JSON request body):
        text: string

    Responds with HTTP 400 when the body is missing/empty or has no "text"
    key. Otherwise responds with JSON of the form {"entities": {...}}, where
    each key returned by Machine.guess_entities is lower-cased and mapped to
    a list of its values.
    """
    # Must be `or`: with `and`, a missing body raised TypeError on the
    # membership test, and a body lacking "text" slipped through to a
    # KeyError below (HTTP 500 instead of 400).
    if not request.json or 'text' not in request.json:
        abort(400)

    result = Machine.guess_entities(request.json['text'])

    # Lower-case the keys and convert each value to a plain list so jsonify
    # can serialise it. `.items()` works on both Python 2 and Python 3
    # (`.iteritems()` is Python 2 only).
    entities = {key.lower(): list(value) for key, value in result.items()}

    return jsonify({'entities': entities})
예제 #7
0
def suggest_sensitive_info():
    """
    Suggest personally identifying information (PII) -- such as
    credit card numbers, phone numbers, email, etc --
    from a text string. These are useful for annotators to investigate
    and strip before publicly posting information.

    input: JSON request body with a "text" key [string]. A missing/empty
        body or a missing "text" key is rejected with HTTP 400.
    output: JSON dict {"private_info": ...} holding whatever
        Machine.guess_private_info returns for the text.

    # [TODO] not implemented by this handler yet:
    # input: options
    #     - custom regex for local phone numbers
    #     - flags or booleans to specify the type of pii (e.g. phone_only)
    # output: list of dictionaries:
    #     - word
    #     - type (e-mail, phone, ID, person name, etc.)
    #     - indices (start/end offset in text)
    #     - confidence [todo: is possible?]
    """
    # Must be `or`: with `and`, a missing body raised TypeError on the
    # membership test, and a body lacking "text" slipped through to a
    # KeyError below (HTTP 500 instead of 400).
    if not request.json or 'text' not in request.json:
        abort(400)

    private_info = Machine.guess_private_info(request.json['text'])
    return jsonify({'private_info': private_info})
예제 #8
0
def suggest_sensitive_info():
    """
    Suggest personally identifying information (PII) -- such as
    credit card numbers, phone numbers, email, etc --
    from a text string. These are useful for annotators to investigate
    and strip before publicly posting information.

    input: JSON request body with a "text" key [string]. A missing/empty
        body or a missing "text" key is rejected with HTTP 400.
    output: JSON dict {"private_info": ...} holding whatever
        Machine.guess_private_info returns for the text.

    # [TODO] not implemented by this handler yet:
    # input: options
    #     - custom regex for local phone numbers
    #     - flags or booleans to specify the type of pii (e.g. phone_only)
    # output: list of dictionaries:
    #     - word
    #     - type (e-mail, phone, ID, person name, etc.)
    #     - indices (start/end offset in text)
    #     - confidence [todo: is possible?]
    """
    # Must be `or`: with `and`, a missing body raised TypeError on the
    # membership test, and a body lacking "text" slipped through to a
    # KeyError below (HTTP 500 instead of 400).
    if not request.json or 'text' not in request.json:
        abort(400)

    private_info = Machine.guess_private_info(request.json['text'])
    return jsonify({'private_info': private_info})