Example #1
0
def clean(text, config=None):
    """Remove markup elements from text based on mode and markup.

    Reads the markup definition and the mode from ``config`` and delegates
    the actual removal to ``remove_elements``.

    :param text: The text to remove elements from.
    :param config: Config mapping that provides ``config["markup"]`` and
        ``config["settings"]["mode"]``. When ``None`` (the default), the
        config is loaded on each call with
        ``get_config(include_project_config=False)``.
    :return: The result of ``remove_elements`` for the given text.
    """
    if config is None:
        # Resolve the default lazily. Calling get_config() in the signature
        # would run once at import time and hand the same mutable dict to
        # every call, so a mutation made anywhere would leak into later calls.
        config = get_config(include_project_config=False)
    return remove_elements(text, config["markup"], config["settings"]["mode"])
Example #2
0
def annotate(text, elements, own_validator=None, config=None):
    """Find and annotate elements in text.

    Create an intervaltree with elements and units of text, validate the
    rules to apply elements and augment the text with this result.

    Args:
        text (str): The first parameter is a text string.
        elements (list): It is a list of element dicts like the following:
            {'fox': {'value': '/wiki/fox', 'data-type': 'animal'}}
        own_validator (list): A list of functions that will be applied in the
            validation of an element, if it will be applied in the text.
        config (dict): The config to use; it must provide a ``"settings"``
            entry. When ``None`` (the default), the config is loaded on each
            call with ``get_config(include_project_config=False)``. Pass your
            own dict (e.g. an updated result of ``get_config()``) to apply
            your own rules.

    Returns:
        str: The annotated text.

        If ``config["settings"]["return_applied_links"]`` is truthy, a tuple
        ``(text, applied_links)`` is returned instead, where
        ``applied_links`` holds one entry per applied item.

    Examples:
        Basic example with config overwrite:

        >>> text = 'The quick brown fox jumps over the lazy dog.'
        >>> elements = [{'fox': {
        ...                 'value': '/wiki/fox',
        ...                 'data-type': 'animal'}
        ...             },
        ...             {'dog': {
        ...                 'value': '/wiki/dog',
        ...                 'data-type': 'animal'}
        ...             }]
        >>> cfg = get_config()
        >>> cfg['settings']['replaces_at_all'] = 1
        >>> annotate(text, elements, config=cfg)
        'The quick brown <a href="/wiki/fox" data-type="animal">fox</a> jumps over the lazy dog .'
    """
    if config is None:
        # Resolve the default lazily. Calling get_config() in the signature
        # would run once at import time and hand the same mutable dict to
        # every call, so a mutation made anywhere would leak into later calls.
        config = get_config(include_project_config=False)

    settings = config["settings"]
    intervaltree, units, existing_values, existing_a_tags = intervals(text, elements, settings)
    to_be_applied = retrieve_hits(intervaltree, units, config, own_validator, existing_values, existing_a_tags)

    # Apply the validated items to the text. Per the original author's note,
    # the replacements are applied starting from the end of the text.
    text = augment(text, to_be_applied)

    if settings.get("return_applied_links"):
        # Callers that opt in also get the link data stored on each applied
        # interval, alongside the annotated text.
        applied_links = [item[0].data[1][1] for item in to_be_applied]
        return text, applied_links

    return text