def clean(text, config=None):
    """Remove elements from text based on mode and markup.

    Passes ``config["markup"]`` and ``config["settings"]["mode"]`` to
    ``remove_elements`` together with the text and returns its result.

    :param text: The text to remove markup elements from.
    :param config: Configuration dict providing the ``"markup"`` entry and
        the ``"settings"`` -> ``"mode"`` entry. When omitted (or ``None``),
        the default configuration is loaded at call time via
        ``get_config(include_project_config=False)``.
    :return: The result of ``remove_elements`` (presumably the cleaned
        text -- confirm against that helper).
    """
    # Resolve the default lazily: a call in the signature would run once at
    # import time and share one mutable dict between every invocation.
    if config is None:
        config = get_config(include_project_config=False)
    return remove_elements(text, config["markup"], config["settings"]["mode"])
def annotate(text, elements, own_validator=None, config=None):
    """Find and annotate elements in text.

    Create an interval tree with elements and units of text, validate the
    rules to apply elements and augment the text with this result.

    Args:
        text (str): The text to annotate.
        elements (list): A list of element dicts like the following:
            {'fox': {'value': '/wiki/fox', 'data-type': 'animal'}}
        own_validator (list): A list of functions that will be applied in
            the validation of an element, deciding whether it is applied
            in the text.
        config (dict): Configuration with at least a ``"settings"`` entry.
            When omitted (or ``None``), the default configuration is loaded
            at call time via ``get_config(include_project_config=False)``.
            Use ``get_config`` to obtain the defaults and update them with
            your own rules.

    Returns:
        str: The annotated text. If the ``"return_applied_links"`` setting
        is truthy, a tuple ``(text, applied_links)`` is returned instead,
        where ``applied_links`` holds one entry per applied item.

    Examples:
        Basic example with config overwrite:

        >>> text = 'The quick brown fox jumps over the lazy dog.'
        >>> elements = [{'fox': {'value': '/wiki/fox',
        ...                      'data-type': 'animal'}},
        ...             {'dog': {'value': '/wiki/dog',
        ...                      'data-type': 'animal'}}]
        >>> cfg = get_config()
        >>> cfg['settings']['replaces_at_all'] = 1
        >>> print(annotate(text, elements, config=cfg))
        The quick brown <a href="/wiki/fox" data-type="animal">fox</a> jumps over the lazy dog .
    """
    # Resolve the default lazily: a call in the signature would run once at
    # import time and share one mutable dict between every invocation.
    if config is None:
        config = get_config(include_project_config=False)

    settings = config["settings"]
    intervaltree, units, existing_values, existing_a_tags = intervals(
        text, elements, settings
    )
    to_be_applied = retrieve_hits(
        intervaltree,
        units,
        config,
        own_validator,
        existing_values,
        existing_a_tags,
    )

    # Apply the items last; per the original author's note the replacement
    # works from the end of the text.
    text = augment(text, to_be_applied)

    if settings.get("return_applied_links"):
        # NOTE(review): assumes item[0].data[1][1] is the applied link value
        # of a hit -- confirm against what retrieve_hits returns.
        applied_links = [item[0].data[1][1] for item in to_be_applied]
        return text, applied_links
    return text