Exemple #1
0
def text_markup(text, termiteAddr='http://localhost:9090/termite', vocabs=None,
				normalisation='id', wrap=False, wrapChars=('{!', '!}'), substitute=True, replacementDict=None,
				termite_http_user=None, termite_http_pass=None):
	'''
	Receives plain text, returns text with TERMited substitutions.

	The text is sent to the TERMite endpoint at ``termiteAddr`` (input format "txt", output format
	"doc.jsonx", subsume enabled). The response is handed to ``markup`` and the 'termited_text' entry
	of the first element of its result is returned.

	:param str text: Plain text to be marked up
	:param str termiteAddr: URL of the TERMite endpoint to call
	:param array(str) vocabs: List of vocabs to be substituted, ordered by priority. These vocabs MUST be in the TERMite results.
	Defaults to ['GENE', 'INDICATION', 'DRUG'] when omitted or None. If an empty list is passed, all vocabs found will be used
	with random priority where overlaps are found.
	:param str normalisation: Type of normalisation to substitute/add (must be 'id', 'type', 'name', 'typeplusname' or 'typeplusid')
	:param bool wrap: Whether to wrap found hits with 'bookends'
	:param tuple(str) wrapChars: Tuple of length 2, containing strings to insert at start/end of found hits
	:param bool substitute: Whether to replace the found term (or add normalisation alongside)
	:param dict replacementDict: Dictionary with <VOCAB>:<string_to_replace_hits_in_vocab>. '~ID~' will be replaced with the entity id,
	and '~TYPE~' will be replaced with the vocab name. Example: {'GENE':'ENTITY_~TYPE~_~ID~'} would result in BRCA1 -> ENTITY_GENE_BRCA1
	replacementDict supersedes normalisation. ~NAME~ can also be used to get the preferred name.
	:param str termite_http_user: Username for HTTP basic auth (only used when termite_http_pass is set)
	:param str termite_http_pass: Password for HTTP basic auth; basic auth is configured only when this is truthy
	:return str: The 'termited_text' of the first element returned by ``markup``
	'''

	# Build the default list per call: a list literal in the signature would be
	# created once and shared by every call (and by markup(), which receives it).
	if vocabs is None:
		vocabs = ['GENE', 'INDICATION', 'DRUG']

	t = termite.TermiteRequestBuilder()
	t.set_url(termiteAddr)
	t.set_text(text)
	t.set_entities(','.join(vocabs))
	t.set_subsume(True)
	t.set_input_format("txt")
	t.set_output_format("doc.jsonx")
	if termite_http_pass:
		# NOTE(review): verification=False presumably disables certificate
		# verification for the request -- confirm against set_basic_auth.
		t.set_basic_auth(termite_http_user, termite_http_pass, verification=False)
	docjsonx = t.execute()

	# markup() returns an indexable collection; only the first element's text is needed here.
	marked_up = markup(docjsonx, vocabs=vocabs, normalisation=normalisation, wrap=wrap,
					   wrapChars=wrapChars, substitute=substitute, replacementDict=replacementDict)
	return marked_up[0]['termited_text']
"""

# Module-level metadata: author, version, copyright notice and license name.
__author__ = 'SciBite DataScience'
__version__ = '0.2'
__copyright__ = '(c) 2019, SciBite Ltd'
__license__ = 'Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License'

from termite_toolkit import termite
from pprint import pprint

# Script settings: the input file handed to the request as binary content,
# the TERMite endpoint URL, and the comma-separated entity types that are both
# requested from TERMite and used to filter the hits afterwards.
input_file = "fuzzy_sample.txt"
termite_home = "http://localhost:9090/termite"
entities = "INDICATION,HUCELL,GENE,DRUG"

# Build the request: configure a TermiteRequestBuilder step by step.
t = termite.TermiteRequestBuilder()
# NOTE(review): the input format is declared as "csv" although input_file has
# a .txt extension -- confirm this matches the actual file contents.
t.set_input_format("csv")
t.set_url(termite_home)
t.set_binary_content(input_file)
t.set_subsume(True)
t.set_entities(entities)
# Turn on the request's fuzzy option.
t.set_fuzzy(True)

# Make the request; display_request=True presumably prints the request that is
# sent -- confirm against TermiteRequestBuilder.execute.
result = t.execute(display_request=True)

# Post-process: extract the hits for the configured entity types from the
# response, with reject_ambig=False, then pretty-print them.
filtered_hits = termite.get_entity_hits_from_json(result,
                                                  entities,
                                                  reject_ambig=False)
pprint(filtered_hits)