def dothenlp():
    if request.method == "POST":
        try:
            myData = request.json['myData']
            recogniser = ternip.recogniser()
            normaliser = ternip.normaliser()
            doc = TernDocument(myData)
            strdoc = str(doc)
            # The reference date sits between <DATE_TIME> tags; TERNIP expects YYYYMMDD.
            ref_date = find_between(strdoc, "<DATE_TIME>", "</DATE_TIME>").replace("-", "")
            sents = recogniser.tag(doc.get_sents())
            normaliser.annotate(sents, ref_date)
            doc.reconcile(sents)
            # Split the annotated body into single sentences.
            s = find_between(str(doc), "<TEXT>", "</TEXT>").replace("\n", " <br>")
            s = s.replace("_QUOTE_", '"')
            s = s.replace("_APOSTROPHE_", "'")
            s = s.replace("_AND_", "&")
            sent_tokens = nltk.sent_tokenize(s)
            # Everything before <TEXT> is the document header.
            t = str(doc).split("<TEXT>")[0]
            t = t.replace("_QUOTE_", '"')
            t = t.replace("_APOSTROPHE_", "'")
            t = t.replace("_AND_", "&")
            output = t + "\n\n<SENTENCES>\n\n" + str(sent_tokens) + "\n\n</SENTENCES>"
            return jsonify(result=output)
        except Exception:
            return jsonify(result="something wrong")
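# find_between() is called above but never defined in this section. A minimal
# sketch of what it presumably does; the name and signature come from the call
# sites above, the body is an assumption:
def find_between(text, start_tag, end_tag):
    """Return the substring of text between the first start_tag and end_tag."""
    start = text.find(start_tag)
    if start == -1:
        return ""
    start += len(start_tag)
    end = text.find(end_tag, start)
    return text[start:end] if end != -1 else ""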
def normalize_temporal_expressions(content, reference_date):
    """
    Recognise and normalise temporal expressions in a text with TERNIP.

    Params:
        content (str): Tokenized string.
        reference_date (date): Reference date used to anchor relative expressions.

    Returns:
        str: The annotated document as a TimeML string.
    """
    recogniser = ternip.recogniser()
    normaliser = ternip.normaliser()
    content = f'<TimeML>\n{content}\n</TimeML>'
    doc = TimeMlDocument(content, "TimeML")
    sents = recogniser.tag(doc.get_sents())
    normaliser.annotate(sents, reference_date.strftime('%Y%m%d'))
    doc.reconcile(sents)
    xml_str = str(doc)
    # Strip TIMEX value codes that downstream consumers do not support.
    unsupported_annotations = ["T24", "TMO", "TAF", "TEV", "TNI"]
    for ua in unsupported_annotations:
        xml_str = xml_str.replace(ua, "")
    return xml_str
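# Example call, assuming ternip and TimeMlDocument are imported as above; the
# sample text and reference date are illustrative only:
from datetime import date

annotated = normalize_temporal_expressions(
    "The meeting was moved to next Friday .",
    date(2024, 1, 15),
)
print(annotated)  # TimeML string with TIMEX annotations inserted by TERNIP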
import logging
import os
import tempfile

import ternip
# Import path assumed; adjust to where TempEval2Document lives in your TERNIP layout.
from ternip.formats import TempEval2Document
from score_entities import score_entities

console = logging.StreamHandler()
console.setFormatter(logging.Formatter('[%(asctime)s] %(name)-12s %(levelname)-8s %(message)s'))
logging.getLogger().addHandler(console)
logging.getLogger('ternip').setLevel(logging.INFO)

print()
print("TERNIP TempEval-2 evaluator")
print()

# Load TERNIP
recogniser = ternip.recogniser()
print("TERNIP loaded", recogniser.num_rules, "recognition rules")
normaliser = ternip.normaliser()
print("TERNIP loaded", normaliser.num_rules, "normalisation rules")

print()
print("Loading data...")

# Load testing data
data_path = os.path.normpath('../sample_data/tempeval-training-2/english/data/')
with open(os.path.join(data_path, 'base-segmentation.tab')) as fd:
    with open(os.path.join(data_path, 'dct.txt')) as dct_fd:
        docs = TempEval2Document.load_multi(fd.read(), dct_fd.read())

temp = tempfile.mkdtemp()
ternip_extents = open(os.path.join(temp, 'ternip-extents.tab'), 'w')
ternip_attrs = open(os.path.join(temp, 'ternip-attrs.tab'), 'w')
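# The snippet ends before the evaluation loop. A minimal sketch of how each
# loaded document would be tagged, following the recognise/annotate/reconcile
# pattern used elsewhere in this section. The doc.dct attribute and the
# write_tab_lines() helper are assumptions standing in for however
# TempEval2Document exposes the creation time and serialises its extents and
# attributes:
for doc in docs:
    sents = recogniser.tag(doc.get_sents())
    normaliser.annotate(sents, doc.dct)  # document creation time; attribute assumed
    doc.reconcile(sents)
    write_tab_lines(doc, ternip_extents, ternip_attrs)  # hypothetical helper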