コード例 #1
0
ファイル: app.py プロジェクト: wyzdeutsch/TimeLineCurator
def dothenlp():
    """Tag temporal expressions in the posted document and return it as JSON.

    On a POST request, reads ``request.json['myData']``, wraps it in a
    ``TernDocument``, runs the TERNIP recogniser and normaliser over its
    sentences and reconciles the annotations back into the document.

    Returns:
        A ``jsonify`` response whose ``result`` field is the part of the
        serialised document that precedes ``<TEXT>``, followed by a
        ``<SENTENCES>`` section containing the ``str()`` of the list of
        sentences produced by ``nltk.sent_tokenize``. If any step raises,
        ``result`` is the string ``"something wrong"`` instead.
        For non-POST requests the function falls through and returns ``None``.
    """
    # Local import so the block does not depend on the file's import section.
    import logging

    def restore_entities(text):
        # Turn the document's placeholder tokens back into HTML entities,
        # in the same order as before (quote, apostrophe, ampersand).
        for placeholder, entity in (("_QUOTE_", "&quot;"),
                                    ("_APOSTROPHE_", "&#39;"),
                                    ("_AND_", "&")):
            text = text.replace(placeholder, entity)
        return text

    # NOTE(review): nothing is returned for other HTTP methods; confirm the
    # route is registered for POST only.
    if request.method == "POST":
        try:
            myData = request.json['myData']
            recogniser = ternip.recogniser()
            normaliser = ternip.normaliser()
            doc = TernDocument(myData)

            # Reference date for normalisation: the <DATE_TIME> content of the
            # untagged document with hyphens removed (presumably YYYYMMDD --
            # confirm against the posted data).
            ref_date = find_between(str(doc), "<DATE_TIME>",
                                    "</DATE_TIME>").replace("-", "")

            sents = recogniser.tag(doc.get_sents())
            normaliser.annotate(sents, ref_date)
            doc.reconcile(sents)

            # Serialise the tagged document once and reuse it for both parts.
            tagged = str(doc)

            # Single sentences: body text with newlines turned into " <br>".
            body = find_between(tagged, "<TEXT>",
                                "</TEXT>").replace("\n", " <br>")
            sent_tokens = nltk.sent_tokenize(restore_entities(body))

            # Everything in front of the <TEXT> element.
            header = restore_entities(tagged.split("<TEXT>")[0])

            output = header + "\n\n<SENTENCES>\n\n" + str(
                sent_tokens) + "\n\n</SENTENCES>"
            return jsonify(result=output)
        except Exception:
            # Keep the best-effort JSON reply, but let SystemExit and
            # KeyboardInterrupt propagate and record why the request failed.
            logging.getLogger(__name__).exception("dothenlp failed")
            return jsonify(result="something wrong")
コード例 #2
0
def normalize_temporal_expressions(content, reference_date):
    """
    Annotate temporal expressions in ``content`` using TERNIP.

    The text is wrapped in a ``<TimeML>`` root element, tagged by the TERNIP
    recogniser, normalised against ``reference_date`` and serialised back to
    a string.

    Params:
        content (str): Tokenized string
        reference_date (date): Reference date; passed to the normaliser
            formatted as ``%Y%m%d``.

    Returns:
        str: The serialised annotated document with every occurrence of the
        substrings ``T24``, ``TMO``, ``TAF``, ``TEV`` and ``TNI`` removed.

    """

    recogniser = ternip.recogniser()
    normaliser = ternip.normaliser()

    doc = TimeMlDocument(f'<TimeML>\n{content}\n</TimeML>', "TimeML")
    sents = recogniser.tag(doc.get_sents())

    normaliser.annotate(sents, reference_date.strftime('%Y%m%d'))
    doc.reconcile(sents)

    xml_str = str(doc)

    # NOTE(review): these look like time-of-day suffixes in normalised values
    # (confirm). The replacement is a plain substring removal over the whole
    # serialised document, so it also hits matching text outside annotations.
    unsupported_annotations = ("T24", "TMO", "TAF", "TEV", "TNI")

    for ua in unsupported_annotations:
        xml_str = xml_str.replace(ua, "")

    return xml_str
コード例 #3
0
ファイル: app.py プロジェクト: jo-fu/TLC-OverviewPlugin
def dothenlp():
    """Tag temporal expressions in the posted document and return it as JSON.

    On a POST request, reads ``request.json['myData']``, wraps it in a
    ``TernDocument``, runs the TERNIP recogniser and normaliser over its
    sentences and reconciles the annotations back into the document.

    Returns:
        A ``jsonify`` response whose ``result`` field is the part of the
        serialised document that precedes ``<TEXT>``, followed by a
        ``<SENTENCES>`` section containing the ``str()`` of the list of
        sentences produced by ``nltk.sent_tokenize``. If any step raises,
        ``result`` is the string ``"something wrong"`` instead.
        For non-POST requests the function falls through and returns ``None``.
    """
    # Local import so the block does not depend on the file's import section.
    import logging

    def restore_entities(text):
        # Turn the document's placeholder tokens back into HTML entities,
        # in the same order as before (quote, apostrophe, ampersand).
        for placeholder, entity in (("_QUOTE_", "&quot;"),
                                    ("_APOSTROPHE_", "&#39;"),
                                    ("_AND_", "&")):
            text = text.replace(placeholder, entity)
        return text

    # NOTE(review): nothing is returned for other HTTP methods; confirm the
    # route is registered for POST only.
    if request.method == "POST":
        try:
            myData = request.json['myData']
            recogniser = ternip.recogniser()
            normaliser = ternip.normaliser()
            doc = TernDocument(myData)

            # Reference date for normalisation: the <DATE_TIME> content of the
            # untagged document with hyphens removed (presumably YYYYMMDD --
            # confirm against the posted data).
            ref_date = find_between(str(doc), "<DATE_TIME>", "</DATE_TIME>").replace("-", "")

            sents = recogniser.tag(doc.get_sents())
            normaliser.annotate(sents, ref_date)
            doc.reconcile(sents)

            # Serialise the tagged document once and reuse it for both parts.
            tagged = str(doc)

            # Single sentences: body text with newlines turned into " <br>".
            body = find_between(tagged, "<TEXT>", "</TEXT>").replace("\n", " <br>")
            sent_tokens = nltk.sent_tokenize(restore_entities(body))

            # Everything in front of the <TEXT> element.
            header = restore_entities(tagged.split("<TEXT>")[0])

            output = header + "\n\n<SENTENCES>\n\n" + str(sent_tokens) + "\n\n</SENTENCES>"
            return jsonify(result=output)
        except Exception:
            # Keep the best-effort JSON reply, but let SystemExit and
            # KeyboardInterrupt propagate and record why the request failed.
            logging.getLogger(__name__).exception("dothenlp failed")
            return jsonify(result="something wrong")
コード例 #4
0
from score_entities import score_entities

# Attach a stream handler with a timestamped record format to the root logger
# and set the 'ternip' logger's level to INFO.
console = logging.StreamHandler()
console.setFormatter(logging.Formatter('[%(asctime)s] %(name)-12s %(levelname)-8s %(message)s'))
logging.getLogger().addHandler(console)
logging.getLogger('ternip').setLevel(logging.INFO)

# Banner. These are Python 2 print statements; a bare `print` emits an empty line.
print
print "TERNIP TempEval-2 evaluator"
print

# Build the TERNIP recogniser and normaliser and report the number of rules
# each one exposes through `num_rules`.
recogniser = ternip.recogniser()
print "TERNIP loaded", recogniser.num_rules, "recognition rules"
normaliser = ternip.normaliser()
print "TERNIP loaded", normaliser.num_rules, "normalisation rules"
print
print "Loading data..."

# Read 'base-segmentation.tab' and 'dct.txt' from the data directory and hand
# their full contents to TempEval2Document.load_multi.
# NOTE(review): the path is relative to the current working directory and
# points at 'tempeval-training-2' -- confirm this is the intended data set.
data_path = os.path.normpath('../sample_data/tempeval-training-2/english/data/')
with open(os.path.join(data_path, 'base-segmentation.tab')) as fd:
    with open(os.path.join(data_path, 'dct.txt')) as dct_fd:
        docs = TempEval2Document.load_multi(fd.read(), dct_fd.read())

# Scratch directory for the output tables below; mkdtemp() leaves removal of
# the directory to the caller.
temp = tempfile.mkdtemp()

# Output tables, opened for writing inside the scratch directory.
# NOTE(review): no close() is visible in this part of the script -- confirm
# both handles are closed further down.
ternip_extents = open(os.path.join(temp, 'ternip-extents.tab'), 'w')
ternip_attrs = open(os.path.join(temp, 'ternip-attrs.tab'), 'w')