Beispiel #1
0
    def __init__(self):
        """Set up a blank note: two tokenizers plus empty content fields."""
        # Helpers for parsing a text file (sentence tokenizer is built first,
        # then the word tokenizer).
        sentence_splitter = SentenceTokenizer()
        token_splitter = WordTokenizer()
        self.sent_tokenizer, self.word_tokenizer = sentence_splitter, token_splitter

        # Internal representation natural for i2b2 format; every content
        # field starts out empty.
        self.text = ''
        # data is a list of list of tokens; each attribute gets its own list.
        self.data, self.line_inds, self.classifications = [], [], []
        # Starts out as the literal 'no-file'.
        self.fileName = 'no-file'
Beispiel #2
0
######################################################################

__author__ = 'Willie Boag'
__date__ = 'Aug 2, 2015'

import string
import sys
import re
import nltk

from abstract_note import AbstractNote
from utilities_for_notes import concept_cmp, classification_cmp
from utilities_for_notes import lineno_and_tokspan, lno_and_tokspan__to__char_span
from utilities_for_notes import WordTokenizer, SentenceTokenizer

# Module-level tokenizer instances, constructed once when this module is
# imported.
# NOTE(review): presumably shared by the parsing code further down in this
# module -- their usage is not visible here, confirm before changing.
word_tokenizer = WordTokenizer()
sent_tokenizer = SentenceTokenizer()


class Note_plain(AbstractNote):
    def __init__(self):
        """Create an empty plain-text note.

        All fields start out empty. ``text`` is initialised here as well so
        that ``getText()`` returns an empty string, instead of raising
        ``AttributeError``, when it is called before any text has been
        assigned to the note.
        """
        # Internal representation natural for i2b2 format
        self.text = ''  # value returned by getText()
        self.data = []  # list of list of tokens
        self.classifications = []  # list of concept tuples
        self.line_inds = []  # list of (start,end) indices for every line

    def getExtension(self):
        """Return the extension string for this note type: ``'plain'``."""
        extension = 'plain'
        return extension

    def getText(self):
        """Return whatever is currently stored in ``self.text``."""
        note_text = self.text
        return note_text