def __init__(self):
    """
    Build an empty note: parsing tokenizers plus blank containers.
    """
    # Tokenizers used when parsing a text file
    self.sent_tokenizer = SentenceTokenizer()
    self.word_tokenizer = WordTokenizer()

    # Internal representation natural for i2b2 format
    self.fileName = 'no-file'
    self.text = ''
    self.classifications = []
    self.line_inds = []
    self.data = []  # list of list of tokens
######################################################################

__author__ = 'Willie Boag'
__date__ = 'Aug 2, 2015'

import string
import sys
import re
import nltk

from abstract_note import AbstractNote
from utilities_for_notes import concept_cmp, classification_cmp
from utilities_for_notes import lineno_and_tokspan, lno_and_tokspan__to__char_span
from utilities_for_notes import WordTokenizer, SentenceTokenizer

# Tokenizers created once, when this module is imported
word_tokenizer = WordTokenizer()
sent_tokenizer = SentenceTokenizer()


class Note_plain(AbstractNote):
    """
    AbstractNote subclass whose file extension is 'plain'.

    Attributes set by the constructor:
        text            -- the note's text (empty string until assigned)
        data            -- list of list of tokens
        classifications -- list of concept tuples
        line_inds       -- list of (start,end) indices for every line
    """

    def __init__(self):
        """Create an empty note with no text, tokens, or classifications."""
        # Initialise the text so getText() on a fresh note returns ''
        # instead of raising AttributeError.
        self.text = ''

        # Internal representation natural for i2b2 format
        self.data = []  # list of list of tokens
        self.classifications = []  # list of concept tuples
        self.line_inds = []  # list of (start,end) indices for every line

    def getExtension(self):
        """Return the extension string for this note type: 'plain'."""
        return 'plain'

    def getText(self):
        """Return the value currently stored in self.text."""
        return self.text