def parse_string(self, text):
    """Wrap a text string in a TarsqiDocument.

    A SourceDoc is created with None in place of a filename, the given
    string is stored unchanged in its text attribute, and the source
    document is then wrapped in a TarsqiDocument together with an empty
    dictionary.

    """
    # TODO: do we need to ensure the text is unicode?
    source = SourceDoc(None)
    source.text = text
    return TarsqiDocument(source, {})
def parse_file(self, filename):
    """Read a file and wrap its content in a TarsqiDocument.

    A SourceDoc is created for filename, the full file content is decoded
    as UTF-8 and stored in the text attribute of the SourceDoc, and the
    source document is then wrapped in a TarsqiDocument together with an
    empty dictionary.

    Errors raised while opening or decoding the file are not caught here
    and propagate to the caller.

    """
    sourcedoc = SourceDoc(filename)
    # Use a context manager so the file handle is closed deterministically,
    # also when reading or decoding fails.
    with codecs.open(filename, encoding='utf8') as fh:
        sourcedoc.text = fh.read()
    return TarsqiDocument(sourcedoc, {})