Beispiel #1
0
 def parse_string(self, text, tarsqidoc):
     """Build a LIF object from the JSON string in text and attach a new
     SourceDoc to tarsqidoc. The SourceDoc receives the text value of the LIF
     object as its text and keeps a reference to the LIF object itself. The
     LIF object is also stored on the parser. Nothing is returned."""
     lif = LIF(json_string=text)
     self.lif = lif
     sourcedoc = SourceDoc()
     tarsqidoc.sourcedoc = sourcedoc
     sourcedoc.text = lif.text.value
     sourcedoc.lif = lif
Beispiel #2
0
 def parse_string(self, text):
     """Wrap a text string in a new TarsqiDocument and return it. The string
     is stored unchanged as the text of a SourceDoc that is created with None
     as its argument, and the TarsqiDocument is created from that SourceDoc
     and an empty dictionary."""
     # TODO: do we need to ensure the text is unicode?
     source = SourceDoc(None)
     source.text = text
     return TarsqiDocument(source, {})
Beispiel #3
0
 def _parse(self, tarsqidoc):
     """Create a new SourceDoc, link it to tarsqidoc and fill it in. The text
     is taken from the data of the first child of the 'text' entry in
     self.topnodes; the remaining content is added by the _add_* helpers.
     Both the SourceDoc and tarsqidoc are kept on the parser. Nothing is
     returned."""
     source = SourceDoc(None)
     self.sourcedoc = source
     self.tarsqidoc = tarsqidoc
     tarsqidoc.sourcedoc = source
     source.text = self.topnodes['text'].firstChild.data
     # The helpers run in this fixed order: source tags, tarsqi tags,
     # comments and then metadata.
     self._add_source_tags()
     self._add_tarsqi_tags()
     self._add_comments()
     self._add_metadata()
Beispiel #4
0
 def parse_string(self, text, tarsqidoc):
     """Run the expat parser over a text string and store the resulting
     SourceDoc on tarsqidoc. The whole string is handed to the Parse method
     of the parser in a single call; the parser's handlers are expected to
     fill in the text and tags of the SourceDoc, which is created before
     parsing and finished afterwards. Nothing is returned."""
     # TODO: do we need to make sure that text is unicode?
     self.sourcedoc = SourceDoc(None)
     self.parser.Parse(text)
     parsed = self.sourcedoc
     parsed.finish()
     tarsqidoc.sourcedoc = parsed
Beispiel #5
0
 def parse_file(self, filename, tarsqidoc):
     """Parse the file at filename with the expat parser and store the
     resulting SourceDoc on tarsqidoc. Uses the ParseFile routine of the
     expat parser, where the handlers are expected to fill in the text and
     tags of the SourceDoc. The SourceDoc is created with filename before
     parsing and finished afterwards. Nothing is returned."""
     self.sourcedoc = SourceDoc(filename)
     # The file is opened in binary mode because ParseFile requires read()
     # to return bytes under Python 3 and expat decodes the XML itself, which
     # also covers non-ascii content. The with statement makes sure the file
     # handle is closed, also when the parser raises an error.
     with open(filename, 'rb') as fh:
         self.parser.ParseFile(fh)
     self.sourcedoc.finish()
     tarsqidoc.sourcedoc = self.sourcedoc
Beispiel #6
0
 def parse_file(self, filename):
     """Load the file at filename, build a TarsqiDocument from it and return
     that document. The DOM is loaded first, then a SourceDoc is created for
     filename and wrapped in a TarsqiDocument with an empty dictionary. The
     text comes from the data of the first child of the 'text' entry in
     self.topnodes; the remaining content is added by the _add_* helpers."""
     self._load_dom(filename)
     source = SourceDoc(filename)
     self.sourcedoc = source
     self.tarsqidoc = TarsqiDocument(source, {})
     source.text = self.topnodes['text'].firstChild.data
     # The helpers run in this fixed order: source tags, tarsqi tags,
     # comments and then metadata.
     self._add_source_tags()
     self._add_tarsqi_tags()
     self._add_comments()
     self._add_metadata()
     return self.tarsqidoc
Beispiel #7
0
 def parse_file(self, filename, tarsqidoc):
     """Read a LIF object from the JSON file at filename and attach a new
     SourceDoc to tarsqidoc. When is_container() accepts the file, it is
     loaded as a Container and the LIF object is the payload of the
     container; otherwise the file is loaded as a LIF object directly and the
     container is None. The SourceDoc receives the text value of the LIF
     object plus references to the LIF object and the container. Nothing is
     returned."""
     if not self.is_container(filename):
         self.container = None
         self.lif = LIF(json_file=filename)
     else:
         self.container = Container(json_file=filename)
         self.lif = self.container.payload
     sourcedoc = SourceDoc(filename)
     tarsqidoc.sourcedoc = sourcedoc
     sourcedoc.text = self.lif.text.value
     sourcedoc.lif = self.lif
     sourcedoc.lif_container = self.container
Beispiel #8
0
 def parse_file(self, filename, tarsqidoc):
     """Parse the file at filename with the expat parser and store the
     resulting SourceDoc on tarsqidoc. The full file content is read first
     and then handed to the Parse routine of the expat parser, where the
     handlers are expected to fill in the text and tags of the SourceDoc. The
     SourceDoc is created with filename before parsing and finished
     afterwards. Nothing is returned."""
     self.sourcedoc = SourceDoc(filename)
     # NOTE: the content is read up front and given to Parse() instead of
     # passing an open file to ParseFile(); that replacement was needed while
     # preparing to port the code to Python3.
     # TODO: should codecs.open() get an explicit encoding for non-ascii?
     # The with statement makes sure the file handle is closed after reading.
     with codecs.open(filename) as fh:
         content = fh.read()
     self.parser.Parse(content)
     self.sourcedoc.finish()
     tarsqidoc.sourcedoc = self.sourcedoc
Beispiel #9
0
 def parse_file(self, filename):
     """Read the file at filename and return a new TarsqiDocument for it. The
     full file content is decoded as utf8 and stored unchanged as the text
     of a SourceDoc created with filename; the TarsqiDocument is created from
     that SourceDoc and an empty dictionary."""
     sourcedoc = SourceDoc(filename)
     # The with statement makes sure the file handle is closed after reading.
     with codecs.open(filename, encoding='utf8') as fh:
         sourcedoc.text = fh.read()
     return TarsqiDocument(sourcedoc, {})
Beispiel #10
0
 def parse_string(self, text, tarsqidoc):
     """Attach a new SourceDoc holding a text string to tarsqidoc. The
     SourceDoc is created with None as its argument and the string is stored
     unchanged as its text. Nothing is returned."""
     # TODO: do we need to ensure the text is unicode?
     source = SourceDoc(None)
     tarsqidoc.sourcedoc = source
     source.text = text