Exemple #1
0
 def rollover(self):
     """Move the in-memory buffer's contents into a temporary file.

     Does nothing when ``self._rolled`` is already true. Otherwise the
     current value of ``self.buffer`` is written to a fresh temporary
     file wrapped in a UTF-8 ``EncodedFile``, the previous stream
     position is restored on the new file, the old buffer is closed and
     the wrapper is stored in ``self._buffer``.
     """
     if self._rolled:
         return

     # Capture the position and contents before the buffer is closed.
     offset = self.buffer.tell()
     contents = self.buffer.getvalue()

     spill = EncodedFile(TemporaryFile(), data_encoding='utf-8')
     spill.write(contents)
     spill.seek(offset)

     self.buffer.close()
     self._buffer = spill
Exemple #2
0
 def rollover(self):
     """Spill the in-memory buffer into a temporary file under ``self._dir``.

     A no-op when ``self._rolled`` is already true. Otherwise a
     temporary file is created in ``self._dir`` and wrapped in a UTF-8
     ``EncodedFile``; the value of ``self.buffer`` is written to it, the
     stream position is carried over, the old buffer is closed and the
     wrapper becomes ``self._buffer``.
     """
     if self._rolled:
         return

     # Read everything needed from the old buffer before closing it.
     saved_pos = self.buffer.tell()
     payload = self.buffer.getvalue()

     backing_file = TemporaryFile(dir=self._dir)
     rolled = EncodedFile(backing_file, data_encoding='utf-8')
     rolled.write(payload)
     rolled.seek(saved_pos)

     self.buffer.close()
     self._buffer = rolled
 def convert_to_tags(self):
     """
     Process the input file line by line and dispatch each line to a
     handler.

     The first 16 characters of every line ([:16]) are used as a key into
     the state dictionary. When the key is present, the matching function
     is called with the whole line; lines with no entry are skipped.
     The functions that are called:
         a text function for text
         an open function for open tags
         an open with attribute function for tags with attributes
         an empty with attribute function for tags that are empty but have
         attributes.
         a closed function for closed tags.
         an empty tag function.

     Afterwards the output is optionally re-encoded, optionally copied to
     "convert_to_tags.data", moved over the input file, and the scratch
     file is removed.
     """
     self.__initiate_values()
     with open(self.__write_to, 'w') as self.__write_obj:
         self.__write_dec()
         with open(self.__file, 'r') as token_lines:
             for token_line in token_lines:
                 self.__token_info = token_line[:16]
                 handler = self.__state_dict.get(self.__token_info)
                 if handler is None:
                     continue
                 handler(token_line)
     # convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
     if self.__convert_utf or self.__bad_encoding:
         copy.Copy(bug_handler=self.__bug_handler).rename(
             self.__write_to, self.__file)
         # A bad encoding falls back to plain ASCII; otherwise use UTF-8.
         file_encoding = "us-ascii" if self.__bad_encoding else "utf-8"
         with open(self.__file, 'r') as recode_src:
             with open(self.__write_to, 'w') as recode_dst:
                 recoder = EncodedFile(recode_dst, self.__encoding,
                                       file_encoding, 'replace')
                 for text_line in recode_src:
                     recoder.write(text_line)
     finisher = copy.Copy(bug_handler=self.__bug_handler)
     if self.__copy:
         finisher.copy_file(self.__write_to, "convert_to_tags.data")
     finisher.rename(self.__write_to, self.__file)
     # NOTE(review): removing the scratch file after rename() implies that
     # rename() leaves its source in place - confirm in copy.Copy.
     os.remove(self.__write_to)
Exemple #4
0
 def convert_to_tags(self):
     """
     Read the input file one line at a time and call the function
     registered for each line.

     The key for a line is its first 16 characters ([:16]). If the state
     dictionary has an entry for that key, the entry is called with the
     full line; otherwise the line is ignored.
     The functions that are called:
         a text function for text
         an open function for open tags
         an open with attribute function for tags with attributes
         an empty with attribute function for tags that are empty but have
         attributes.
         a closed function for closed tags.
         an empty tag function.

     Once all lines are handled, the result is re-encoded when requested,
     optionally copied to "convert_to_tags.data", moved over the input
     file, and the scratch file is deleted.
     """
     self.__initiate_values()
     with open(self.__write_to, 'w') as self.__write_obj:
         self.__write_dec()
         with open(self.__file, 'r') as infile:
             for raw_line in infile:
                 self.__token_info = raw_line[:16]
                 callback = self.__state_dict.get(self.__token_info)
                 if callback is None:
                     continue
                 callback(raw_line)
     # convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
     if self.__convert_utf or self.__bad_encoding:
         swapper = copy.Copy(bug_handler=self.__bug_handler)
         swapper.rename(self.__write_to, self.__file)
         # ASCII is used only when the encoding was flagged as bad.
         file_encoding = "us-ascii" if self.__bad_encoding else "utf-8"
         with open(self.__file, 'r') as infile:
             with open(self.__write_to, 'w') as outfile:
                 encoded_out = EncodedFile(outfile, self.__encoding,
                                           file_encoding, 'replace')
                 for raw_line in infile:
                     encoded_out.write(raw_line)
     mover = copy.Copy(bug_handler=self.__bug_handler)
     if self.__copy:
         mover.copy_file(self.__write_to, "convert_to_tags.data")
     mover.rename(self.__write_to, self.__file)
     # NOTE(review): the scratch file is removed explicitly, so rename()
     # presumably copies rather than moves - verify against copy.Copy.
     os.remove(self.__write_to)
Exemple #5
0
class DocGenerator(xmlapp.Application):
    """Application that writes the parse events it receives to ``self.out``.

    Each handler formats its event as markup text and passes it to
    ``self.out.write``.
    """

    def __init__(self, out=None):
        """Store the output stream.

        Any falsy ``out`` (not only ``None``) selects the default:
        ``sys.stdout`` wrapped in a UTF-8 ``EncodedFile``.
        """
        self.out = out if out else EncodedFile(sys.stdout, "utf-8")

    def handle_pi(self, target, remainder):
        """Write a processing instruction as ``<?target remainder?>``."""
        pi_text = "<?%s %s?>" % (target, remainder)
        self.out.write(pi_text)

    def handle_start_tag(self, name, amap):
        """Write the start tag ``name`` followed by the attributes in ``amap``.

        Attribute values are passed through ``escape_attval`` before being
        written inside double quotes.
        """
        emit = self.out.write
        emit("<" + name)
        for attr_name, attr_value in amap.items():
            emit(' %s="%s"' % (attr_name, escape_attval(attr_value)))
        emit(">")

    def handle_end_tag(self, name):
        """Write the end tag for ``name``."""
        closing = "</%s>" % name
        self.out.write(closing)

    def handle_ignorable_data(self, data, start_ix, end_ix):
        """Write ``data[start_ix:end_ix]`` after ``escape_content``."""
        chunk = data[start_ix:end_ix]
        self.out.write(escape_content(chunk))

    def handle_data(self, data, start_ix, end_ix):
        """Write ``data[start_ix:end_ix]`` after ``escape_content``."""
        chunk = data[start_ix:end_ix]
        self.out.write(escape_content(chunk))
Exemple #6
0
class DocGenerator(xmlapp.Application):
    """Re-emit incoming parse events as markup on an output stream.

    Every handler builds the text for its event and hands it to
    ``self.out.write``.
    """

    def __init__(self, out=None):
        """Remember the output stream.

        When ``out`` is falsy, ``sys.stdout`` wrapped in a UTF-8
        ``EncodedFile`` is used instead.
        """
        self.out = out or EncodedFile(sys.stdout, "utf-8")

    def handle_pi(self, target, remainder):
        """Emit a processing instruction: ``<?target remainder?>``."""
        self.out.write("<?%s %s?>" % (target, remainder))

    def handle_start_tag(self, name, amap):
        """Emit ``<name`` plus one ``key="value"`` pair per entry in ``amap``.

        Each value goes through ``escape_attval`` before it is written.
        """
        sink = self.out
        sink.write("<" + name)
        for key, raw_value in amap.items():
            escaped = escape_attval(raw_value)
            sink.write(' %s="%s"' % (key, escaped))
        sink.write(">")

    def handle_end_tag(self, name):
        """Emit the closing tag ``</name>``."""
        self.out.write("</%s>" % name)

    def handle_ignorable_data(self, data, start_ix, end_ix):
        """Emit the slice ``data[start_ix:end_ix]`` via ``escape_content``."""
        escaped = escape_content(data[start_ix:end_ix])
        self.out.write(escaped)

    def handle_data(self, data, start_ix, end_ix):
        """Emit the slice ``data[start_ix:end_ix]`` via ``escape_content``."""
        escaped = escape_content(data[start_ix:end_ix])
        self.out.write(escaped)