def rollover(self):
    """Roll the in-memory buffer over to a temporary file.

    Does nothing when ``self._rolled`` is already true. Otherwise the
    current buffer contents are copied into a new ``TemporaryFile``
    wrapped by ``EncodedFile`` (data encoding ``utf-8``), the new file is
    positioned at the offset the old buffer was at, the old buffer is
    closed and ``self._buffer`` is rebound to the new file.

    Any exception raised while copying propagates to the caller; in that
    case the freshly created temporary file is closed first and the
    existing buffer is left untouched.
    """
    if self._rolled:
        return

    tmp = EncodedFile(TemporaryFile(), data_encoding='utf-8')
    try:
        pos = self.buffer.tell()
        tmp.write(self.buffer.getvalue())
        tmp.seek(pos)
    except BaseException:
        # The temp file is not yet owned by self; close it here so a
        # failed copy does not leak its file descriptor.
        tmp.close()
        raise

    # Only swap buffers once the copy has fully succeeded.
    self.buffer.close()
    self._buffer = tmp
def rollover(self):
    """Roll the in-memory buffer over to a temporary file.

    Does nothing when ``self._rolled`` is already true. Otherwise a new
    ``TemporaryFile`` is created in ``self._dir`` and wrapped by
    ``EncodedFile`` (data encoding ``utf-8``). The current buffer contents
    are copied into it, it is positioned at the offset the old buffer was
    at, the old buffer is closed and ``self._buffer`` is rebound to it.

    Any exception raised while copying propagates to the caller; in that
    case the freshly created temporary file is closed first and the
    existing buffer is left untouched.
    """
    if self._rolled:
        return

    tmp = EncodedFile(TemporaryFile(dir=self._dir), data_encoding='utf-8')
    try:
        pos = self.buffer.tell()
        tmp.write(self.buffer.getvalue())
        tmp.seek(pos)
    except BaseException:
        # The temp file is not yet owned by self; close it here so a
        # failed copy does not leak its file descriptor.
        tmp.close()
        raise

    # Only swap buffers once the copy has fully succeeded.
    self.buffer.close()
    self._buffer = tmp
def convert_to_tags(self):
    """
    Convert the input file to tags, one line at a time.

    For every line, the first 16 characters are stored as the token info
    and looked up in the state dictionary. When an entry exists it is
    called with the whole line; lines without an entry are skipped.
    The handlers that can be called are:
        a text function for text
        an open function for open tags
        an open-with-attribute function for tags with attributes
        an empty-with-attribute function for tags that are empty but
        have attributes
        a closed function for closed tags
        an empty tag function

    If UTF conversion was requested or the encoding was flagged as bad,
    the result is then re-written through an EncodedFile wrapper (data
    encoding ``self.__encoding``; file encoding "utf-8", or "us-ascii"
    for a bad encoding; error handling 'replace').

    Finally the output is optionally copied to "convert_to_tags.data",
    renamed over the input file, and the temporary output is removed.
    """
    self.__initiate_values()
    with open(self.__write_to, 'w') as self.__write_obj:
        self.__write_dec()
        with open(self.__file, 'r') as source:
            for raw_line in source:
                self.__token_info = raw_line[:16]
                handler = self.__state_dict.get(self.__token_info)
                if handler is None:
                    continue
                handler(raw_line)

    # convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
    if self.__convert_utf or self.__bad_encoding:
        mover = copy.Copy(bug_handler=self.__bug_handler)
        mover.rename(self.__write_to, self.__file)
        target_encoding = "us-ascii" if self.__bad_encoding else "utf-8"
        with open(self.__file, 'r') as source, \
                open(self.__write_to, 'w') as sink:
            recoder = EncodedFile(
                sink, self.__encoding, target_encoding, 'replace')
            for raw_line in source:
                recoder.write(raw_line)

    finisher = copy.Copy(bug_handler=self.__bug_handler)
    if self.__copy:
        finisher.copy_file(self.__write_to, "convert_to_tags.data")
    finisher.rename(self.__write_to, self.__file)
    os.remove(self.__write_to)
class DocGenerator(xmlapp.Application):
    """Application that writes the events it receives back out as markup.

    Every handler formats its event and writes the text to ``self.out``.
    """

    def __init__(self, out=None):
        """Choose the output target.

        out -- object with a ``write`` method. When None, ``sys.stdout``
               wrapped by ``EncodedFile`` with encoding "utf-8" is used.
        """
        # Test against None explicitly: a caller-supplied writer that
        # happens to be falsy (e.g. an empty container-like object) must
        # not be silently replaced by stdout.
        if out is None:
            self.out = EncodedFile(sys.stdout, "utf-8")
        else:
            self.out = out

    def handle_pi(self, target, remainder):
        """Write a processing instruction as ``<?target remainder?>``."""
        self.out.write("<?%s %s?>" % (target, remainder))

    def handle_start_tag(self, name, amap):
        """Write a start tag for *name* with the attributes in *amap*.

        amap -- mapping of attribute name to value; each value is passed
                through ``escape_attval`` before being written.
        """
        self.out.write("<" + name)
        # Distinct loop names so the ``name`` parameter is not shadowed.
        for (attr_name, attr_value) in amap.items():
            self.out.write(' %s="%s"' % (attr_name, escape_attval(attr_value)))
        self.out.write(">")

    def handle_end_tag(self, name):
        """Write the end tag ``</name>``."""
        self.out.write("</%s>" % name)

    def handle_ignorable_data(self, data, start_ix, end_ix):
        """Write ``data[start_ix:end_ix]`` after ``escape_content``."""
        self.out.write(escape_content(data[start_ix:end_ix]))

    def handle_data(self, data, start_ix, end_ix):
        """Write ``data[start_ix:end_ix]`` after ``escape_content``."""
        self.out.write(escape_content(data[start_ix:end_ix]))
class DocGenerator(xmlapp.Application):
    """Application that serialises incoming events to an output stream.

    Each ``handle_*`` method turns one event into text and writes it to
    ``self.out``.
    """

    def __init__(self, out=None):
        """Store the output stream.

        out -- object providing ``write``. If None, output goes to
               ``EncodedFile(sys.stdout, "utf-8")``.
        """
        # ``is None`` rather than truthiness: an explicitly passed writer
        # whose truth value is False should still be honoured instead of
        # being swapped for stdout.
        if out is None:
            self.out = EncodedFile(sys.stdout, "utf-8")
        else:
            self.out = out

    def handle_pi(self, target, remainder):
        """Emit ``<?target remainder?>``."""
        self.out.write("<?%s %s?>" % (target, remainder))

    def handle_start_tag(self, name, amap):
        """Emit the opening tag for *name*, followed by its attributes.

        amap -- mapping from attribute name to value; values are run
                through ``escape_attval`` before they are written.
        """
        self.out.write("<" + name)
        # Loop variables are named apart from the ``name`` parameter so
        # the element name stays intact for the whole method.
        for (attr_name, attr_value) in amap.items():
            self.out.write(' %s="%s"' % (attr_name, escape_attval(attr_value)))
        self.out.write(">")

    def handle_end_tag(self, name):
        """Emit ``</name>``."""
        self.out.write("</%s>" % name)

    def handle_ignorable_data(self, data, start_ix, end_ix):
        """Emit the slice ``data[start_ix:end_ix]`` via ``escape_content``."""
        self.out.write(escape_content(data[start_ix:end_ix]))

    def handle_data(self, data, start_ix, end_ix):
        """Emit the slice ``data[start_ix:end_ix]`` via ``escape_content``."""
        self.out.write(escape_content(data[start_ix:end_ix]))