def check_state_handling_encode(self, encoding, u, s):
    """Verify an incremental encoder's state survives getstate()/setstate().

    For every split point of ``u``, encode the head with one encoder,
    snapshot its state, restore that state into a brand-new encoder,
    encode the tail there, and check the two halves equal ``s``.
    """
    for split in range(len(u) + 1):
        first = codecs.getincrementalencoder(encoding)()
        head = first.encode(u[:split])
        snapshot = first.getstate()
        second = codecs.getincrementalencoder(encoding)()
        second.setstate(snapshot)
        tail = second.encode(u[split:], True)
        self.assertEqual(s, head + tail)
def test_decoder_state(self): import codecs encoding = 'utf16' u = 'abc123' s = u.encode(encoding) for i in range(len(u) + 1): d = codecs.getincrementalencoder(encoding)() part1 = d.encode(u[:i]) state = d.getstate() d = codecs.getincrementalencoder(encoding)() d.setstate(state) part2 = d.encode(u[i:], True) assert s == part1 + part2
def __init__(self, f, dialect=csv.excel, encoding='utf-8', **kwds):
    """Queue-backed csv writer that re-encodes rows incrementally.

    A UTF-8 BOM is written to the target stream up front so Excel
    recognises the file's encoding.
    """
    # Target stream gets the BOM immediately.
    self.stream = f
    self.stream.write(codecs.BOM_UTF8)  # BOM for Excel
    # Rows are staged in this in-memory queue before being flushed.
    self.queue = cStringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
def __init__(self, transport, encoding=None, errors="strict"):
    """Wrap *transport* with incremental text encoding/decoding.

    When *encoding* is omitted, the platform filesystem encoding is used.
    *errors* is the codec error policy for both directions.
    """
    StreamTransportAdapter.__init__(self, transport)
    chosen = sys.getfilesystemencoding() if encoding is None else encoding
    self.encoding = chosen
    self.encoder = codecs.getincrementalencoder(chosen)(errors)
    self.decoder = codecs.getincrementaldecoder(chosen)(errors)
def get_csv(self, dialect=csv.excel, encoding="utf-8"):
    """Render ``self.results`` as CSV text in the target *encoding*.

    Each row is staged in an in-memory UTF-8 queue, decoded back to
    unicode, re-encoded incrementally into *encoding*, and appended to
    the output stream; the final string is returned.
    """
    col_names_in_order = self.__class__.col_names_in_order
    # NOTE(review): kept although unused here, in case class attribute
    # access is relied on to fail fast -- confirm before removing.
    col_names_to_display_names = self.__class__.col_names_to_display_names
    queue = cStringIO.StringIO()
    writer = csv.writer(queue, dialect=dialect)
    encoder = codecs.getincrementalencoder(encoding)()
    stream = cStringIO.StringIO()
    for row in self.results:
        row_values = []
        for col_name in col_names_in_order:
            col_value = self._decorate_data_for_csv(row, col_name)
            row_values.append(col_value.encode("utf-8"))
        writer.writerow(row_values)
        # Fetch UTF-8 output from the queue ...
        data = queue.getvalue()
        data = data.decode("utf-8")
        # ... and re-encode it into the target encoding
        data = encoder.encode(data)
        # write to the target stream
        stream.write(data)
        # BUG FIX: truncate() with no argument cuts at the *current*
        # file position, which sits at the end of the buffer right
        # after writerow() -- so the queue was never actually emptied
        # and every flush re-emitted all previous rows.  truncate(0)
        # really resets the queue (same pattern as csv_generator_p2).
        queue.truncate(0)
    return stream.getvalue()
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
    """Init method.

    Rows are staged in an in-memory queue (with Excel formatting
    settings) and re-encoded incrementally before reaching *f*.
    """
    self.stream = f
    self.queue = cStringIO.StringIO()
    # created a writer with Excel formating settings
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
def __init__(self, f, fieldnames, **kwds):
    """Queue-backed DictWriter; the header row is written into the
    in-memory queue immediately on construction."""
    self.stream = f
    self.queue = cStringIO.StringIO()
    self.encoder = codecs.getincrementalencoder('utf-8')()
    self.writer = csv.DictWriter(self.queue, fieldnames, **kwds)
    self.writer.writeheader()
def csv_generator_p2(records, fields, include_header=True, header=None, dialect=csv.excel):
    """Yield encoded CSV rows for *records* (Python 2 flavour).

    Each row is rendered into an in-memory queue, decoded back to
    unicode, and incrementally re-encoded to UTF-8 before being yielded.
    """
    queue = compat.StringIO()
    writer = csv.writer(queue, dialect=dialect)
    encoder = codecs.getincrementalencoder("utf-8")()

    def _row_string(row):
        writer.writerow(row)
        # Fetch UTF-8 output from the queue ...
        data = queue.getvalue()
        data = compat.to_unicode(data)
        # ... and reencode it into the target encoding
        data = encoder.encode(data)
        # empty queue
        queue.truncate(0)
        return data

    if include_header:
        yield _row_string(header or fields)

    for record in records:
        cells = []
        for field in fields:
            value = record.get(field)
            if isinstance(value, compat.string_type):
                cells.append(value.encode("utf-8"))
            elif value is not None:
                cells.append(compat.text_type(value))
            else:
                cells.append(None)
        yield _row_string(cells)
def write_dataset_variable_tags(client, dataset, writer, verbose=False):
    """Fetch all variables of a collected dataset and write one CSV row per
    namespaced attribute value ("tag") via *writer*.

    NOTE(review): formatting reconstructed from a collapsed source line;
    nesting of writerow() under the 'namespace' check is the most plausible
    reading -- confirm against upstream.
    """
    # send request to get total count
    ws = mica.core.UriBuilder(['collected-dataset', dataset, 'variables']).query('from', 0).query('limit', 0).build()
    response = send_request(client, ws, verbose)
    total = response['total'] if 'total' in response else 0
    # Incremental encoder used to byte-encode the text fields of each row.
    encoder = codecs.getincrementalencoder('utf-8')()
    f = 0
    # Page through the variables, 1000 at a time.
    while total > 0 and f < total:
        ws = mica.core.UriBuilder(['collected-dataset', dataset, 'variables']).query('from', f).query('limit', 1000).build()
        response = send_request(client, ws, verbose)
        f = f + 1000
        # format response
        if 'variables' in response:
            for var in response['variables']:
                label = ''
                if 'attributes' in var:
                    # First pass: pick up the variable's label (last match wins).
                    for attr in var['attributes']:
                        if attr['name'] == 'label':
                            label = attr['values'][0]['value']
                    # Second pass: emit one row per namespaced attribute.
                    for attr in var['attributes']:
                        if 'namespace' in attr:
                            tag = attr['namespace'] + '::' + attr['name'] + '.' + attr['values'][0]['value']
                            writer.writerow({'study': var['studyIds'][0],
                                             'dataset': encoder.encode(var['datasetId']),
                                             'name': encoder.encode(var['name']),
                                             'index': str(var['index']),
                                             'label': encoder.encode(label),
                                             'tag': tag})
def do_command(args):
    """ Execute tags command """
    # Output goes to stdout unless an --out path was given.
    # NOTE(review): the file opened here is never explicitly closed -- confirm
    # whether the process lifetime makes that acceptable.
    file = sys.stdout
    if args.out:
        file = open(args.out, 'wb')
    writer = csv.DictWriter(file,
                            fieldnames=['study', 'dataset', 'name', 'index', 'label', 'tag'],
                            escapechar='"', quotechar='"', quoting=csv.QUOTE_ALL)
    writer.writeheader()
    client = mica.core.MicaClient.build(mica.core.MicaClient.LoginInfo.parse(args))
    if args.dataset == None:
        # No dataset given: enumerate every collected dataset, paging 100 at a time.
        ws = mica.core.UriBuilder(['collected-datasets']).query('from', 0).query('limit', 0).build()
        response = send_request(client, ws, args.verbose)
        total = response['total'] if 'total' in response else 0
        encoder = codecs.getincrementalencoder('utf-8')()
        f = 0
        while total > 0 and f < total:
            ws = mica.core.UriBuilder(['collected-datasets']).query('from', f).query('limit', 100).build()
            response = send_request(client, ws, args.verbose)
            f = f + 100
            if 'datasets' in response:
                for ds in response['datasets']:
                    write_dataset_variable_tags(client, encoder.encode(ds['id']), writer, args.verbose)
    else:
        # A specific dataset was requested; dump only its tags.
        write_dataset_variable_tags(client, args.dataset, writer, args.verbose)
def __init__(self, fd, dialect=csv.excel, encoding="utf-8", **kwds):
    """Queue-backed csv writer with a semicolon delimiter and
    QUOTE_NONNUMERIC quoting, re-encoding rows incrementally."""
    self.stream = fd
    self.queue = StringIO()
    self.encoder = codecs.getincrementalencoder(encoding)()
    self.writer = csv.writer(self.queue, dialect=dialect, delimiter=';',
                             quotechar='"', quoting=csv.QUOTE_NONNUMERIC,
                             **kwds)
def __init__(self, f, dialect=csv.excel, encoding="latin-1", **kwds):
    """Queue-backed csv writer defaulting to latin-1.

    Characters outside the target encoding are substituted rather than
    raising, via the 'replace' error policy.
    """
    self.stream = f
    self.encoding = encoding
    self.queue = cStringIO.StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    # 'replace' keeps flushing from blowing up on unencodable characters.
    self.encoder = codecs.getincrementalencoder(encoding)('replace')
def __init__(self, f, dialect='excel', encoding="utf-8", **kwds):
    """Queue-backed csv writer with deferred (function-local) imports."""
    import csv
    import cStringIO
    import codecs
    self.stream = f
    # Redirect output to an in-memory queue.
    self.queue = cStringIO.StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
def check(encoding, input="<?xml version='1.0' encoding='x'?>gürk\u20ac"):
    """Exercise the 'xml' codec, stateless and incremental, both with
    encoding autodetection and with an explicit target *encoding*."""
    inputdecl = input.replace("'x'", repr(encoding))
    # Check stateless encoder with encoding autodetection
    e = codecs.getencoder("xml")
    assert e(inputdecl)[0].decode(encoding) == inputdecl
    # Check stateless encoder with specified encoding
    assert e(input, encoding=encoding)[0].decode(encoding) == inputdecl
    # Check incremental encoder with encoding autodetection
    ie = codecs.getincrementalencoder("xml")()
    assert b"".join(ie.iterencode(inputdecl)).decode(encoding) == inputdecl
    # Check incremental encoder with specified encoding
    ie = codecs.getincrementalencoder("xml")(encoding=encoding)
    assert b"".join(ie.iterencode(input)).decode(encoding) == inputdecl
def __init__(self, f, cols, dialect=csv.excel, encoding='utf-8', **kwds):
    """Queue-backed csv writer that also remembers its column list."""
    self.cols = cols
    self.stream = f
    self.queue = cStringIO.StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
def test_incremental_encode(self):
    """IDNA incremental encoding: a label is only emitted once a dot
    (or the final flush) completes it."""
    cases = [
        (u"python.org", "python.org"),
        (u"python.org.", "python.org."),
        (u"pyth\xf6n.org.", "xn--pythn-mua.org."),
        (u"pyth\xf6n.org.", "xn--pythn-mua.org."),
    ]
    for text, expected in cases:
        self.assertEquals("".join(codecs.iterencode(text, "idna")), expected)
    encoder = codecs.getincrementalencoder("idna")()
    # Incomplete label: nothing is emitted until the dot arrives.
    self.assertEquals(encoder.encode(u"\xe4x"), "")
    self.assertEquals(encoder.encode(u"ample.org"), "xn--xample-9ta.")
    self.assertEquals(encoder.encode(u"", True), "org")
    encoder.reset()
    self.assertEquals(encoder.encode(u"\xe4x"), "")
    self.assertEquals(encoder.encode(u"ample.org."), "xn--xample-9ta.org.")
    self.assertEquals(encoder.encode(u"", True), "")
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
    """Queue-backed csv writer that also records the quoting setting."""
    self.stream = f
    self.queue = StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
    # Remember the caller's quoting choice (None when not specified).
    self.quoting = kwds.get("quoting", None)
def test_incrementalencoder_final(self): encoder = codecs.getincrementalencoder(self.encoding)() last_index = len(self.text) - 1 output = b''.join( encoder.encode(char, index == last_index) for index, char in enumerate(self.text)) self.assertEqual(output, self.expected_reset)
def __init__(self, csv_file, columns):
    """DictWriter over *columns* (missing keys become 'NA'); the header
    is written into the in-memory line buffer straight away."""
    self.stream = csv_file
    self.current_line = cStringIO.StringIO()
    self.encoder = codecs.getincrementalencoder(UTF8)()
    self.writer = csv.DictWriter(self.current_line, columns,
                                 restval='NA', dialect='excel')
    self.writer.writeheader()
def get_csv_row_writer(stream, dialect=csv.excel, encoding="utf-8", **kwargs):
    """ Create a csv, encoding from unicode, row writer.

    Use returned callable to write rows of unicode data
    to a stream, such as a file opened in write mode,
    in utf-8(or another) encoding.

    ::

        my_row_data = [
            [u'one', u'two'],
            [u'three', u'four'],
        ]

        with open('myfile.csv', 'wt') as myfile:
            unicode_row_writer = get_unicode_row_writer(myfile)
            for row in my_row_data:
                unicode_row_writer(row)
    """
    if is_py3():
        # Python 3's csv handles text natively; hand back writerow directly.
        return csv.writer(stream, dialect=dialect, **kwargs).writerow
    # Python 2: stage rows in a queue and re-encode on every write.
    queue = StringIO()
    writer = csv.writer(queue, dialect=dialect, **kwargs)
    encoder = codecs.getincrementalencoder(encoding)()
    return partial(_encode_write_row, stream, queue, writer, encoder)
def __init__(self, f, dialect=csv.excel, **kwds):
    """Queue-backed UTF-8 csv writer; stamps a BOM on the target stream."""
    self.stream = f
    self.stream.write(codecs.BOM_UTF8)
    self.queue = cStringIO.StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder("UTF-8")()
def __init__(self, f, fields, dialect=csv.excel, encoding="utf-8", **kwds):
    """Queue-backed DictWriter that re-encodes rows incrementally."""
    self.stream = f
    self.queue = cStringIO.StringIO()
    self.encoder = codecs.getincrementalencoder(encoding)()
    self.writer = csv.DictWriter(self.queue, fieldnames=fields,
                                 dialect=dialect, **kwds)
def __init__(self, f, dialect=csv.excel, encoding="utf-8", errors='replace', **kwds):
    """Queue-backed csv writer whose encoder applies the *errors* policy."""
    self.stream = f
    self.queue = cStringIO.StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    # Instantiate the encoder class directly with the chosen error policy.
    self.encoder = codecs.getincrementalencoder(encoding)(errors=errors)
def encode(self, text_utf8, text_latex, inputenc=None, errors='strict'):
    """Incrementally encode *text_utf8* with the latex codec (optionally
    parameterised by *inputenc*) and compare against *text_latex*."""
    codec_name = 'latex+' + inputenc if inputenc else 'latex'
    encoder = codecs.getincrementalencoder(codec_name)(errors=errors)
    chunks = [encoder.encode(piece, final)
              for piece, final in split_input(text_utf8)]
    self.assertEqual(text_latex, b''.join(chunks))
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
    """Queue-backed csv writer that also pre-encodes an 'ERROR' marker."""
    self.stream = f
    self.queue = cStringIO.StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
    # Pre-encoded marker bytes kept on the instance.
    self.ERROR_STRING = 'ERROR'.encode("utf-8")
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
    """Queue-backed csv writer with Excel formatting settings."""
    self.stream = f
    # Redirect output to an in-memory queue.
    self.queue = cStringIO.StringIO()
    # created a writer with Excel formating settings
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
def __init__(self, f, encoding='utf-8', **kwds):
    """Queue-backed csv writer using the default dialect."""
    self.encoding = encoding
    self.stream = f
    self.queue = StringIO.StringIO()
    self.writer = csv.writer(self.queue, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
def __init__(self, f, fieldnames, restval="", extrasaction="raise",
             dialect="excel", encoding="utf-8", *args, **kwds):
    """DictWriter subclass init: configure the base class first, then
    redirect row output through an in-memory queue with incremental
    re-encoding."""
    csv.DictWriter.__init__(self, f, fieldnames, restval, extrasaction,
                            dialect, *args, **kwds)
    self.stream = f
    self.queue = cStringIO.StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
def __init__(self, f, fieldnames, dialect=csv.excel, restval="",
             encoding="utf-8", ignore_errors=False, **kwds):
    """Dict-style writer that delegates row output to a _UnicodeWriter."""
    self.restval = restval
    self.fieldnames = fieldnames
    self.stream = f
    self.ignore_errors = ignore_errors
    self.encoder = codecs.getincrementalencoder(encoding)()
    self.writer = _UnicodeWriter(f, dialect=dialect, fieldnames=fieldnames,
                                 encoding=encoding, **kwds)
def __init__(self, fp, dialect=csv.excel, encoding='utf-8', **kwargs):
    """Version-aware queue: a bytes buffer on Python 2, a
    newline-preserving text buffer on Python 3."""
    if sys.version_info[0] >= 3:
        self.queue = io.StringIO(newline='')
    else:
        self.queue = io.BytesIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwargs)
    self.stream = fp
    self.encoder = codecs.getincrementalencoder(encoding)()
def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
    """Queue-backed csv writer defaulting to utf-8-sig (BOM-prefixed)."""
    self.stream = f
    self.queue = cStringIO.StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
    """Queue-backed csv writer (uses bare-name StringIO and
    getincrementalencoder imports from this module)."""
    self.stream = f
    self.queue = StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = getincrementalencoder(encoding)()
def __init__(self, timeout=30, maxread=2000, searchwindowsize=None,
             logfile=None, encoding=None, codec_errors='strict'):
    """Initialise the spawn/expect base object.

    When *encoding* is None the object operates in bytes mode; otherwise
    all child I/O is passed through incremental codecs for *encoding*,
    using *codec_errors* as the error policy.
    """
    self.stdin = sys.stdin
    self.stdout = sys.stdout
    self.stderr = sys.stderr
    # Match bookkeeping, populated by expect()-style calls elsewhere.
    self.searcher = None
    self.ignorecase = False
    self.before = None
    self.after = None
    self.match = None
    self.match_index = None
    self.terminated = True
    self.exitstatus = None
    self.signalstatus = None
    # status returned by os.waitpid
    self.status = None
    # the child file descriptor is initially closed
    self.child_fd = -1
    self.timeout = timeout
    self.delimiter = EOF
    self.logfile = logfile
    # input from child (read_nonblocking)
    self.logfile_read = None
    # output to send (send, sendline)
    self.logfile_send = None
    # max bytes to read at one time into buffer
    self.maxread = maxread
    # This is the read buffer. See maxread.
    self.buffer = bytes() if (encoding is None) else text_type()
    # Data before searchwindowsize point is preserved, but not searched.
    self.searchwindowsize = searchwindowsize
    # Delay used before sending data to child. Time in seconds.
    # Set this to None to skip the time.sleep() call completely.
    self.delaybeforesend = 0.05
    # Used by close() to give kernel time to update process status.
    # Time in seconds.
    self.delayafterclose = 0.1
    # Used by terminate() to give kernel time to update process status.
    # Time in seconds.
    self.delayafterterminate = 0.1
    # Delay in seconds to sleep after each call to read_nonblocking().
    # Set this to None to skip the time.sleep() call completely: that
    # would restore the behavior from pexpect-2.0 (for performance
    # reasons or because you don't want to release Python's global
    # interpreter lock).
    self.delayafterread = 0.0001
    self.softspace = False
    self.name = '<' + repr(self) + '>'
    self.closed = True

    # Unicode interface
    self.encoding = encoding
    self.codec_errors = codec_errors
    if encoding is None:
        # bytes mode (accepts some unicode for backwards compatibility)
        self._encoder = self._decoder = _NullCoder()
        self.string_type = bytes
        self.crlf = b'\r\n'
        if PY3:
            self.allowed_string_types = (bytes, str)
            self.linesep = os.linesep.encode('ascii')

            def write_to_stdout(b):
                try:
                    return sys.stdout.buffer.write(b)
                except AttributeError:
                    # If stdout has been replaced, it may not have .buffer
                    return sys.stdout.write(b.decode('ascii', 'replace'))
            self.write_to_stdout = write_to_stdout
        else:
            self.allowed_string_types = (basestring, )  # analysis:ignore
            self.linesep = os.linesep
            self.write_to_stdout = sys.stdout.write
    else:
        # unicode mode: wrap child I/O in incremental codecs so multi-byte
        # sequences split across reads/writes are handled correctly.
        self._encoder = codecs.getincrementalencoder(encoding)(
            codec_errors)
        self._decoder = codecs.getincrementaldecoder(encoding)(
            codec_errors)
        self.string_type = text_type
        self.crlf = u'\r\n'
        self.allowed_string_types = (text_type, )
        if PY3:
            self.linesep = os.linesep
        else:
            self.linesep = os.linesep.decode('ascii')
        # This can handle unicode in both Python 2 and 3
        self.write_to_stdout = sys.stdout.write
    # storage for async transport
    self.async_pw_transport = None
#end_pymotw_header import codecs import sys from codecs_to_hex import to_hex text = b'abcdefghijklmnopqrstuvwxyz\n' repetitions = 50 print('Text length :', len(text)) print('Repetitions :', repetitions) print('Expected len:', len(text) * repetitions) # Encode the text several times to build up a # large amount of data encoder = codecs.getincrementalencoder('bz2')() encoded = [] print() print('Encoding:', end=' ') last = repetitions - 1 for i in range(repetitions): en_c = encoder.encode(text, final=(i == last)) if en_c: print('\nEncoded : {} bytes'.format(len(en_c))) encoded.append(en_c) else: sys.stdout.write('.') all_encoded = b''.join(encoded) print()
def __init__(self, f, dialect=csv.excel, encoding=WRITER_ENCODING, **kwds):
    """Queue-backed csv writer using the module-wide WRITER_ENCODING."""
    self.stream = f
    self.queue = io.StringIO()
    self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
    self.encoder = codecs.getincrementalencoder(encoding)()
def __init__(self, f, dialect="excel-tab", encoding="utf-8", **kwds):
    """Tab-dialect csv writer that writes directly to *f* (no queue)."""
    self.stream = f
    self.encoder = codecs.getincrementalencoder(encoding)()
    self.writer = csv.writer(self.stream, dialect=dialect, **kwds)
def set_tx_encoding(self, encoding, errors='replace'):
    """Set the encoding used for transmitted data.

    *errors* selects the codec error policy (default: 'replace').
    """
    # Incremental encoder so multi-byte sequences may span separate writes.
    self.tx_encoder = codecs.getincrementalencoder(encoding)(errors)
    self.output_encoding = encoding