Ejemplo n.º 1
0
 def check_state_handling_encode(self, encoding, u, s):
     """Check that incremental-encoder state survives getstate()/setstate().

     For every split point of ``u`` the head is encoded with one
     incremental encoder, that encoder's state is copied into a brand new
     encoder, and the tail is encoded (flagged as final) with the new one.
     The two outputs concatenated must equal ``s``.
     """
     make_encoder = codecs.getincrementalencoder(encoding)
     for split in range(len(u) + 1):
         first = make_encoder()
         head = first.encode(u[:split])
         saved = first.getstate()
         # Continue from the saved state with a fresh encoder instance
         second = make_encoder()
         second.setstate(saved)
         tail = second.encode(u[split:], True)
         self.assertEqual(s, head + tail)
Ejemplo n.º 2
0
 def test_decoder_state(self):
     import codecs
     encoding = 'utf16'
     u = 'abc123'
     s = u.encode(encoding)
     for i in range(len(u) + 1):
         d = codecs.getincrementalencoder(encoding)()
         part1 = d.encode(u[:i])
         state = d.getstate()
         d = codecs.getincrementalencoder(encoding)()
         d.setstate(state)
         part2 = d.encode(u[i:], True)
         assert s == part1 + part2
Ejemplo n.º 3
0
 def __init__(self, f, dialect=csv.excel, encoding='utf-8', **kwds):
     """Set up a CSV writer that renders rows into an in-memory queue.

     ``f`` is kept as the target stream and immediately receives a UTF-8
     byte order mark.  ``dialect`` and ``kwds`` are forwarded to
     ``csv.writer``; ``encoding`` selects the incremental encoder.
     """
     # Rows are rendered into this queue rather than straight into ``f``
     buffer = cStringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     # BOM for Excel
     f.write(codecs.BOM_UTF8)
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 4
0
 def __init__(self, transport, encoding=None, errors="strict"):
     """Wrap ``transport`` and prepare an incremental encoder/decoder pair.

     When ``encoding`` is None the filesystem encoding reported by
     ``sys.getfilesystemencoding()`` is used.  ``errors`` is passed to
     both the encoder and the decoder.
     """
     StreamTransportAdapter.__init__(self, transport)
     chosen = encoding if encoding is not None else sys.getfilesystemencoding()
     self.encoding = chosen
     make_encoder = codecs.getincrementalencoder(chosen)
     self.encoder = make_encoder(errors)
     make_decoder = codecs.getincrementaldecoder(chosen)
     self.decoder = make_decoder(errors)
Ejemplo n.º 5
0
    def get_csv(self, dialect=csv.excel, encoding="utf-8" ):
        """Render ``self.results`` as CSV and return the encoded text.

        Each result row is turned into one CSV line whose columns follow
        the class attribute ``col_names_in_order``; every cell value comes
        from ``self._decorate_data_for_csv(row, col_name)``.

        :param dialect: csv dialect handed to ``csv.writer``.
        :param encoding: target encoding of the returned data.
        :returns: the whole CSV document, encoded with ``encoding``.
        """
        col_names_in_order = self.__class__.col_names_in_order

        queue = cStringIO.StringIO()
        writer = csv.writer(queue, dialect=dialect)
        encoder = codecs.getincrementalencoder(encoding)()

        stream = cStringIO.StringIO()
        for row in self.results:
            # The csv module works on byte strings here, so cells are
            # handed over as UTF-8
            row_values = [
                self._decorate_data_for_csv(row, col_name).encode("utf-8")
                for col_name in col_names_in_order
            ]
            writer.writerow(row_values)

            # Fetch UTF-8 output from the queue ...
            data = queue.getvalue().decode("utf-8")
            # ... re-encode it into the target encoding and write it to
            # the target stream
            stream.write(encoder.encode(data))
            # Empty the queue.  The explicit 0 matters: a bare truncate()
            # cuts at the current position (the end of the buffer), which
            # would leave the previous rows in place and emit them again
            # with every following row.
            queue.truncate(0)

        return stream.getvalue()
Ejemplo n.º 6
0
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
     """Create the queue-backed csv writer and the encoder for ``f``.

     ``dialect`` and ``kwds`` go to ``csv.writer``; ``encoding`` selects
     the incremental encoder stored on the instance.
     """
     # csv output is collected in this in-memory queue
     buffer = cStringIO.StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
 def __init__(self, f, fieldnames, **kwds):
     """Create a queue-backed ``csv.DictWriter`` and a UTF-8 encoder.

     The header row is written into the queue right away.  ``kwds`` are
     forwarded to ``csv.DictWriter``; ``f`` is kept as the target stream.
     """
     # csv output is collected in this in-memory queue
     buffer = cStringIO.StringIO()
     self.queue = buffer
     dict_writer = csv.DictWriter(buffer, fieldnames, **kwds)
     self.writer = dict_writer
     dict_writer.writeheader()
     self.stream = f
     make_encoder = codecs.getincrementalencoder('utf-8')
     self.encoder = make_encoder()
Ejemplo n.º 8
0
def csv_generator_p2(records, fields, include_header=True, header=None,
                     dialect=csv.excel):
    """Yield UTF-8 encoded CSV lines, one per record.

    When ``include_header`` is true the first item yielded is the header
    row, taken from ``header`` or, if that is empty, from ``fields``.
    Each record is read with ``record.get(field)`` for every name in
    ``fields``.  ``dialect`` is handed to ``csv.writer``.
    """
    buffer = compat.StringIO()
    csv_writer = csv.writer(buffer, dialect=dialect)
    utf8_encoder = codecs.getincrementalencoder("utf-8")()

    def _render(cells):
        """Format one row through the csv writer and return it encoded."""
        csv_writer.writerow(cells)
        # Pull what the csv writer produced out of the buffer, convert it
        # to unicode and encode it for the caller
        text = compat.to_unicode(buffer.getvalue())
        encoded = utf8_encoder.encode(text)
        # Reset the buffer for the next row
        buffer.truncate(0)
        return encoded

    def _cell(value):
        """Convert one record value into what is handed to the csv writer."""
        if isinstance(value, compat.string_type):
            return value.encode("utf-8")
        if value is not None:
            return compat.text_type(value)
        return None

    if include_header:
        yield _render(header or fields)

    for record in records:
        yield _render([_cell(record.get(field)) for field in fields])
Ejemplo n.º 9
0
Archivo: tags.py Proyecto: Rima-B/mica2
def write_dataset_variable_tags(client, dataset, writer, verbose=False):
    """Write one row per namespaced attribute of every variable of a dataset.

    The variables are fetched in pages of 1000 after a first request that
    only reads the total count.  For each variable attribute that has a
    ``namespace`` key, a row with the keys ``study``, ``dataset``,
    ``name``, ``index``, ``label`` and ``tag`` is passed to
    ``writer.writerow``.  The tag has the form
    ``<namespace>::<name>.<first value>``.

    :param client: passed through to ``send_request``.
    :param dataset: dataset identifier used to build the request path.
    :param writer: object whose ``writerow`` receives one dict per tag.
    :param verbose: passed through to ``send_request``.
    """
    path = ['collected-dataset', dataset, 'variables']
    # send request to get total count
    ws = mica.core.UriBuilder(path).query('from', 0).query('limit', 0).build()
    response = send_request(client, ws, verbose)
    total = response['total'] if 'total' in response else 0
    encoder = codecs.getincrementalencoder('utf-8')()

    f = 0
    while total > 0 and f < total:
        ws = mica.core.UriBuilder(path).query('from', f).query('limit', 1000).build()
        response = send_request(client, ws, verbose)
        f = f + 1000
        # format response
        if 'variables' not in response:
            continue
        for var in response['variables']:
            if 'attributes' not in var:
                continue
            # The label is repeated on every tag row of the variable; when
            # several 'label' attributes exist the last one is used.
            label = ''
            for attr in var['attributes']:
                if attr['name'] == 'label':
                    label = attr['values'][0]['value']
            for attr in var['attributes']:
                if 'namespace' not in attr:
                    continue
                tag = attr['namespace'] + '::' + attr['name'] + '.' + attr['values'][0]['value']
                writer.writerow({'study': var['studyIds'][0],
                    'dataset': encoder.encode(var['datasetId']),
                    'name': encoder.encode(var['name']),
                    'index': str(var['index']),
                    'label': encoder.encode(label),
                    # Encoded like the other text fields so that a tag
                    # with non-ASCII characters does not break the row
                    'tag': encoder.encode(tag)
                    })
Ejemplo n.º 10
0
Archivo: tags.py Proyecto: Rima-B/mica2
def do_command(args):
    """
    Execute tags command

    Writes a CSV with the columns study, dataset, name, index, label and
    tag, either to the file named by ``args.out`` or to standard output.
    When ``args.dataset`` is None every collected dataset is listed (in
    pages of 100) and processed; otherwise only ``args.dataset`` is.
    A file opened for ``args.out`` is closed before returning, also when
    an error occurs.
    """
    out = open(args.out, 'wb') if args.out else sys.stdout
    try:
        writer = csv.DictWriter(out, fieldnames=['study','dataset','name','index','label', 'tag'],
            escapechar='"', quotechar='"', quoting=csv.QUOTE_ALL)
        writer.writeheader()
        client = mica.core.MicaClient.build(mica.core.MicaClient.LoginInfo.parse(args))

        if args.dataset is None:
            # First request only reads the total number of datasets
            ws = mica.core.UriBuilder(['collected-datasets']).query('from', 0).query('limit', 0).build()
            response = send_request(client, ws, args.verbose)
            total = response['total'] if 'total' in response else 0
            encoder = codecs.getincrementalencoder('utf-8')()

            f = 0
            while total > 0 and f < total:
                ws = mica.core.UriBuilder(['collected-datasets']).query('from', f).query('limit', 100).build()
                response = send_request(client, ws, args.verbose)
                f = f + 100
                if 'datasets' in response:
                    for ds in response['datasets']:
                        write_dataset_variable_tags(client, encoder.encode(ds['id']), writer, args.verbose)
        else:
            write_dataset_variable_tags(client, args.dataset, writer, args.verbose)
    finally:
        # Close only what was opened here; sys.stdout is left alone
        if out is not sys.stdout:
            out.close()
Ejemplo n.º 11
0
 def __init__( self, fd, dialect=csv.excel, encoding="utf-8", **kwds ):
     """Create a queue-backed csv writer using ';' as delimiter.

     The csv writer quotes with '"' and uses ``csv.QUOTE_NONNUMERIC``;
     ``dialect`` and ``kwds`` are forwarded as well.  ``fd`` is kept as
     the target stream and ``encoding`` selects the incremental encoder.
     """
     # csv output is collected in this in-memory queue
     buffer = StringIO()
     self.queue = buffer
     self.writer = csv.writer(
         buffer,
         dialect=dialect,
         delimiter=';',
         quotechar='"',
         quoting=csv.QUOTE_NONNUMERIC,
         **kwds
     )
     self.stream = fd
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 12
0
 def __init__(self, f, dialect=csv.excel, encoding="latin-1", **kwds):
     """Create a queue-backed csv writer and a 'replace'-mode encoder.

     ``dialect`` and ``kwds`` go to ``csv.writer``.  The incremental
     encoder for ``encoding`` is built with the 'replace' error handler.
     """
     # csv output is collected in this in-memory queue
     buffer = cStringIO.StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     self.encoding = encoding
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder('replace')
Ejemplo n.º 13
0
 def __init__(self, f, dialect='excel', encoding="utf-8", **kwds):
     """Create a queue-backed csv writer and an encoder for ``f``.

     The required modules are imported locally.  ``dialect`` and ``kwds``
     go to ``csv.writer``; ``encoding`` selects the incremental encoder.
     """
     import csv
     import cStringIO
     import codecs
     # csv output is collected in this in-memory queue
     buffer = cStringIO.StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
	def check(encoding, input="<?xml version='1.0' encoding='x'?>gürk\u20ac"):
		"""Round-trip ``input`` through the "xml" codec in four ways.

		``inputdecl`` is ``input`` with the placeholder ``'x'`` replaced by
		``repr(encoding)``.  The stateless and the incremental encoder are
		each used once on ``inputdecl`` without an explicit encoding and
		once on ``input`` with ``encoding`` given; decoding the output with
		``encoding`` must give ``inputdecl`` every time.
		"""
		stateless = codecs.getencoder("xml")
		inputdecl = input.replace("'x'", repr(encoding))

		# Stateless encoder, encoding taken from the declaration
		detected = stateless(inputdecl)[0]
		assert detected.decode(encoding) == inputdecl

		# Stateless encoder, encoding passed explicitly
		explicit = stateless(input, encoding=encoding)[0]
		assert explicit.decode(encoding) == inputdecl

		make_incremental = codecs.getincrementalencoder("xml")

		# Incremental encoder, encoding taken from the declaration
		incremental = make_incremental()
		detected = b"".join(incremental.iterencode(inputdecl))
		assert detected.decode(encoding) == inputdecl

		# Incremental encoder, encoding passed explicitly
		incremental = make_incremental(encoding=encoding)
		explicit = b"".join(incremental.iterencode(input))
		assert explicit.decode(encoding) == inputdecl
Ejemplo n.º 15
0
 def __init__(self, f, cols, dialect=csv.excel, encoding='utf-8', **kwds):
   """Store ``cols`` and create the queue-backed csv writer and encoder.

   ``dialect`` and ``kwds`` go to ``csv.writer``; ``f`` is kept as the
   target stream and ``encoding`` selects the incremental encoder.
   """
   self.cols = cols
   # csv output is collected in this in-memory queue
   buffer = cStringIO.StringIO()
   self.queue = buffer
   self.writer = csv.writer(buffer, dialect=dialect, **kwds)
   self.stream = f
   make_encoder = codecs.getincrementalencoder(encoding)
   self.encoder = make_encoder()
Ejemplo n.º 16
0
    def test_incremental_encode(self):
        """Exercise the "idna" codec through iterencode and an incremental encoder."""
        # (input, expected) pairs for codecs.iterencode
        iterencode_cases = (
            (u"python.org", "python.org"),
            (u"python.org.", "python.org."),
            (u"pyth\xf6n.org.", "xn--pythn-mua.org."),
            (u"pyth\xf6n.org.", "xn--pythn-mua.org."),
        )
        for source, expected in iterencode_cases:
            joined = "".join(codecs.iterencode(source, "idna"))
            self.assertEquals(joined, expected)

        encoder = codecs.getincrementalencoder("idna")()

        # Name without trailing dot: the last label only appears once the
        # final (empty) chunk is fed
        first_run = (
            (u"\xe4x", False, ""),
            (u"ample.org", False, "xn--xample-9ta."),
            (u"", True, "org"),
        )
        for chunk, final, expected in first_run:
            self.assertEquals(encoder.encode(chunk, final), expected)

        encoder.reset()

        # Name with trailing dot: everything is emitted before the final chunk
        second_run = (
            (u"\xe4x", False, ""),
            (u"ample.org.", False, "xn--xample-9ta.org."),
            (u"", True, ""),
        )
        for chunk, final, expected in second_run:
            self.assertEquals(encoder.encode(chunk, final), expected)
Ejemplo n.º 17
0
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
     """Create the queue-backed csv writer and remember the quoting option.

     ``dialect`` and ``kwds`` go to ``csv.writer``.  The ``quoting`` entry
     of ``kwds`` (None when absent) is additionally stored on the instance.
     """
     # csv output is collected in this in-memory queue
     buffer = StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
     self.quoting = kwds.get("quoting")
Ejemplo n.º 18
0
 def test_incrementalencoder_final(self):
     encoder = codecs.getincrementalencoder(self.encoding)()
     last_index = len(self.text) - 1
     output = b''.join(
         encoder.encode(char, index == last_index)
         for index, char in enumerate(self.text))
     self.assertEqual(output, self.expected_reset)
Ejemplo n.º 19
0
    def __init__(self, csv_file, columns):
        """Create a ``csv.DictWriter`` over an in-memory line buffer.

        Missing values are written as 'NA' and the 'excel' dialect is used.
        The header row is written into the line buffer at the end of
        construction; ``csv_file`` is kept as the target stream.
        """
        line_buffer = cStringIO.StringIO()
        self.current_line = line_buffer
        dict_writer = csv.DictWriter(line_buffer, columns, restval='NA', dialect='excel')
        self.writer = dict_writer
        self.stream = csv_file
        make_encoder = codecs.getincrementalencoder(UTF8)
        self.encoder = make_encoder()

        dict_writer.writeheader()
Ejemplo n.º 20
0
def get_csv_row_writer(stream, dialect=csv.excel, encoding="utf-8", **kwargs):
    """
    Build a callable that writes one row of unicode data to ``stream`` as csv.

    When ``is_py3()`` is true the callable is simply the ``writerow``
    method of a ``csv.writer`` on ``stream`` and ``encoding`` is not used.
    Otherwise rows go through an in-memory queue and an incremental
    encoder for ``encoding`` by way of ``_encode_write_row``.

    ::

        my_row_data = [
            [u'one', u'two'],
            [u'three', u'four'],
        ]

        with open('myfile.csv', 'wt') as myfile:
            unicode_row_writer = get_csv_row_writer(myfile)
            for row in my_row_data:
                unicode_row_writer(row)
    """
    if not is_py3():
        queue = StringIO()
        queue_writer = csv.writer(queue, dialect=dialect, **kwargs)
        encoder = codecs.getincrementalencoder(encoding)()
        return partial(_encode_write_row, stream, queue, queue_writer, encoder)

    return csv.writer(stream, dialect=dialect, **kwargs).writerow
Ejemplo n.º 21
0
 def __init__(self, f, dialect=csv.excel, **kwds):
     """Create a queue-backed csv writer and write a UTF-8 BOM to ``f``.

     ``dialect`` and ``kwds`` go to ``csv.writer``.  The encoder is always
     the incremental "UTF-8" encoder.
     """
     # csv output is collected in this in-memory queue
     buffer = cStringIO.StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     make_encoder = codecs.getincrementalencoder("UTF-8")
     self.encoder = make_encoder()
     # The byte order mark goes to the target stream straight away
     f.write(codecs.BOM_UTF8)
Ejemplo n.º 22
0
 def __init__(self, f, fields, dialect=csv.excel, encoding="utf-8", **kwds):
     """Create a queue-backed ``csv.DictWriter`` and an encoder for ``f``.

     ``fields`` become the writer's fieldnames; ``dialect`` and ``kwds``
     are forwarded.  ``encoding`` selects the incremental encoder.
     """
     # csv output is collected in this in-memory queue
     buffer = cStringIO.StringIO()
     self.queue = buffer
     self.writer = csv.DictWriter(
         buffer, fieldnames=fields, dialect=dialect, **kwds)
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 23
0
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", errors='replace', **kwds):
     """Create a queue-backed csv writer and an encoder with error handling.

     ``dialect`` and ``kwds`` go to ``csv.writer``.  The incremental
     encoder for ``encoding`` is built with ``errors`` as its error mode.
     """
     # csv output is collected in this in-memory queue
     buffer = cStringIO.StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     self.encoder = codecs.getincrementalencoder(encoding)(errors=errors)
Ejemplo n.º 24
0
 def encode(self, text_utf8, text_latex, inputenc=None, errors='strict'):
     """Assert that incrementally encoding ``text_utf8`` gives ``text_latex``.

     The codec name is 'latex', or 'latex+<inputenc>' when ``inputenc`` is
     given.  The input is fed to the encoder in the pieces produced by
     ``split_input``, each with its own ``final`` flag.
     """
     if inputenc:
         encoding = 'latex+' + inputenc
     else:
         encoding = 'latex'
     encoder = codecs.getincrementalencoder(encoding)(errors=errors)
     pieces = []
     for chunk, final in split_input(text_utf8):
         pieces.append(encoder.encode(chunk, final))
     self.assertEqual(text_latex, b''.join(pieces))
Ejemplo n.º 25
0
	def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
		"""Create the queue-backed csv writer, the encoder and ``ERROR_STRING``.

		``dialect`` and ``kwds`` go to ``csv.writer``; ``encoding`` selects
		the incremental encoder.  ``ERROR_STRING`` is 'ERROR' encoded as
		UTF-8.
		"""
		# csv output is collected in this in-memory queue
		buffer = cStringIO.StringIO()
		self.queue = buffer
		self.writer = csv.writer(buffer, dialect=dialect, **kwds)
		self.stream = f
		make_encoder = codecs.getincrementalencoder(encoding)
		self.encoder = make_encoder()
		self.ERROR_STRING = 'ERROR'.encode("utf-8")
Ejemplo n.º 26
0
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
     """Create the queue-backed csv writer and the encoder for ``f``.

     ``dialect`` (the Excel dialect by default) and ``kwds`` are handed
     to ``csv.writer``; ``encoding`` selects the incremental encoder.
     """
     # csv output is collected in this in-memory queue
     buffer = cStringIO.StringIO()
     self.queue = buffer
     # the writer formats rows according to ``dialect``
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 27
0
 def __init__(self, f, encoding='utf-8', **kwds):
     """Create the queue-backed csv writer and the encoder for ``f``.

     ``kwds`` are forwarded to ``csv.writer``; ``encoding`` is stored and
     also selects the incremental encoder.
     """
     self.encoding = encoding
     # csv output is collected in this in-memory queue
     buffer = StringIO.StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, **kwds)
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 28
0
 def __init__(self, f, fieldnames, restval="", extrasaction="raise",
              dialect="excel", encoding="utf-8", *args, **kwds):
     """Initialise the ``csv.DictWriter`` base and swap in a queue-backed writer.

     After the base class initialiser has run, ``self.writer`` is replaced
     by a ``csv.writer`` on an in-memory queue (built from ``dialect`` and
     ``kwds``).  ``f`` is kept as the target stream and ``encoding``
     selects the incremental encoder.
     """
     csv.DictWriter.__init__(
         self, f, fieldnames, restval, extrasaction, dialect, *args, **kwds)
     # csv output is collected in this in-memory queue
     buffer = cStringIO.StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 29
0
 def __init__(self, f, fieldnames, dialect=csv.excel, restval="", encoding="utf-8", ignore_errors=False, **kwds):
     """Store the dict-writer settings and build the underlying ``_UnicodeWriter``.

     ``_UnicodeWriter`` receives ``f`` together with ``dialect``,
     ``fieldnames``, ``encoding`` and ``kwds``.  ``restval``,
     ``fieldnames`` and ``ignore_errors`` are kept on the instance, along
     with the target stream and an incremental encoder for ``encoding``.
     """
     self.restval = restval
     self.fieldnames = fieldnames
     self.writer = _UnicodeWriter(
         f,
         dialect=dialect,
         fieldnames=fieldnames,
         encoding=encoding,
         **kwds
     )
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
     self.ignore_errors = ignore_errors
Ejemplo n.º 30
0
 def __init__(self, fp, dialect=csv.excel, encoding='utf-8', **kwargs):
     """Create a queue-backed csv writer that works on Python 2 and 3.

     The queue is an ``io.BytesIO`` when the major Python version is
     below 3 and an ``io.StringIO(newline='')`` otherwise.  ``dialect``
     and ``kwargs`` go to ``csv.writer``; ``encoding`` selects the
     incremental encoder and ``fp`` is kept as the target stream.
     """
     running_py2 = sys.version_info[0] < 3
     self.queue = io.BytesIO() if running_py2 else io.StringIO(newline='')
     self.writer = csv.writer(self.queue, dialect=dialect, **kwargs)
     self.stream = fp
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 31
0
 def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
     """Create the queue-backed csv writer and the encoder for ``f``.

     ``dialect`` and ``kwds`` go to ``csv.writer``; ``encoding``
     ("utf-8-sig" by default) selects the incremental encoder.
     """
     # csv output is collected in this in-memory queue
     buffer = cStringIO.StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 32
0
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
     """Create the queue-backed csv writer and the encoder for ``f``.

     ``dialect`` and ``kwds`` go to ``csv.writer``; ``encoding`` selects
     the incremental encoder.
     """
     # csv output is collected in this in-memory queue
     buffer = StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     make_encoder = getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 33
0
    def __init__(self,
                 timeout=30,
                 maxread=2000,
                 searchwindowsize=None,
                 logfile=None,
                 encoding=None,
                 codec_errors='strict'):
        """Initialise the bookkeeping attributes and the text/bytes interface.

        :param timeout: stored as ``self.timeout``.
        :param maxread: max bytes to read at one time into the buffer.
        :param searchwindowsize: data before this point in the buffer is
            preserved, but not searched.
        :param logfile: stored as ``self.logfile``.
        :param encoding: None selects bytes mode (a ``_NullCoder`` is used
            as both encoder and decoder); any other value selects unicode
            mode with an incremental encoder and decoder for that encoding.
        :param codec_errors: error mode passed to the incremental encoder
            and decoder in unicode mode.
        """
        self.stdin = sys.stdin
        self.stdout = sys.stdout
        self.stderr = sys.stderr

        self.searcher = None
        self.ignorecase = False
        self.before = None
        self.after = None
        self.match = None
        self.match_index = None
        self.terminated = True
        self.exitstatus = None
        self.signalstatus = None
        # status returned by os.waitpid
        self.status = None
        # the child file descriptor is initially closed
        self.child_fd = -1
        self.timeout = timeout
        self.delimiter = EOF
        self.logfile = logfile
        # input from child (read_nonblocking)
        self.logfile_read = None
        # output to send (send, sendline)
        self.logfile_send = None
        # max bytes to read at one time into buffer
        self.maxread = maxread
        # This is the read buffer. See maxread.
        self.buffer = bytes() if (encoding is None) else text_type()
        # Data before searchwindowsize point is preserved, but not searched.
        self.searchwindowsize = searchwindowsize
        # Delay used before sending data to child. Time in seconds.
        # Set this to None to skip the time.sleep() call completely.
        self.delaybeforesend = 0.05
        # Used by close() to give kernel time to update process status.
        # Time in seconds.
        self.delayafterclose = 0.1
        # Used by terminate() to give kernel time to update process status.
        # Time in seconds.
        self.delayafterterminate = 0.1
        # Delay in seconds to sleep after each call to read_nonblocking().
        # Set this to None to skip the time.sleep() call completely: that
        # would restore the behavior from pexpect-2.0 (for performance
        # reasons or because you don't want to release Python's global
        # interpreter lock).
        self.delayafterread = 0.0001
        self.softspace = False
        # NOTE: repr(self) is evaluated here, while the attributes assigned
        # further down do not exist yet.
        self.name = '<' + repr(self) + '>'
        self.closed = True

        # Unicode interface
        self.encoding = encoding
        self.codec_errors = codec_errors
        if encoding is None:
            # bytes mode (accepts some unicode for backwards compatibility)
            self._encoder = self._decoder = _NullCoder()
            self.string_type = bytes
            self.crlf = b'\r\n'
            if PY3:
                self.allowed_string_types = (bytes, str)
                self.linesep = os.linesep.encode('ascii')

                def write_to_stdout(b):
                    # Write raw bytes to stdout, falling back to a lossy
                    # ASCII decode when no binary buffer is available.
                    try:
                        return sys.stdout.buffer.write(b)
                    except AttributeError:
                        # If stdout has been replaced, it may not have .buffer
                        return sys.stdout.write(b.decode('ascii', 'replace'))

                self.write_to_stdout = write_to_stdout
            else:
                self.allowed_string_types = (basestring, )  # analysis:ignore
                self.linesep = os.linesep
                self.write_to_stdout = sys.stdout.write
        else:
            # unicode mode
            self._encoder = codecs.getincrementalencoder(encoding)(
                codec_errors)
            self._decoder = codecs.getincrementaldecoder(encoding)(
                codec_errors)
            self.string_type = text_type
            self.crlf = u'\r\n'
            self.allowed_string_types = (text_type, )
            if PY3:
                self.linesep = os.linesep
            else:
                self.linesep = os.linesep.decode('ascii')
            # This can handle unicode in both Python 2 and 3
            self.write_to_stdout = sys.stdout.write
        # storage for async transport
        self.async_pw_transport = None
#end_pymotw_header
import codecs
import sys

from codecs_to_hex import to_hex

text = b'abcdefghijklmnopqrstuvwxyz\n'
repetitions = 50

print('Text length :', len(text))
print('Repetitions :', repetitions)
print('Expected len:', len(text) * repetitions)

# Feed the same text to the incremental bz2 encoder over and over so
# that a large amount of input accumulates
encoder = codecs.getincrementalencoder('bz2')()
encoded = []

print()
print('Encoding:', end=' ')
last = repetitions - 1
for i in range(repetitions):
    en_c = encoder.encode(text, final=(i == last))
    if not en_c:
        # Nothing came back for this chunk: show a progress dot
        sys.stdout.write('.')
        continue
    print('\nEncoded : {} bytes'.format(len(en_c)))
    encoded.append(en_c)

all_encoded = b''.join(encoded)
print()
Ejemplo n.º 35
0
 def __init__(self, f, dialect=csv.excel, encoding=WRITER_ENCODING, **kwds):
     """Create the queue-backed csv writer and the encoder for ``f``.

     ``dialect`` and ``kwds`` go to ``csv.writer``; ``encoding`` (the
     module's ``WRITER_ENCODING`` by default) selects the incremental
     encoder.
     """
     # csv output is collected in this in-memory queue
     buffer = io.StringIO()
     self.queue = buffer
     self.writer = csv.writer(buffer, dialect=dialect, **kwds)
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 36
0
 def __init__(self, f, dialect="excel-tab", encoding="utf-8", **kwds):
     """Create a csv writer directly on ``f`` plus an incremental encoder.

     ``dialect`` ("excel-tab" by default) and ``kwds`` go to
     ``csv.writer``; ``encoding`` selects the incremental encoder.
     """
     self.stream = f
     self.writer = csv.writer(f, dialect=dialect, **kwds)
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
Ejemplo n.º 37
0
 def set_tx_encoding(self, encoding, errors='replace'):
     """Select the encoding used for transmitted data.

     Stores ``encoding`` as ``output_encoding`` and replaces
     ``tx_encoder`` with a new incremental encoder for it, created with
     ``errors`` as the error mode.
     """
     self.output_encoding = encoding
     make_encoder = codecs.getincrementalencoder(encoding)
     self.tx_encoder = make_encoder(errors)