def _fuzz(word, fuzziness=0.2):
    """Fuzz a word with noise.

    Parameters
    ----------
    word : str
        A word to fuzz
    fuzziness : float
        How fuzzy to make the word

    Returns
    -------
    str
        A fuzzed word
    """
    # Keep mutating until the result differs from the input.
    # NOTE(review): if ``word`` is empty the result is always '' and this
    # loops forever -- confirm callers never pass an empty string.
    while True:
        new_word = []
        for ch in word:
            if random() > fuzziness:  # noqa: S311
                # Most characters pass through unchanged.
                new_word.append(ch)
            else:
                if random() > 0.5:  # noqa: S311
                    # Substitute a random printable ASCII character.
                    new_word.append(choice(printable))  # noqa: S311
                elif random() > 0.8:  # noqa: S311
                    # Substitute a code point from the full Unicode range
                    # (may include surrogates/noncharacters).
                    new_word.append(unichr(randint(0, 0x10FFFF)))  # noqa: S311
                else:
                    # Substitute a code point from the BMP only.
                    new_word.append(unichr(randint(0, 0xFFFF)))  # noqa: S311
                if random() > 0.5:  # noqa: S311
                    # Sometimes also keep the original character, turning
                    # the substitution into an insertion.
                    new_word.append(ch)
        new_word = ''.join(new_word)
        if new_word != word:
            return new_word
def test___init__(self):
    # UTF-8 encoding of U+00E9, U+0BF2, U+0F84.
    expected = b'\xc3\xa9\xe0\xaf\xb2\xe0\xbe\x84'
    if six.PY3:
        # On Python 3, str(exc) yields text rather than bytes.
        expected = expected.decode('utf-8')
    message = six.unichr(233) + six.unichr(0x0bf2) + six.unichr(3972)
    exc = exception.IronicException(message)
    self.assertEqual(expected, exc.__str__())
def _build_illegal_xml_regex():
    """Constructs a regex to match all illegal xml characters.

    Expects to be used against a unicode string."""
    # Inclusive ranges of code points forbidden by XML 1.0.
    bad_ranges = [
        (0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F), (0x7F, 0x84),
        (0x86, 0x9F), (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF),
    ]
    # Wide (UCS-4) builds can also represent the noncharacters at the
    # end of each supplementary plane (U+xFFFE / U+xFFFF).
    if sys.maxunicode >= 0x10000:
        bad_ranges += [(plane + 0xFFFE, plane + 0xFFFF)
                       for plane in range(0x10000, 0x110000, 0x10000)]
    # Turn each pair into a character-class range expression.
    body = six.u('').join(
        "%s-%s" % (six.unichr(lo), six.unichr(hi))
        for (lo, hi) in bad_ranges)
    return re.compile(six.u('[%s]') % body)
def test_dash_issue(self):
    html = '<strong>—</strong>'
    self.server.response['get.data'] = html
    grab = build_grab()
    grab.go(self.server.get_url())
    # By default &#[128-160]; are fixed: the cp1252-style reference 151
    # is remapped to the true em dash U+2014 (8212).
    self.assertFalse(grab.doc.select('//strong/text()').text() == six.unichr(151))
    self.assertTrue(grab.doc.select('//strong/text()').text() == six.unichr(8212))
    # disable fix-behaviour
    grab.setup(fix_special_entities=False)
    grab.go(self.server.get_url())
    # Without the fix the raw control character U+0097 survives.
    self.assertTrue(grab.doc.select('//strong/text()').text() == six.unichr(151))
    self.assertFalse(grab.doc.select('//strong/text()').text() == six.unichr(8212))
    # Explicitly use unicode_body func
    grab = build_grab()
    grab.go(self.server.get_url())
    #print(':::', grab.doc.unicode_body())
    self.assertTrue('—' in grab.doc.unicode_body())
def decompress(compressed):
    """Decompress a list of output ks to a string."""
    # Seed the table with every BMP character mapped to itself.
    # NOTE: new entries use integer keys while the seed uses character
    # keys, mirroring the codes produced by the compressor.
    next_code = 0x10000
    table = {six.unichr(cp): six.unichr(cp) for cp in range(next_code)}

    out = io.StringIO()
    previous = compressed.pop(0)  # first code is emitted verbatim
    out.write(previous)

    for code in compressed:
        if code in table:
            entry = table[code]
        elif code == next_code:
            # Special LZW case: code references the entry being built.
            entry = previous + previous[0]
        else:
            raise ValueError('Bad compressed k: %s' % code)
        out.write(entry)

        # Register previous-sequence + first char of entry.
        table[next_code] = previous + entry[0]
        next_code += 1

        previous = entry
    return out.getvalue()
def test_render_response_utf8(self):
    # Non-ASCII code points (U+A000, U+07B4) must round-trip through the
    # XML renderer unharmed.
    req = apirequest.APIRequest("FakeAction", "FakeVersion", {})
    resp = {
        'utf8': six.unichr(40960) + u'abcd' + six.unichr(1972)
    }
    data = req._render_response(resp, 'uuid').decode()
    self.assertIn('<utf8>ꀀabcd޴</utf8>', data)
def _init_cache():
    '''Creates a mapping of wide->narrow and narrow->wide characters'''
    global _wide_to_narrow
    global _narrow_to_wide
    _wide_to_narrow = {}
    _narrow_to_wide = {}
    # Map every BMP character to its Unicode name (None when unnamed).
    char_names = {six.unichr(i): unicodedata.name(six.unichr(i), None)
                  for i in range(0, 65536)
                  }
    for wide_ch, name in char_names.items():
        if name is None:
            continue
        # Unicode pairs the two forms by name: 'FULLWIDTH X' has
        # counterpart 'X', and 'X' may have counterpart 'HALFWIDTH X'.
        if name.upper().startswith('FULLWIDTH '):
            half_name = name[len('FULLWIDTH '):]
        else:
            half_name = 'HALFWIDTH {}'.format(name)
        try:
            half_ch = unicodedata.lookup(half_name)
        except KeyError:
            # No counterpart with that derived name; skip this char.
            pass
        else:
            _wide_to_narrow[wide_ch] = half_ch
            _narrow_to_wide[half_ch] = wide_ch
    logger.debug('Mapped %d characters from wide<->narrow',
                 len(_wide_to_narrow))
def setUp(self):
    # These tests verify the UTF-8 decoder/validator on the various test cases from
    # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
    vs = []
    for k in _create_utf8_test_sequences():
        vs.extend(k[1])

    # All Unicode code points
    for i in range(0, 0xffff):  # should be 0x10ffff, but non-wide Python build is limited to 16-bits
        if i < 0xD800 or i > 0xDFFF:  # filter surrogate code points, which are disallowed to encode in UTF-8
            vs.append((True, six.unichr(i).encode("utf-8")))

    # FIXME: UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800'
    # in position 0: surrogates not allowed
    # The surrogate cases below are deliberately disabled (dead code kept
    # until the FIXME above is resolved).
    if False:
        # 5.1 Single UTF-16 surrogates
        for i in range(0xD800, 0xDBFF):  # high-surrogate
            ss = six.unichr(i).encode("utf-8")
            vs.append((False, ss))
        for i in range(0xDC00, 0xDFFF):  # low-surrogate
            ss = six.unichr(i).encode("utf-8")
            vs.append((False, ss))

        # 5.2 Paired UTF-16 surrogates
        for i in range(0xD800, 0xDBFF):  # high-surrogate
            for j in range(0xDC00, 0xDFFF):  # low-surrogate
                ss1 = six.unichr(i).encode("utf-8")
                ss2 = six.unichr(j).encode("utf-8")
                vs.append((False, ss1 + ss2))
                vs.append((False, ss2 + ss1))

    self._TEST_SEQUENCES = vs
def setUp(self):
    path = tests.get_data_path(('PP', 'simple_pp', 'global.pp'))
    self.cube_2d = iris.load_cube(path)
    # Generate the unicode cube up here now it's used in two tests.
    # U+A000 and U+07B4 exercise a non-Latin 'source' attribute.
    unicode_str = six.unichr(40960) + u'abcd' + six.unichr(1972)
    self.unicode_cube = iris.tests.stock.simple_1d()
    self.unicode_cube.attributes['source'] = unicode_str
def gen_utf8(file_bytes):
    """Decode ``file_bytes`` as UTF-8 and tally per-character counts.

    Returns
    -------
    tuple
        ``(file_dict, rows_dict)``: ``file_dict`` maps each character to
        its occurrence count; ``rows_dict`` marks (with True) each
        16-code-point "row" that contains a non-ASCII character.
    """
    global args
    file_dict = {}
    rows_dict = {}
    # convert to utf-8
    errors = 'strict'
    if args.errors:
        # -e flag: substitute U+FFFD for undecodable bytes instead of failing.
        errors = 'replace'
    try:
        file_utf8 = file_bytes.decode('utf-8', errors)
        pass
    except UnicodeDecodeError:
        # NOTE(review): ``source`` is not defined in this function --
        # presumably a module-level global naming the input file; verify.
        print ( "file {0} is not valid utf-8, try analysing file as bytes using flag -b or enable error replacement with flag -e\n".format(source))
        sys.exit ( 2 )
    except:
        # Any other failure: show the traceback and abort.
        traceback.print_exc()
        sys.exit ( 1 )
    for c in file_utf8:
        if not c in file_dict:
            file_dict[ c ] = 0
        file_dict[ c ] += 1
        # Track non-ASCII characters by the 16-char row their code point
        # falls into (row = code point rounded down to multiple of 16).
        if c > six.unichr(0x7f):
            cp = ord( c )
            cp = ( int( cp / 16 ) ) * 16
            rows_dict[ six.unichr( cp ) ] = True
    return ( file_dict, rows_dict )
def _mouse_handler(self, cli, mouse_event):
    """
    Handle mouse events in a pane. A click in a non-active pane will select
    it, one in an active pane, will send the mouse event to the application
    running inside it.
    """
    process = self.process
    x = mouse_event.position.x
    y = mouse_event.position.y

    # The containing Window translates coordinates to the absolute position
    # of the whole screen, but in this case, we need the relative
    # coordinates of the visible area.
    y -= self.process.screen.line_offset

    if not self.has_focus(cli):
        # Focus this process when the mouse has been clicked.
        if mouse_event.event_type == MouseEventTypes.MOUSE_UP:
            self.set_focus(cli)
    else:
        # Already focussed, send event to application when it requested
        # mouse support.
        if process.screen.sgr_mouse_support_enabled:
            # Xterm SGR mode.
            ev, m = {
                MouseEventTypes.MOUSE_DOWN: ('0', 'M'),
                MouseEventTypes.MOUSE_UP: ('0', 'm'),
                MouseEventTypes.SCROLL_UP: ('64', 'M'),
                MouseEventTypes.SCROLL_DOWN: ('65', 'M'),
            }.get(mouse_event.event_type)

            # SGR encoding: ESC [ < btn ; col ; row, terminated by
            # 'M' (press) or 'm' (release); coordinates are 1-based.
            self.process.write_input(
                '\x1b[<%s;%s;%s%s' % (ev, x + 1, y + 1, m))
        elif process.screen.urxvt_mouse_support_enabled:
            # Urxvt mode.
            ev = {
                MouseEventTypes.MOUSE_DOWN: 32,
                MouseEventTypes.MOUSE_UP: 35,
                MouseEventTypes.SCROLL_UP: 96,
                MouseEventTypes.SCROLL_DOWN: 97,
            }.get(mouse_event.event_type)

            self.process.write_input(
                '\x1b[%s;%s;%sM' % (ev, x + 1, y + 1))
        elif process.screen.mouse_support_enabled:
            # Fall back to old mode.
            # Classic X10 encoding packs button and coordinates into
            # single bytes (offset 32/33), so positions must stay small
            # enough to fit; larger coordinates are silently dropped.
            if x < 96 and y < 96:
                ev = {
                    MouseEventTypes.MOUSE_DOWN: 32,
                    MouseEventTypes.MOUSE_UP: 35,
                    MouseEventTypes.SCROLL_UP: 96,
                    MouseEventTypes.SCROLL_DOWN: 97,
                }.get(mouse_event.event_type)

                self.process.write_input('\x1b[M%s%s%s' % (
                    six.unichr(ev),
                    six.unichr(x + 33),
                    six.unichr(y + 33)))
def set_unichr(x):
    """Coerce ``x`` to a single unicode character.

    Strings are parsed as hexadecimal code points, integers are used
    directly, and anything else is converted through ``int()`` first.
    """
    if isinstance(x, string_types):
        return unichr(int(x, 16))
    if isinstance(x, integer_types):
        return unichr(x)
    return unichr(int(x))
def test_reading_utf8_without_flag(self):
    # The record's leader does not carry the 'a' flag marking UTF-8.
    with open('test/utf8_without_leader_flag.dat', 'rb') as fh:
        reader = MARCReader(fh, to_unicode=False)
        record = next(reader)
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), binary_type)
        # Raw bytes keep the UTF-8 combining-accent sequences intact.
        self.assertEqual(utitle,
                         b'De la solitude a\xcc\x80 la communaute\xcc\x81.')

    with open('test/utf8_without_leader_flag.dat', 'rb') as fh:
        reader = MARCReader(fh, to_unicode=True, hide_utf8_warnings=True)
        record = next(reader)
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), text_type)
        # unless you force utf-8 characters will get lost and
        # warnings will appear in the terminal
        self.assertEqual(utitle, 'De la solitude a la communaute .')

    # force reading as utf-8
    with open('test/utf8_without_leader_flag.dat', 'rb') as fh:
        reader = MARCReader(fh, to_unicode=True, force_utf8=True,
                            hide_utf8_warnings=True)
        record = next(reader)
        self.assertEqual(type(record), Record)
        utitle = record['240']['a']
        self.assertEqual(type(utitle), text_type)
        # Forcing UTF-8 restores the combining accents U+0300 / U+0301.
        self.assertEqual(utitle, u'De la solitude a' + unichr(0x0300) +
                         ' la communaute' + unichr(0x0301) + '.')
def _next_code_point(val, val_iter, yield_char=False, to_int=lambda x: x):
    """Provides the next *code point* in the given Unicode sequence.

    This generator function yields complete character code points, never
    incomplete surrogates. When a low surrogate is found without following
    a high surrogate, this function raises ``ValueError`` for having
    encountered an unpaired low surrogate. When the provided iterator ends
    on a high surrogate, this function yields ``None``. This is the
    **only** case in which this function yields ``None``. When this
    occurs, the user may append additional data to the input unicode
    sequence and resume iterating through another ``next`` on this
    generator. When this function receives ``next`` after yielding
    ``None``, it *reinitializes the unicode iterator*. This means that
    this feature can only be used for values that contain an ``__iter__``
    implementation that remains at the current position in the data when
    called (e.g. :class:`BufferQueue`). At this point, there are only two
    possible outcomes:
        * If next code point is a valid low surrogate, this function
          yields the combined code point represented by the surrogate
          pair.
        * Otherwise, this function raises ``ValueError`` for having
          encountered an unpaired high surrogate.

    Args:
        val (unicode|BufferQueue): A unicode sequence or unicode
            BufferQueue over which to iterate.
        val_iter (Iterator[unicode|BufferQueue]): The unicode sequence
            iterator over ``val`` from which to generate the next integer
            code point in the range ``0x0`` to ``0x10FFFF``.
        yield_char (Optional[bool]): If True **and** the character code
            point resulted from a surrogate pair, this function will yield
            a :class:`CodePoint` representing the character code point and
            containing the original unicode character. This is useful when
            the original unicode character will be needed again because
            UCS2 Python builds will error when trying to convert code
            points greater than 0xFFFF back into their unicode character
            representations. This avoids requiring the user to
            mathematically re-derive the surrogate pair in order to
            successfully convert the code point back to a unicode
            character.
        to_int (Optional[callable]): A function to call on each element of
            val_iter to convert that element to an int.
    """
    high = next(val_iter)
    low = None
    code_point = to_int(high)
    # A low surrogate cannot legally appear first.
    if _LOW_SURROGATE_START <= code_point <= _LOW_SURROGATE_END:
        raise ValueError(
            'Unpaired low surrogate in Unicode sequence: %d' % code_point)
    elif _HIGH_SURROGATE_START <= code_point <= _HIGH_SURROGATE_END:
        def combine_surrogates():
            # Pull the next unit and verify it is a low surrogate, then
            # combine the pair into a single supplementary code point.
            low_surrogate = next(val_iter)
            low_code_point = to_int(low_surrogate)
            if low_code_point < _LOW_SURROGATE_START or low_code_point > _LOW_SURROGATE_END:
                raise ValueError('Unpaired high surrogate: %d' % code_point)
            # Decode the surrogates
            real_code_point = _NON_BMP_OFFSET
            real_code_point += (code_point - _HIGH_SURROGATE_START) << 10
            real_code_point += (low_code_point - _LOW_SURROGATE_START)
            return real_code_point, low_surrogate
        try:
            code_point, low = combine_surrogates()
        except StopIteration:
            # Input ended on a high surrogate: signal the caller, then
            # retry once they resume us with more data appended to val.
            yield None
            val_iter = iter(val)  # More data has appeared in val.
            code_point, low = combine_surrogates()
    if yield_char and low is not None:
        # Only surrogate-pair results carry the original character.
        out = CodePoint(code_point)
        if isinstance(val, six.text_type):
            # Iterating over a text type returns text types.
            out.char = high + low
        else:
            out.char = six.unichr(high) + six.unichr(low)
    else:
        out = code_point
    yield out
def format_bar(cnt):
    # Scale the count to the bar width in character cells.
    # NOTE(review): relies on ``width`` and ``max_count`` from the
    # enclosing scope, and floor/ceil presumably from math -- verify.
    scaled = cnt*width/max_count
    full = int(floor(scaled))
    # Fractional remainder expressed in eighths of a cell.
    eighths = int(ceil((scaled-full)*8))
    if eighths:
        # U+2588 is FULL BLOCK; U+2589..U+258F are the 7/8 .. 1/8 blocks,
        # so 0x2588 + (8 - eighths) renders the partial final cell.
        return full*six.unichr(0x2588) + six.unichr(0x2588+(8-eighths))
    else:
        return full*six.unichr(0x2588)
def load(self, fileName, hdrf):
    """Load a Radiance (.hdr) image into ``hdrf``.

    Parses the '#?RADIANCE' header, the resolution line, then decodes
    each scanline via ``self.deCrunch`` / ``self.workOnRGBE``.

    Returns False when the header or scanline setup is invalid; returns
    None after a successful (or partially successful) decode.
    """
    f = open(fileName, 'rb')
    strf = ''.join(struct.unpack('10s', f.read(10)))
    if strf != '#?RADIANCE':
        f.close()
        return False
    f.seek(1, 1)
    cmd = []
    c = 0
    oldc = 0
    # Skip header commands; they end at a blank line (two consecutive
    # newlines).  BUG FIX: compare byte values with ``==``, not ``is`` --
    # integer identity is a CPython small-int caching accident.
    while True:
        oldc = c
        c = ord(f.read(1))
        if c == 0xa and oldc == 0xa:
            break
        cmd.append(str(sm.unichr(c)))
    # Read the resolution line up to (and including) its newline.
    reso = []
    while True:
        c = ord(f.read(1))
        cstr = str(sm.unichr(c))
        reso.append(cstr)
        if c == 0xa:
            break
    resoStr = "".join(reso)
    # Raw string avoids invalid-escape warnings in the pattern.
    resoStrUnformat = re.match(r'\-Y (?P<_0>\d+) \+X (?P<_1>\d+)', resoStr)
    (ws, hs) = map(itemgetter(1), sorted(resoStrUnformat.groupdict().items()))
    w = int(ws)
    h = int(hs)
    hdrf.width = w
    hdrf.height = h
    self.scanline = [[-1 for i in range(4)] for i in range(w)]
    if not self.scanline:
        print("File closed because scanline not found.")
        f.close()
        return False
    scnidx = 0
    # Convert image, bottom row first (-Y orientation).
    # BUG FIX: ``for [] in range(...)`` tried to unpack each int into an
    # empty target list, which raises TypeError; use a throwaway name.
    for _ in range(h - 1, -1, -1):
        # If self.scanline doesn't update is because of this
        if (self.deCrunch(scnidx, w, f) is False):
            break
        # This should update the cols array in hdrf which is the HDR Class.
        self.workOnRGBE(hdrf, w)
        scnidx = scnidx + 1
    f.close()
def test_decode_entities(self):
    # All three entity spellings must decode to U+2192 (rightwards arrow).
    html = u'→'
    self.assertEquals(six.unichr(8594), decode_entities(html))
    html = u'→'
    self.assertEquals(six.unichr(8594), decode_entities(html))
    html = u'→'
    self.assertEquals(six.unichr(8594), decode_entities(html))
def toggle_advanced_panel(self):
    """Hides/Shows the advanced options panel"""
    if bool(self.show_advanced.get()):
        self.subFrame.grid(column=0, row=7, columnspan=4, rowspan=6,
                           sticky=Tk.E+Tk.W, ipady=5)
        self.subFrame.grid_columnconfigure(2, weight=1)
        # U+25BC black down-pointing triangle marks the panel as open.
        self.toggle_button.configure(text='Advanced ' + six.unichr(9660))
    else:
        self.subFrame.grid_forget()
        # U+25B6 black right-pointing triangle marks the panel as closed.
        self.toggle_button.configure(text='Advanced ' + six.unichr(9654))
def handle_charref(self, name):
    """Decode a numeric character reference and forward it as data."""
    # Outside a captured region there is nothing to do.
    if self.savedata is None:
        return
    # An 'x'/'u' prefix marks a hexadecimal reference; otherwise decimal.
    if name[:1].lower() in ('x', 'u'):
        c = unichr(int(name[1:], 16))
    else:
        c = unichr(int(name, 10))
    log.debug("Num ent: &%s; = %r", name, c)
    self.handle_data(c)
def testUnicodeInRoute(self): url = "/update/3/GMP/53.0/20170421105455/Linux_x86_64-gcc3/null/{}/Linux%204.4.0-53-generic%20(GTK%203.18.9,libpulse%208.0.0)/mint/1.0/update.xml" # Test an arbitrary number of unicode chars cdec = step = 42 while cdec < 4200: channel = ''.join(unichr(c) for c in range(cdec, cdec + step) if unichr(c).isalpha()) cdec += step url = url.format(channel) ret = self.client.get(url) self.assertEqual(ret.status_code, 200)
def _write(self, output, value):
    """Write one coordinate to ``output`` using the Google polyline
    5-bit chunked encoding."""
    # Scale to 1e-5 precision, then left-shift so the sign occupies the
    # low bit; negative values are bit-inverted.
    chunk = int(round(value * 1e5, 0)) << 1
    if chunk < 0:
        chunk = ~chunk
    # Emit 5 bits per character, least-significant chunk first.  The
    # 0x20 bit flags a continuation; +63 maps into printable ASCII.
    while chunk >= 0x20:
        output.write(six.unichr((0x20 | (chunk & 0x1f)) + 63))
        chunk >>= 5
    output.write(six.unichr(chunk + 63))
def unichr2(i):
    """Returns a Unicode string of one character with ordinal 32 <= i,
    otherwise an escaped control character.
    """
    ch = six.unichr(i)
    if 32 <= i:
        return ch
    if ch in controlchars:
        # we just return the character, unescaped
        # if people want to escape them they can use escapecontrols
        return ch
    return "\\u%04x" % i
def repl(m):
    """Regex callback: decode escaped code units, combining a valid
    UTF-16 surrogate pair into one supplementary-plane character."""
    first = int(m.group(1), 16)
    if m.group(2) is None:
        # Single escape: emit it directly.
        return unichr(first)
    second = int(m.group(2), 16)
    is_pair = (0xD800 <= first <= 0xDBFF) and (0xDC00 <= second <= 0xDFFF)
    if is_pair:
        # Combine high/low surrogates into the real code point.
        combined = ((first - 0xD800) << 10) + (second - 0xDC00) + 0x10000
        return unichr(combined)
    # Not a surrogate pair: emit both units as-is.
    return unichr(first) + unichr(second)
def show_status(self, changed): status = '' # set 'geotagged' status if self.metadata.latlong: status += six.unichr(0x2690) # set 'unsaved' status if changed: status += six.unichr(0x26A1) self.status.setText(status) self._elide_name() if changed: self.image_list.new_metadata.emit(True)
def __init__(self, start=0, end=10, width=12,
             fill=six.unichr(0x25C9).encode("utf-8"),    # U+25C9 fisheye: filled cell
             blank=six.unichr(0x25CC).encode("utf-8"),   # U+25CC dotted circle: empty cell
             marker=six.unichr(0x25CE).encode("utf-8"),  # U+25CE bullseye: current position
             format='[%(fill)s%(marker)s%(blank)s] %(progress)s%%',
             incremental=True, stdout=sys.stdout):
    # Delegate everything except the output stream to the base class.
    super(AnimatedProgressBar, self).__init__(start, end, width, fill, blank,
                                              marker, format, incremental)
    self.stdout = stdout
def test_unaccepted_control_chars(self):
    """Tests we cannot assign the unaccepted control chars without escaping.

    Source: https://en.wikipedia.org/wiki/Valid_characters_in_XML#XML_1.0
    """
    exc_msg = ("All strings must be XML compatible: Unicode or ASCII, no "
               "NULL bytes or control characters")
    for code in xliff.ASCII_CONTROL_CODES:
        # BUG FIX: the '0' fallback was dead code -- the old expression
        # ``u'...' % code.lstrip('0') or '0'`` parsed as
        # ``(u'...' % code.lstrip('0')) or '0'`` and the formatted string
        # is always truthy.  Parenthesize so an all-zero code (e.g. '00')
        # still produces the digit '0' inside the entity.
        self.unit.target = u'Een&#x%s;' % (code.lstrip('0') or '0')
        assert self.unit.target == u'Een%s' % six.unichr(int(code, 16))
        self.unit.target = u'Een%s' % six.unichr(int(code, 16))
        assert self.unit.target == u'Een%s' % six.unichr(int(code, 16))
def _write(self, output, curr_value, prev_value, factor):
    # Polyline encoding stores deltas between successive coordinates.
    curr_value = self._py2_round(curr_value * factor)
    prev_value = self._py2_round(prev_value * factor)
    coord = curr_value - prev_value
    # Left-shift so the sign lives in the low bit; negative deltas are
    # bit-inverted (standard Google polyline algorithm).
    coord <<= 1
    coord = coord if coord >= 0 else ~coord
    # Emit 5 bits per character, LSB first; 0x20 flags a continuation
    # chunk and +63 shifts the value into printable ASCII.
    while coord >= 0x20:
        output.write(six.unichr((0x20 | (coord & 0x1f)) + 63))
        coord >>= 5
    output.write(six.unichr(coord + 63))
def handle_charref(self, ref):
    # called for each numeric character reference; e.g. for '&#160;',
    # ref will be '160'
    # Reconstruct the original character reference.
    if ref.startswith('x'):
        value = unichr(int(ref[1:], 16))
    else:
        value = unichr(int(ref))
    # Characters with a cp1252 mapping are re-emitted as the hex
    # reference of their proper Unicode equivalent; everything else is
    # passed through as the original reference.
    if value in _cp1252.keys():
        self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:])
    else:
        self.pieces.append('&#%(ref)s;' % locals())
def _send_cmd(self, command, server):
    """Internal method to send a single command to the pexpect object.

    Args::

        command: the command to send
        server: the pexpect object to send to

    Returns:
        The formatted output of the command as a string, or -1 on timeout
    """
    try:
        if self.options["unix_line_endings"]:
            # Send an explicit LF (U+000A) rather than the default.
            server.send("{0}{1}".format(
                command,
                six.unichr(0x000A),
            ))
        elif self.options["windows_line_endings"]:
            # Send an explicit CRLF (U+000D U+000A).
            server.send("{0}{1}{2}".format(
                command,
                six.unichr(0x000D),
                six.unichr(0x000A),
            ))
        else:
            server.sendline(command)

        cmd_response = server.expect(
            self.options["shell_prompts"] +
            self.options["extra_prompts"] +
            self.options["passwd_prompts"],
            self.options["cmd_timeout"],
        )

        # An index past the shell+extra prompt lists means a password
        # prompt matched; answer with the configured second password.
        if cmd_response >= (
            len(self.options["shell_prompts"]) +
            len(self.options["extra_prompts"])
        ) and len(self.options["second_password"] or "") > 0:
            server.sendline(self.options["second_password"])
            server.expect(
                self.options["shell_prompts"] +
                self.options["extra_prompts"],
                self.options["cmd_timeout"],
            )
    except (pexpect.TIMEOUT, pexpect.EOF):
        # No prompt matched in time; try to salvage whatever output
        # arrived before the failure.
        return self._try_for_unmatched_prompt(
            server,
            server.before,
            command,
        )

    return format_output(server.before, command, self.options)
def test_unicode_scalar(self):
    """ Storage of variable-length unicode strings (auto-creation) """

    # U+2340 makes the value genuinely non-ASCII.
    self.f.attrs['x'] = u"Hello" + six.unichr(0x2340) + u"!!"
    out = self.f.attrs['x']
    self.assertEqual(out, u"Hello" + six.unichr(0x2340) + u"!!")
    self.assertEqual(type(out), six.text_type)

    # The underlying HDF5 attribute must be a variable-length UTF-8 string.
    aid = h5py.h5a.open(self.f.id, b"x")
    tid = aid.get_type()
    self.assertEqual(type(tid), h5py.h5t.TypeStringID)
    self.assertEqual(tid.get_cset(), h5py.h5t.CSET_UTF8)
    self.assertTrue(tid.is_variable_str())
def handle_entityref(self, name):
    """Resolve a named entity (e.g. 'amp') and print its character."""
    char = unichr(name2codepoint[name])
    print("Named ent:", char)
return default elif l == 1: return ord(s) elif l == 2: return struct.unpack('>H', s)[0] elif l == 3: return struct.unpack('>L', b'\x00' + s)[0] elif l == 4: return struct.unpack('>L', s)[0] else: raise TypeError('invalid length: %d' % l) # decode_text PDFDocEncoding = ''.join( six.unichr(x) for x in ( 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e,
def handle_charref(self, name):
    """Decode a numeric character reference ('123' or 'x1F') and print it."""
    is_hex = name.startswith('x')
    digits = name[1:] if is_hex else name
    c = unichr(int(digits, 16 if is_hex else 10))
    print("Num ent :", c)
def replchar(m):
    """Regex callback: replace a decimal code point with its character."""
    return six.unichr(int(m.group(1)))
def repl_oct_char(match):
    """Regex callback: decode an octal escape (e.g. '\\101') to its char."""
    octal_digits = match.group(0)[1:]
    return unichr(int(octal_digits, 8))
def handle_charref(self, name):
    """Decode a numeric character reference and hand it to ``handle_ref``."""
    # 'x' prefix marks a hexadecimal reference; otherwise decimal.
    codepoint = int(name[1:], 16) if name.startswith('x') else int(name)
    self.handle_ref(six.unichr(codepoint))
def _unicodeExpand(s):
    """Replace \\uXXXX / \\UXXXXXXXX escapes in ``s`` with their characters."""
    def _decode(match):
        # Skip the two-character escape prefix, parse the hex digits.
        return six.unichr(int(match.group(0)[2:], 16))
    return r_unicodeEscape.sub(_decode, s)
def render_char(self, code_point, is_first):
    # Convert the code point to its character and regex-escape it when
    # required (escaping can depend on position, hence ``is_first``).
    char = six.unichr(code_point)
    if self.needs_escape(char, is_first):
        return re.escape(char)
    return char
from __future__ import print_function import collections import sys import unicodedata import six from six.moves import range # pylint: disable=redefined-builtin import tensorflow as tf # Conversion between Unicode and UTF-8, if required (on Python2) _native_to_unicode = (lambda s: s.decode("utf-8")) if six.PY2 else (lambda s: s) # This set contains all letter and number characters. _ALPHANUMERIC_CHAR_SET = set( six.unichr(i) for i in range(sys.maxunicode) if (unicodedata.category(six.unichr(i)).startswith("L") or unicodedata.category(six.unichr(i)).startswith("N"))) def encode(text): """Encode a unicode string as a list of tokens. Args: text: a unicode string Returns: a list of tokens as Unicode strings """ if not text: return [] ret = []
def property_chars(self, prefix):
    """Return a string of every Unicode character whose general
    category starts with ``prefix`` (e.g. 'L' for letters)."""
    matching = (six.unichr(cp) for cp in range(sys.maxunicode)
                if unicodedata.category(six.unichr(cp)).startswith(prefix))
    return "".join(matching)
def generate_ids(prefix=None, postfix=None, count=1, length=None, use_unicode=False, _stepler_prefix=None): """Generate unique identificators, based on UUID. Arguments: prefix (str|None): prefix of unique ids postfix (str|None): postfix of unique ids count (int): count of unique ids length (int|None): length of unique ids use_unicode (boolean|False): generate str with unicode or not _stepler_prefix (str, optional): Resources prefix is used to call ``generate_ids`` inside ``stepler.config`` and avoid cross imports problem. By default it has value ``stepler.config.STEPLER_PREFIX``. Returns: generator: unique ids """ # TODO(schipiga): thirdparty module should know nothing about stepler # configured values. We hack it for usability. # ``If``-statement is used to allow ``generate_ids`` inside # ``stepler.config`` and prevent cross imports problem. if not _stepler_prefix: from stepler import config _stepler_prefix = config.STEPLER_PREFIX if prefix: prefix = prefix.strip('-') if postfix: postfix = postfix.strip('-') # hash usually should have >= 7 symbols uid_length = min_uid_length = 7 # calculate if it's possible to generate UID with requested length, # postfix and prefix if length: const_length = len(_stepler_prefix + '-' + (prefix + '-' if prefix else '') + # uid will be here ('-' + postfix if postfix else '')) uid_length = length - const_length if uid_length < min_uid_length: raise ValueError( "According to passed prefix and postfix minimal length to " "generate unique id must be equal or greater " "than {0}.".format(const_length + min_uid_length)) for _ in range(count): # mix constant stepler prefix to separate tested objects uid = _stepler_prefix if prefix: uid += '-' + prefix if use_unicode: uid += '-' + u"".join( six.unichr(random.choice(range(0x0400, 0x045F))) for _ in range(uid_length)) else: uid_val = str(uuid.uuid4()) uid_val = (uid_val * (uid_length // len(uid_val) + 1))[:uid_length] uid += '-' + uid_val if postfix: uid += '-' + postfix yield uid
"""various classes for establishing ground truth""" from .classification import classes_to_numpy, classes_from_numpy, BLANK_CLASS from .opencv_utils import show_image_and_wait_for_key, draw_segments, draw_classes import numpy import string from six import text_type, unichr, moves NOT_A_SEGMENT = unichr(10) class Grounder(object): def ground(self, imagefile, segments, external_data): """given an ImageFile, grounds it, through arbitrary data (better defined in subclasses)""" raise NotImplementedError() class TerminalGrounder(Grounder): """ Labels by using raw_input() to capture a character each line """ def ground(self, imagefile, segments, _=None): classes = [] character = "" print("Found %s segments to ground." % len(segments)) print("Type 'exit' to stop grounding the file.") print("Type ' ' for anything that is not a character.") print("Grounding will exit automatically after all segments.") print("Going back to a previous segment is not possible at this time.") for num in range(len(segments)): while len(character) != 1:
def testUnicodeStr(self):
    # Two dtml vars rendered side by side: U+07D0 passed as text and
    # U+00C8 recoded from its UTF-8 bytes.
    html = self.doc_class('<dtml-var a><dtml-var b>')
    expected = u'\u07d0\xc8'
    res = html(a=force_str(six.unichr(2000)), b=self._recode(b'\xc3\x88'))
    self.assertEqual(res, expected)
def hook(obj):
    """Object hook: map every value (a hex code point) to its character."""
    return {key: unichr(int(value, 16)) for key, value in obj.items()}
def _escape_char(c):
    """Return ``c`` unchanged when legal, otherwise its escaped form.

    Accepts either a one-character string or an integer code point.
    """
    ch = six.unichr(c) if isinstance(c, int) else c
    if ch in LEGAL_CHARS:
        return ch
    return ESCAPE_FMT.format(ord(ch))
def handle_charref(self, name):
    """Decode a numeric character reference and enqueue it as TEXT."""
    # 'x'/'X' prefix selects hexadecimal; otherwise decimal.
    if name.lower().startswith('x'):
        codepoint = int(name[1:], 16)
    else:
        codepoint = int(name)
    self._enqueue(TEXT, six.unichr(codepoint))
def handle_charref(self, name):
    """Handle entries in the form &#NNNN;"""
    # Only decimal references are handled here (no &#x...; hex form).
    self.handle_data(six.unichr(int(name)))
if sys.version_info >= (2, 7) or sys.version_info >= (3, 2): import unittest as ut else: try: import unittest2 as ut except ImportError: raise ImportError( 'unittest2 is required to run the test suite with python-%d.%d' % (sys.version_info[:2])) # Check if non-ascii filenames are supported # Evidently this is the most reliable way to check # See also h5py issue #263 and ipython #466 # To test for this, run the testsuite with LC_ALL=C try: testfile, fname = tempfile.mkstemp(unichr(0x03b7)) except UnicodeError: UNICODE_FILENAMES = False else: UNICODE_FILENAMES = True os.close(testfile) os.unlink(fname) del fname del testfile class TestCase(ut.TestCase): """ Base class for unit tests. """ @classmethod
from feature_extraction import FEATURE_DATATYPE import numpy import cv2 from opencv_utils import get_opencv_version from six import unichr CLASS_DATATYPE = numpy.uint16 CLASS_SIZE = 1 CLASSES_DIRECTION = 0 # vertical - a classes COLUMN BLANK_CLASS = unichr(35) # marks unclassified elements def classes_to_numpy(classes): """given a list of unicode chars, transforms it into a numpy array""" import array # utf-32 starts with constant ''\xff\xfe\x00\x00', then has little endian 32 bits chars # this assumes little endian architecture! assert unichr(15).encode('utf-32') == b'\xff\xfe\x00\x00\x0f\x00\x00\x00' assert array.array("I").itemsize == 4 int_classes = array.array("I", "".join(classes).encode('utf-32')[4:]) assert len(int_classes) == len(classes) classes = numpy.array(int_classes, dtype=CLASS_DATATYPE, ndmin=2) # each class in a column. numpy is strange :( classes = classes if CLASSES_DIRECTION == 1 else numpy.transpose(classes) return classes def classes_from_numpy(classes): """reverses classes_to_numpy""" classes = classes if CLASSES_DIRECTION == 0 else classes.tranpose() classes = list(map(unichr, classes))
def property_chars(self, prefix):  # pylint:disable=no-self-use
    """Concatenate every Unicode character whose general category
    begins with ``prefix``."""
    selected = []
    for code in range(sys.maxunicode):
        if unicodedata.category(six.unichr(code)).startswith(prefix):
            selected.append(six.unichr(code))
    return "".join(selected)
def sign(n): return cmp(n, 0) r_unicodeEscape = re.compile(r'(\\u[0-9A-Fa-f]{4}|\\U[0-9A-Fa-f]{8})') def _unicodeExpand(s): return r_unicodeEscape.sub(lambda m: six.unichr(int(m.group(0)[2:], 16)), s) narrow_build = False try: six.unichr(0x10FFFF) except ValueError: narrow_build = True if narrow_build: def _unicodeExpand(s): try: return r_unicodeEscape.sub( lambda m: six.unichr(int(m.group(0)[2:], 16)), s) except ValueError: warnings.warn( 'Encountered a unicode char > 0xFFFF in a narrow python build. ' 'Trying to degrade gracefully, but this can cause problems ' 'later when working with the string:\n%s' % s) return r_unicodeEscape.sub(
class Junit(py.xml.Namespace): pass # We need to get the subset of the invalid unicode ranges according to # XML 1.0 which are valid in this python build. Hence we calculate # this dynamically instead of hardcoding it. The spec range of valid # chars is: Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] # | [#x10000-#x10FFFF] _legal_chars = (0x09, 0x0A, 0x0D) _legal_ranges = ((0x20, 0x7E), (0x80, 0xD7FF), (0xE000, 0xFFFD), (0x10000, 0x10FFFF)) _legal_xml_re = [ u"%s-%s" % (six.unichr(low), six.unichr(high)) for (low, high) in _legal_ranges if low < sys.maxunicode ] _legal_xml_re = [six.unichr(x) for x in _legal_chars] + _legal_xml_re illegal_xml_re = re.compile(u"[^%s]" % u"".join(_legal_xml_re)) del _legal_chars del _legal_ranges del _legal_xml_re _py_ext_re = re.compile(r"\.py$") def bin_xml_escape(arg): def repl(matchobj): i = ord(matchobj.group()) if i <= 0xFF:
def handle_entityref(self, name):
    """Resolve a named entity; unknown names pass through verbatim."""
    codepoint = entities.name2codepoint.get(name)
    if codepoint is None:
        # Unknown entity: keep the literal '&name;' text.
        text = '&%s;' % name
    else:
        text = six.unichr(codepoint)
    self._enqueue(TEXT, text)
' ': '_', ',': '_', '.': '_', '-': '_', '/': '_PER_', '%': 'PERCENT', '[': '', ']': '', '(': '', ')': '', "'": '', '8': 'EIGHT', '15': 'FIFTEEN', '30': 'THIRTY', '\\': '_', six.unichr(160): '_', six.unichr(176): 'DEG_', six.unichr(186): 'DEG_', six.unichr(8211): '_', } def main(): """Main entry point for UNECE code .xls parsing.""" parser = argparse.ArgumentParser( description='Reads in a .xls file and generates a units module for ' 'OpenHTF.', prog='python units_from_xls.py') parser.add_argument('xlsfile', type=str, help='the .xls file to parse') parser.add_argument('--outfile', type=str,
def test_render_response_utf8(self):
    # Non-ASCII code points (U+A000, U+07B4) must survive XML rendering.
    req = apirequest.APIRequest("FakeAction", "FakeVersion", {})
    resp = {'utf8': six.unichr(40960) + u'abcd' + six.unichr(1972)}
    data = req._render_response(resp, 'uuid').decode()
    self.assertIn('<utf8>ꀀabcd޴</utf8>', data)
from __future__ import print_function import collections import sys import unicodedata import six from six.moves import range # pylint: disable=redefined-builtin import tensorflow.compat.v1 as tf # Conversion between Unicode and UTF-8, if required (on Python2) _native_to_unicode = (lambda s: s.decode('utf-8')) if six.PY2 else (lambda s: s) # This set contains all letter and number characters. _ALPHANUMERIC_CHAR_SET = set( six.unichr(i) for i in range(sys.maxunicode) if ( unicodedata.category(six.unichr(i)).startswith('L') or unicodedata.category(six.unichr(i)).startswith('N') ) ) def encode(text): """Encode a unicode string as a list of tokens. Args: text: a unicode string Returns: a list of tokens as Unicode strings
def _get_all_chars():
    """Yield the characters U+0000 through U+FFFE in code-point order."""
    for code_point in range(0xFFFF):
        yield six.unichr(code_point)
class TestSshClient(base.TestCase):
    """Unit tests for tempest.lib.common.ssh.Client using paramiko mocks."""

    # Stand-in value patched over select.POLLIN in the poll-based tests.
    SELECT_POLLIN = 1

    @mock.patch('paramiko.RSAKey.from_private_key')
    @mock.patch('six.StringIO')
    def test_pkey_calls_paramiko_RSAKey(self, cs_mock, rsa_mock):
        """String pkey is loaded via RSAKey; non-string pkey is not."""
        cs_mock.return_value = mock.sentinel.csio
        pkey = 'mykey'
        ssh.Client('localhost', 'root', pkey=pkey)
        rsa_mock.assert_called_once_with(mock.sentinel.csio)
        cs_mock.assert_called_once_with('mykey')
        rsa_mock.reset_mock()
        cs_mock.reset_mock()
        pkey = mock.sentinel.pkey
        # Shouldn't call out to load a file from RSAKey, since
        # a sentinel isn't a basestring...
        ssh.Client('localhost', 'root', pkey=pkey)
        self.assertEqual(0, rsa_mock.call_count)
        self.assertEqual(0, cs_mock.call_count)

    def _set_ssh_connection_mocks(self):
        # Common fixture: patch paramiko's client and policy classes and
        # hand back a MagicMock whose connect() succeeds.
        client_mock = mock.MagicMock()
        client_mock.connect.return_value = True
        return (self.patch('paramiko.SSHClient'),
                self.patch('paramiko.AutoAddPolicy'),
                client_mock)

    def test_get_ssh_connection(self):
        """A first-try connection uses the expected args and never sleeps."""
        c_mock, aa_mock, client_mock = self._set_ssh_connection_mocks()
        s_mock = self.patch('time.sleep')

        c_mock.return_value = client_mock
        aa_mock.return_value = mock.sentinel.aa

        # Test normal case for successful connection on first try
        client = ssh.Client('localhost', 'root', timeout=2)
        client._get_ssh_connection(sleep=1)

        aa_mock.assert_called_once_with()
        client_mock.set_missing_host_key_policy.assert_called_once_with(
            mock.sentinel.aa)
        expected_connect = [mock.call(
            'localhost',
            port=22,
            username='******',
            pkey=None,
            key_filename=None,
            look_for_keys=False,
            timeout=10.0,
            password=None,
            sock=None
        )]
        self.assertEqual(expected_connect, client_mock.connect.mock_calls)
        self.assertEqual(0, s_mock.call_count)

    def test_get_ssh_connection_over_ssh(self):
        """With proxy_client set, the proxy connects first and supplies
        the channel used as ``sock`` for the real connection."""
        c_mock, aa_mock, client_mock = self._set_ssh_connection_mocks()
        proxy_client_mock = mock.MagicMock()
        proxy_client_mock.connect.return_value = True
        s_mock = self.patch('time.sleep')

        c_mock.side_effect = [client_mock, proxy_client_mock]
        aa_mock.return_value = mock.sentinel.aa

        proxy_client = ssh.Client('proxy-host', 'proxy-user', timeout=2)
        client = ssh.Client('localhost', 'root', timeout=2,
                            proxy_client=proxy_client)
        client._get_ssh_connection(sleep=1)

        aa_mock.assert_has_calls([mock.call(), mock.call()])
        proxy_client_mock.set_missing_host_key_policy.assert_called_once_with(
            mock.sentinel.aa)
        proxy_expected_connect = [mock.call(
            'proxy-host',
            port=22,
            username='******',
            pkey=None,
            key_filename=None,
            look_for_keys=False,
            timeout=10.0,
            password=None,
            sock=None
        )]
        self.assertEqual(proxy_expected_connect,
                         proxy_client_mock.connect.mock_calls)
        client_mock.set_missing_host_key_policy.assert_called_once_with(
            mock.sentinel.aa)
        expected_connect = [mock.call(
            'localhost',
            port=22,
            username='******',
            pkey=None,
            key_filename=None,
            look_for_keys=False,
            timeout=10.0,
            password=None,
            sock=proxy_client_mock.get_transport().open_session()
        )]
        self.assertEqual(expected_connect, client_mock.connect.mock_calls)
        self.assertEqual(0, s_mock.call_count)

    @mock.patch('time.sleep')
    def test_get_ssh_connection_two_attemps(self, sleep_mock):
        """One socket.error then success: retries once with backoff sleep."""
        c_mock, aa_mock, client_mock = self._set_ssh_connection_mocks()

        c_mock.return_value = client_mock
        client_mock.connect.side_effect = [
            socket.error,
            mock.MagicMock()
        ]

        client = ssh.Client('localhost', 'root', timeout=1)
        client._get_ssh_connection(sleep=1)
        # We slept 2 seconds: because sleep is "1" and backoff is "1" too
        sleep_mock.assert_called_once_with(2)
        self.assertEqual(2, client_mock.connect.call_count)

    def test_get_ssh_connection_timeout(self):
        """Persistent socket.error past the timeout raises SSHTimeout."""
        c_mock, aa_mock, client_mock = self._set_ssh_connection_mocks()
        timeout = 2
        time_mock = self.patch('time.time')
        time_mock.side_effect = utils.generate_timeout_series(timeout + 1)

        c_mock.return_value = client_mock
        client_mock.connect.side_effect = [
            socket.error,
            socket.error,
            socket.error,
        ]

        client = ssh.Client('localhost', 'root', timeout=timeout)
        # We need to mock LOG here because LOG.info() calls time.time()
        # in order to preprend a timestamp.
        with mock.patch.object(ssh, 'LOG'):
            self.assertRaises(exceptions.SSHTimeout,
                              client._get_ssh_connection)

        # time.time() should be called twice, first to start the timer
        # and then to compute the timedelta
        self.assertEqual(2, time_mock.call_count)

    @mock.patch('select.POLLIN', SELECT_POLLIN, create=True)
    def test_timeout_in_exec_command(self):
        """poll() returning nothing with _is_timed_out=True times out."""
        chan_mock, poll_mock, _, _ = (
            self._set_mocks_for_select([0, 0, 0], True))

        # Test for a timeout condition immediately raised
        client = ssh.Client('localhost', 'root', timeout=2)
        with testtools.ExpectedException(exceptions.TimeoutException):
            client.exec_command("test")

        chan_mock.fileno.assert_called_once_with()
        chan_mock.exec_command.assert_called_once_with("test")
        chan_mock.shutdown_write.assert_called_once_with()

        poll_mock.register.assert_called_once_with(
            chan_mock, self.SELECT_POLLIN)
        poll_mock.poll.assert_called_once_with(10)

    @mock.patch('select.POLLIN', SELECT_POLLIN, create=True)
    def test_exec_command(self):
        """Happy-path exec_command drives the full channel protocol."""
        chan_mock, poll_mock, select_mock, client_mock = (
            self._set_mocks_for_select([[1, 0, 0]], True))

        chan_mock.recv_exit_status.return_value = 0
        chan_mock.recv.return_value = b''
        chan_mock.recv_stderr.return_value = b''

        client = ssh.Client('localhost', 'root', timeout=2)
        client.exec_command("test")

        chan_mock.fileno.assert_called_once_with()
        chan_mock.exec_command.assert_called_once_with("test")
        chan_mock.shutdown_write.assert_called_once_with()

        select_mock.assert_called_once_with()
        poll_mock.register.assert_called_once_with(
            chan_mock, self.SELECT_POLLIN)
        poll_mock.poll.assert_called_once_with(10)
        chan_mock.recv_ready.assert_called_once_with()
        chan_mock.recv.assert_called_once_with(1024)
        chan_mock.recv_stderr_ready.assert_called_once_with()
        chan_mock.recv_stderr.assert_called_once_with(1024)
        chan_mock.recv_exit_status.assert_called_once_with()
        client_mock.close.assert_called_once_with()

    def _set_mocks_for_select(self, poll_data, ito_value=False):
        # Fixture for the poll()-based exec_command tests: patches the
        # connection, timeout check and select.poll, and wires a mock
        # transport/channel pair.  poll_data is either a single poll()
        # return value or (list of lists) a sequence of side effects.
        gsc_mock = self.patch('tempest.lib.common.ssh.Client.'
                              '_get_ssh_connection')
        ito_mock = self.patch('tempest.lib.common.ssh.Client._is_timed_out')
        csp_mock = self.patch(
            'tempest.lib.common.ssh.Client._can_system_poll')
        csp_mock.return_value = True
        select_mock = self.patch('select.poll', create=True)
        client_mock = mock.MagicMock()
        tran_mock = mock.MagicMock()
        chan_mock = mock.MagicMock()
        poll_mock = mock.MagicMock()

        select_mock.return_value = poll_mock
        gsc_mock.return_value = client_mock
        ito_mock.return_value = ito_value
        client_mock.get_transport.return_value = tran_mock
        tran_mock.open_session().__enter__.return_value = chan_mock
        if isinstance(poll_data[0], list):
            poll_mock.poll.side_effect = poll_data
        else:
            poll_mock.poll.return_value = poll_data

        return chan_mock, poll_mock, select_mock, client_mock

    # A multi-byte UTF-8 sample split across recv() calls in the tests below.
    _utf8_string = six.unichr(1071)
    _utf8_bytes = _utf8_string.encode("utf-8")

    @mock.patch('select.POLLIN', SELECT_POLLIN, create=True)
    def test_exec_good_command_output(self):
        """stdout split mid-multibyte-character is reassembled correctly."""
        chan_mock, poll_mock, _, _ = (
            self._set_mocks_for_select([1, 0, 0]))
        closed_prop = mock.PropertyMock(return_value=True)
        type(chan_mock).closed = closed_prop

        chan_mock.recv_exit_status.return_value = 0
        chan_mock.recv.side_effect = [self._utf8_bytes[0:1],
                                      self._utf8_bytes[1:], b'R', b'']
        chan_mock.recv_stderr.return_value = b''

        client = ssh.Client('localhost', 'root', timeout=2)
        out_data = client.exec_command("test")
        self.assertEqual(self._utf8_string + 'R', out_data)

    @mock.patch('select.POLLIN', SELECT_POLLIN, create=True)
    def test_exec_bad_command_output(self):
        """Non-zero exit raises SSHExecCommandFailed carrying stderr text."""
        chan_mock, poll_mock, _, _ = (
            self._set_mocks_for_select([1, 0, 0]))
        closed_prop = mock.PropertyMock(return_value=True)
        type(chan_mock).closed = closed_prop

        chan_mock.recv_exit_status.return_value = 1
        chan_mock.recv.return_value = b''
        chan_mock.recv_stderr.side_effect = [b'R', self._utf8_bytes[0:1],
                                             self._utf8_bytes[1:], b'']

        client = ssh.Client('localhost', 'root', timeout=2)
        exc = self.assertRaises(exceptions.SSHExecCommandFailed,
                                client.exec_command, "test")
        self.assertIn('R' + self._utf8_string, six.text_type(exc))

    def test_exec_command_no_select(self):
        """Without system poll support, output is read via makefile()."""
        gsc_mock = self.patch('tempest.lib.common.ssh.Client.'
                              '_get_ssh_connection')
        csp_mock = self.patch(
            'tempest.lib.common.ssh.Client._can_system_poll')
        csp_mock.return_value = False
        select_mock = self.patch('select.poll', create=True)
        client_mock = mock.MagicMock()
        tran_mock = mock.MagicMock()
        chan_mock = mock.MagicMock()

        # Test for proper reading of STDOUT and STDERROR

        gsc_mock.return_value = client_mock
        client_mock.get_transport.return_value = tran_mock
        tran_mock.open_session().__enter__.return_value = chan_mock
        chan_mock.recv_exit_status.return_value = 0

        std_out_mock = mock.MagicMock(StringIO)
        std_err_mock = mock.MagicMock(StringIO)
        chan_mock.makefile.return_value = std_out_mock
        chan_mock.makefile_stderr.return_value = std_err_mock

        client = ssh.Client('localhost', 'root', timeout=2)
        client.exec_command("test")

        chan_mock.makefile.assert_called_once_with('rb', 1024)
        chan_mock.makefile_stderr.assert_called_once_with('rb', 1024)
        std_out_mock.read.assert_called_once_with()
        std_err_mock.read.assert_called_once_with()
        self.assertFalse(select_mock.called)
def repl_hex_char(match):
    """Decode a hex-escape regex match into its Unicode character.

    The first two characters of the match (presumably an escape prefix
    such as ``0x``/``\\x``) are skipped; the rest is parsed as hex.
    """
    digits = match.group(0)[2:]
    return unichr(int(digits, 16))
def _draw_text_as_text(self, gc, x, y, s, prop, angle, ismath, mtext=None):
    """Emit string *s* at (x, y) as SVG <text>/<tspan> elements.

    Plain text becomes a single <text> element; mathtext output is
    parsed into glyphs and rectangles and grouped under a <g>.
    """
    writer = self.writer

    color = rgb2hex(gc.get_rgb())
    style = {}
    if color != '#000000':
        style['fill'] = color
    if gc.get_alpha() != 1.0:
        style['opacity'] = six.text_type(gc.get_alpha())

    if not ismath:
        font = self._get_font(prop)
        font.set_text(s, 0.0, flags=LOAD_NO_HINTING)

        fontsize = prop.get_size_in_points()

        fontfamily = font.family_name
        fontstyle = prop.get_style()

        attrib = {}
        # Must add "px" to workaround a Firefox bug
        style['font-size'] = six.text_type(fontsize) + 'px'
        style['font-family'] = six.text_type(fontfamily)
        style['font-style'] = prop.get_style().lower()
        style['font-weight'] = prop.get_weight().lower()
        attrib['style'] = generate_css(style)

        if mtext and (angle == 0 or mtext.get_rotation_mode() == "anchor"):
            # If text anchoring can be supported, get the original
            # coordinates and add alignment information.

            # Get anchor coordinates.
            transform = mtext.get_transform()
            ax, ay = transform.transform_point(mtext.get_position())
            # Flip to SVG's top-left y origin.
            ay = self.height - ay

            # Don't do vertical anchor alignment. Most applications do not
            # support 'alignment-baseline' yet. Apply the vertical layout
            # to the anchor point manually for now.
            angle_rad = angle * np.pi / 180.
            dir_vert = np.array([np.sin(angle_rad), np.cos(angle_rad)])
            v_offset = np.dot(dir_vert, [(x - ax), (y - ay)])
            ax = ax + v_offset * dir_vert[0]
            ay = ay + v_offset * dir_vert[1]

            ha_mpl_to_svg = {'left': 'start', 'right': 'end',
                             'center': 'middle'}
            style['text-anchor'] = ha_mpl_to_svg[mtext.get_ha()]

            attrib['x'] = str(ax)
            attrib['y'] = str(ay)
            attrib['style'] = generate_css(style)
            attrib['transform'] = "rotate(%f, %f, %f)" % (-angle, ax, ay)
            writer.element('text', s, attrib=attrib)
        else:
            attrib['transform'] = generate_transform([
                ('translate', (x, y)),
                ('rotate', (-angle,))])

            writer.element('text', s, attrib=attrib)

        if rcParams['svg.fonttype'] == 'svgfont':
            # Record which characters were used so the font subset can be
            # embedded later.
            fontset = self._fonts.setdefault(font.fname, set())
            for c in s:
                fontset.add(ord(c))
    else:
        writer.comment(s)

        width, height, descent, svg_elements, used_characters = \
            self.mathtext_parser.parse(s, 72, prop)
        svg_glyphs = svg_elements.svg_glyphs
        svg_rects = svg_elements.svg_rects

        attrib = {}
        attrib['style'] = generate_css(style)
        attrib['transform'] = generate_transform([
            ('translate', (x, y)),
            ('rotate', (-angle,))])

        # Apply attributes to 'g', not 'text', because we likely
        # have some rectangles as well with the same style and
        # transformation
        writer.start('g', attrib=attrib)

        writer.start('text')

        # Sort the characters by font, and output one tspan for
        # each
        spans = {}
        for font, fontsize, thetext, new_x, new_y, metrics in svg_glyphs:
            style = generate_css({
                'font-size': six.text_type(fontsize) + 'px',
                'font-family': font.family_name,
                'font-style': font.style_name.lower(),
                'font-weight': font.style_name.lower()})
            if thetext == 32:
                thetext = 0xa0  # non-breaking space
            spans.setdefault(style, []).append((new_x, -new_y, thetext))

        if rcParams['svg.fonttype'] == 'svgfont':
            for font, fontsize, thetext, new_x, new_y, metrics in svg_glyphs:
                fontset = self._fonts.setdefault(font.fname, set())
                fontset.add(thetext)

        for style, chars in list(six.iteritems(spans)):
            chars.sort()

            # If every glyph in this span shares one y, emit a single y
            # value instead of a per-character list.
            same_y = True
            if len(chars) > 1:
                last_y = chars[0][1]
                for i in xrange(1, len(chars)):
                    if chars[i][1] != last_y:
                        same_y = False
                        break
            if same_y:
                ys = six.text_type(chars[0][1])
            else:
                ys = ' '.join(six.text_type(c[1]) for c in chars)

            attrib = {
                'style': style,
                'x': ' '.join(six.text_type(c[0]) for c in chars),
                'y': ys
            }

            writer.element(
                'tspan',
                ''.join(unichr(c[2]) for c in chars),
                attrib=attrib)

        writer.end('text')

        if len(svg_rects):
            for x, y, width, height in svg_rects:
                writer.element(
                    'rect',
                    x=six.text_type(x),
                    y=six.text_type(-y + height),
                    width=six.text_type(width),
                    height=six.text_type(height)
                    )

        writer.end('g')