Python east_asian_widthの例、unicodedata.east_asian_width Pythonの例

コード例 #1

0

ファイルを表示

ファイル: aafigure.py プロジェクト: git-pull/aafigure

 def __init__(self, text, aspect_ratio=1, textual=False, textual_strict=False, widechars='F,W'):
     """Store an ASCII art figure as a rectangular grid, ready for ``recognize``.

     ``widechars`` is a comma separated list of East Asian Width classes
     that occupy two columns; all other classes occupy one.  Every
     two-column glyph is followed by one padding space in the stored image
     so that string positions match display columns.
     """
     self.aspect_ratio = float(aspect_ratio)
     self.textual = textual
     self.textual_strict = textual_strict
     # XXX TODO tab expansion
     # number of columns taken by each East Asian Width class
     wide_classes = widechars.split(',')
     columns = {}
     for width_class in ('F', 'H', 'W', 'Na', 'A', 'N'):
         columns[width_class] = 2 if width_class in wide_classes else 1
     # store the input as a list of lines, padding each wide glyph
     rows = []
     for row in text.splitlines():
         rows.append(''.join(
             glyph + ' ' * (columns[east_asian_width(glyph)] - 1)
             for glyph in row))
     # a padded row is exactly as long as its display width
     self.width = max([len(row) for row in rows] + [0])
     self.height = len(rows)
     # make sure it's rectangular (extend short lines to max width)
     self.image = [row + ' ' * (self.width - len(row)) for row in rows]
     # initialize other data structures
     self.classification = [[None] * self.width for _ in range(self.height)]
     self.shapes = []
     self.nominal_size = NOMINAL_SIZE

コード例 #2

0

ファイルを表示

ファイル: tabview.py プロジェクト: MetaMemoryT/OpenTrader

    def strpad(self, s, width):
        """Fit ``s`` into exactly ``width`` display columns.

        Newlines are replaced by a literal backslash-n.  Text that does not
        fit is cut and terminated with ``self.trunc_char``; shorter text is
        padded with spaces on the right.  Wide ('W') and Fullwidth ('F')
        East Asian characters count as two columns.

        :param s: text to fit
        :param width: number of display columns available
        :return: the fitted string, or an empty string when ``width`` < 1
        """
        if width < 1:
            return str()
        if '\n' in s:
            s = s.replace('\n', '\\n')

        def char_width(c):
            # both Wide and Fullwidth characters take two terminal cells
            return 2 if unicodedata.east_asian_width(c) in ('W', 'F') else 1

        # take into account double-width characters
        kept = []
        buf_width = 0
        for c in s:
            w = char_width(c)
            if buf_width + w > width:
                break
            buf_width += w
            kept.append(c)

        if len(kept) < len(s):
            # truncation occurred: drop characters until the marker fits;
            # stop on an empty buffer instead of indexing past its start
            marker_width = len(self.trunc_char)
            while kept and buf_width + marker_width > width:
                buf_width -= char_width(kept.pop())
            padding = ' ' * (width - buf_width - marker_width)
            return ''.join(kept) + padding + self.trunc_char

        # padding required (no-op when the text fills the width exactly)
        return ''.join(kept) + ' ' * (width - buf_width)

コード例 #3

0

ファイルを表示

ファイル: obj.py プロジェクト: mmgen/mmgen

def truncate_str(s,width): # width = screen width
	"""Cut or pad ``s`` so that it fills ``width`` screen columns.

	Fullwidth ('F') and Wide ('W') characters take two columns.  When a
	wide character would straddle the right edge, it is dropped and a
	single space is appended in its place.
	"""
	wide_seen = 0
	for pos, ch in enumerate(s):
		is_wide = unicodedata.east_asian_width(ch) in ('F','W')
		if is_wide:
			wide_seen += 1
		# pos + wide_seen is the screen width of s[:pos+1] minus one
		if wide_seen + pos >= width:
			filler = ' ' if is_wide and wide_seen + pos == width else ''
			return s[:pos] + filler
	# the whole string fits: pad the string to width if necessary
	return s + ' '*(width-len(s)-wide_seen)

コード例 #4

0

ファイルを表示

ファイル: unicode.py プロジェクト: 13768324554/powerline

def strwidth_ucs_2(width_data, string):
	"""Sum the display widths of ``string`` when it may hold surrogate pairs.

	``width_data`` maps East Asian Width classes to column counts.  A low
	surrogate is measured together with the code unit before it, a high
	surrogate or combining character contributes nothing, and any other
	symbol is looked up on its own.
	"""
	def symbol_width(i, symbol):
		code = ord(symbol)
		if 0xDC00 <= code <= 0xDFFF:
			# low surrogate: measure the pair formed with the previous unit
			return width_data[east_asian_width(string[i - 1] + symbol)]
		if combining(symbol) or 0xD800 <= code <= 0xDBFF:
			return 0
		return width_data[east_asian_width(symbol)]

	return sum(symbol_width(i, symbol) for i, symbol in enumerate(string))

コード例 #5

0

ファイルを表示

ファイル: width_and_alignment.py プロジェクト: rtulke/terminaltables

def string_width(string):
    """Get the visible width of a unicode string.

    Fullwidth ('F') and Wide ('W') East Asian characters take two columns,
    every other character takes one.

    From: https://github.com/Robpol86/terminaltables/pull/9

    :param str string: String to measure.

    :return: String's width.
    :rtype: int
    """
    # Colorclass instances carry their uncolored text in ``value_no_colors``.
    text = getattr(string, 'value_no_colors', string)

    # Byte strings are decoded; text that cannot be decoded is used as is.
    try:
        text = text.decode('u8')
    except (AttributeError, UnicodeEncodeError):
        pass

    return sum(
        2 if unicodedata.east_asian_width(char) in ('F', 'W') else 1
        for char in text
    )

コード例 #6

0

ファイルを表示

ファイル: tables.py プロジェクト: 4ndrej/uberwriter-lite

def cjk_width(text):
    """Return the display width of ``text``; 'W' and 'F' characters count twice.

    On Python 2 a byte string is first decoded as UTF-8.
    """
    import sys
    from unicodedata import east_asian_width
    if sys.version_info[0] < 3 and not isinstance(text, unicode):
        text = text.decode("utf-8")
    total = 0
    for ch in text:
        total += 2 if east_asian_width(ch) in "WF" else 1
    return total

コード例 #7

0

ファイルを表示

ファイル: wrap.py プロジェクト: wtolson/uniseg-python

def tt_width(s, index=0, ambiguous_as_wide=False):
    """Return the logical width of the grapheme cluster at `s[index]` in
    fixed-width typography.

    The result is ``1`` (halfwidth) or ``2`` (fullwidth).  Only the code
    point found at `index` is examined.

    >>> tt_width('A')
    1
    >>> tt_width('\\u8240')     # U+8240: CJK UNIFIED IDEOGRAPH-8240
    2
    >>> tt_width('g\\u0308')    # U+0308: COMBINING DIAERESIS
    1
    >>> tt_width('\\U00029e3d') # U+29E3D: CJK UNIFIED IDEOGRAPH-29E3D
    2

    When `ambiguous_as_wide` is ``True``, characters whose East Asian Width
    is Ambiguous (Greek letters, for instance) are treated as fullwidth,
    just like ideographs.

    >>> tt_width('\\u03b1')     # U+03B1: GREEK SMALL LETTER ALPHA
    1
    >>> tt_width('\\u03b1', ambiguous_as_wide=True)
    2
    """
    width_class = east_asian_width(code_point(s, index))
    if width_class in ('W', 'F'):
        return 2
    if ambiguous_as_wide and width_class == 'A':
        return 2
    return 1

コード例 #8

0

ファイルを表示

ファイル: format.py プロジェクト: enchuu/yaytp

def get_real_width(str):
    """Gets real width of a string accounting for double width characters.

    Wide ('W') and Fullwidth ('F') East Asian characters occupy two
    terminal columns; every other character counts as one.
    """
    # NOTE: the parameter name shadows the builtin; it is kept unchanged so
    # that keyword callers continue to work.
    return sum(
        2 if east_asian_width(char) in ('W', 'F') else 1
        for char in str
    )

コード例 #9

0

ファイルを表示

ファイル: obj.py プロジェクト: mmgen/mmgen

	def __new__(cls,s,on_fail='die',msg=None):
		"""Create a validated instance of ``cls`` from ``s``.

		A value whose type is exactly ``cls`` is returned unchanged.
		Otherwise ``s`` is stripped, decoded from UTF-8 when it is not a
		``str``, and checked: control and combining characters are
		rejected, then the width/length limits and the ``allowed`` /
		``forbidden`` symbol lists of the class are enforced.

		Any exception raised during validation (including failed asserts)
		is handed to ``cls.init_fail`` together with ``s``, and that call's
		result is returned.  ``on_fail`` is passed to ``cls.arg_chk``;
		``msg`` is not used in this method.
		"""
		if type(s) == cls: return s
		cls.arg_chk(on_fail)
		# sanity-check the class configuration: lists of single characters
		for k in cls.forbidden,cls.allowed:
			assert type(k) == list
			for ch in k: assert type(ch) == str and len(ch) == 1
		try:
			s = s.strip()
			if type(s) != str:
				s = s.decode('utf8')
			for ch in s:
				# Allow:    (L)etter,(N)umber,(P)unctuation,(S)ymbol,(Z)space
				# Disallow: (C)ontrol,(M)combining
				# Combining characters create width formatting issues, so disallow them for now
				if unicodedata.category(ch)[0] in 'CM':
					t = { 'C':'control', 'M':'combining' }[unicodedata.category(ch)[0]]
					raise ValueError('{}: {} characters not allowed'.format(ascii(ch),t))
			me = str.__new__(cls,s)
			if cls.max_screen_width:
				# screen width = one column per character plus one extra per
				# Fullwidth/Wide character
				me.screen_width = len(s) + len([1 for ch in s if unicodedata.east_asian_width(ch) in ('F','W')])
				assert me.screen_width <= cls.max_screen_width,(
					'too wide (>{} screen width)'.format(cls.max_screen_width))
			else:
				assert len(s) <= cls.max_len, 'too long (>{} symbols)'.format(cls.max_len)
			assert len(s) >= cls.min_len, 'too short (<{} symbols)'.format(cls.min_len)
			# an empty ``allowed`` / ``forbidden`` list disables the respective check
			assert not cls.allowed or set(list(s)).issubset(set(cls.allowed)),\
				'contains non-allowed symbols: {}'.format(' '.join(set(list(s)) - set(cls.allowed)))
			assert not cls.forbidden or not any(ch in s for ch in cls.forbidden),\
				"contains one of these forbidden symbols: '{}'".format("', '".join(cls.forbidden))
			return me
		except Exception as e:
			return cls.init_fail(e,s)

コード例 #10

0

ファイルを表示

ファイル: width_and_alignment.py プロジェクト: liiight/terminaltables

def visible_width(string):
    """Get the visible width of a unicode string.

    ANSI color sequences are removed first.  Fullwidth ('F') and Wide ('W')
    East Asian characters take two columns, every other character one.

    From: https://github.com/Robpol86/terminaltables/pull/9

    :param str string: String to measure.

    :return: String's width.
    :rtype: int
    """
    if '\033' in string:
        string = RE_COLOR_ANSI.sub('', string)

    # Byte strings are decoded; text that cannot be decoded is used as is.
    try:
        text = string.decode('u8')
    except (AttributeError, UnicodeEncodeError):
        text = string

    return sum(
        2 if unicodedata.east_asian_width(char) in ('F', 'W') else 1
        for char in text
    )

コード例 #11

0

ファイルを表示

ファイル: schedule.py プロジェクト: yoshrc/sch

def charwidth(char):
    """Return 2 for a Wide ('W') character and 1 for anything else."""
    # In the author's environment Full-width, Ambiguous and Not East Asian
    # characters were all rendered as half-width, so only 'W' counts double.
    return 2 if unicodedata.east_asian_width(char) == 'W' else 1

コード例 #12

0

ファイルを表示

ファイル: main.py プロジェクト: leinadlime/tmux2html

 def unisub(m):
     """Replace a matched glyph with a placeholder ``<span>`` of equal width.

     The span carries the glyph's code point in ``data-glyph`` and contains
     one space per display column.  For a Wide ('W') glyph the enclosing
     ``self.line_l`` counter is incremented by one.
     """
     glyph = m.group(1)
     if unicodedata.east_asian_width(glyph) == 'W':
         cells = 2
         self.line_l += 1
     else:
         cells = 1
     template = '<span class="u" data-glyph="&#x{0:x};">{1}</span>'
     return template.format(ord(glyph), ' ' * cells)

コード例 #13

0

ファイルを表示

ファイル: console_attr.py プロジェクト: flgiordano/netcash

def GetCharacterDisplayWidth(char):
  """Returns the monospaced terminal display width of char.

  Assumptions:
    - monospaced display
    - ambiguous or unknown chars default to width 1
    - ASCII control char width is 1 => don't use this for control chars

  Args:
    char: The character to determine the display width of.

  Returns:
    The monospaced terminal display width of char: either 0, 1, or 2.
  """
  # The text type is ``unicode`` on Python 2 and ``str`` on Python 3, where
  # the ``unicode`` builtin no longer exists.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(char, text_type):
    # Non-unicode chars have width 1. Don't use this function on control chars.
    return 1

  # Normalize to avoid special cases. A few code points (composition
  # exclusions such as U+0958) normalize to more than one character, which
  # the single-character unicodedata lookups below reject; keep the original
  # character in that case.
  normalized = unicodedata.normalize('NFC', char)
  if len(normalized) == 1:
    char = normalized

  if unicodedata.combining(char) != 0:
    # Modifies the previous character and does not move the cursor.
    return 0
  elif unicodedata.category(char) == 'Cf':
    # Unprintable formatting char.
    return 0
  elif unicodedata.east_asian_width(char) in 'FW':
    # Fullwidth or Wide chars take 2 character positions.
    return 2
  else:
    # Don't use this function on control chars.
    return 1

コード例 #14

0

ファイルを表示

ファイル: Print.py プロジェクト: LinYuanLab/ulipad

 def textwrap(ustr, width=80, leadingspace=6):
     """Break ``ustr`` into lines of at most ``width`` display columns.

     A character counts as one column when its East Asian Width is 'Na' and
     as two columns otherwise.  Every line after the first starts with
     ``leadingspace`` spaces, which count towards its width.  Returns the
     list of lines; an empty input gives an empty list.
     """
     import unicodedata

     if not ustr:
         return []

     indent = leadingspace * ' '
     wrapped = []
     current = []
     used = 0
     for ch in ustr:
         cols = 1 if unicodedata.east_asian_width(ch) == 'Na' else 2
         if used + cols <= width:
             current.append(ch)
             used += cols
             continue
         # the character does not fit: close the line and start a new,
         # indented one with this character
         wrapped.append(''.join(current))
         current = [indent + ch]
         used = cols + leadingspace
     if current:
         wrapped.append(''.join(current))

     return wrapped

コード例 #15

0

ファイルを表示

ファイル: icdiff.py プロジェクト: sharad/rc

 def width(c):
     """Return the column count of ``c``: 2 for a Wide unicode character
     or a carriage return, otherwise 1."""
     is_text = isinstance(c, type(u""))
     if is_text and unicodedata.east_asian_width(c) == 'W':
         return 2
     return 2 if c == '\r' else 1

コード例 #16

0

ファイルを表示

ファイル: amuse.py プロジェクト: scyptnex/bootstrap

def strong_width(s):
    """Return the display width of ``s``, cached in ``_strong_width_map``.

    Fullwidth and Wide characters count as two; any other character counts
    as the length of its NFC normalization.
    """
    if s in _strong_width_map:
        return _strong_width_map[s]
    total = 0
    for ch in s:
        if unicodedata.east_asian_width(ch) in ("F", "W"):
            total += 2
        else:
            total += len(unicodedata.normalize('NFC', ch))
    _strong_width_map[s] = total
    return _strong_width_map[s]

コード例 #17

0

ファイルを表示

ファイル: md-table-formatter.py プロジェクト: qwerqwer1111/md-table-formatter

def get_text_width(text):
    """Return the display width of ``text``, counting Wide, Fullwidth and
    Ambiguous characters as two columns."""
    double_width = sum(
        1 for ch in text
        if unicodedata.east_asian_width(ch) in (u'W', u'F', u'A')
    )
    return len(text) + double_width

コード例 #18

0

ファイルを表示

ファイル: oneweather.py プロジェクト: pocmon/oneweather

def main():
    """Command line entry point: show the weather for one city.

    Without an argument the default city is used.  A single argument is
    accepted as the city name only when none of its characters has the
    East Asian Width class 'Na'.  More than one argument prints usage help.
    """
    # default city
    city = "深圳"
    argc = len(sys.argv)

    if argc > 2:
        print("\n请输入正确的命令：oneweathert.py [中国城市名称]，目前本程序只支持一个城市的天气询查！")
        print("如：oneweathert.py 北京 , 不带城市名称，默认城市：" + city)
        return

    if argc == 1:
        print("\n可输入城市名参数：oneweathert.py [中国城市名称/默认" + city + "],如：oneweathert.py 北京")
        winfo(city)
    elif argc == 2:
        candidate = sys.argv[1]
        # the argument must not contain any narrow ('Na') character
        only_wide = all(
            unicodedata.east_asian_width(ch) != "Na" for ch in candidate
        )
        if only_wide:
            city = candidate
            winfo(city)
        else:
            print("\n请检查城市名称是否正确，本程序只支持中国中文城市名称！")

コード例 #19

0

ファイルを表示

ファイル: str_width.py プロジェクト: suizokukan/logotheras

def string_width(string):
    """
        Return the width of <string>, i.e. the sum of the WIDTHMAP entries
        for the East Asian Width class of each character.

        E.g. : "abc"    -> 3
               "日本語"  -> 6
    """
    total = 0
    for char in string:
        total += WIDTHMAP[unicodedata.east_asian_width(char)]
    return total

コード例 #20

0

ファイルを表示

ファイル: utils.py プロジェクト: yongboy/yongblog

def east_asian_column_width(text):
    """Return the column width of ``text``.

    Unicode text is measured through the ``east_asian_widths`` table; any
    other value is measured with ``len``.
    """
    if not isinstance(text, types.UnicodeType):
        return len(text)
    return sum(
        east_asian_widths[unicodedata.east_asian_width(ch)] for ch in text
    )

コード例 #21

0

ファイルを表示

ファイル: list_songs.py プロジェクト: treejames/MusicService

def calc_string_width(s):
    """Return ``(extra_width, total_width)`` for ``s``.

    ``extra_width`` is the number of Fullwidth, Wide or Ambiguous
    characters; ``total_width`` is ``len(s)`` plus that number.
    """
    extra_width = sum(
        1 for c in s if unicodedata.east_asian_width(c) in ("F", "W", "A")
    )
    return extra_width, len(s) + extra_width

コード例 #22

0

ファイルを表示

ファイル: widestring.py プロジェクト: MattWoelk/ranger

def string_to_charlist(string):
	"""Split ``string`` into characters, adding an empty string after each
	wide character.

	A string made only of ASCIIONLY characters is returned as a plain
	character list.  On Python 2 the input is decoded from UTF-8 (ignoring
	errors) and each character is re-encoded for the result.
	"""
	if not set(string) - ASCIIONLY:
		return list(string)
	if not PY3:
		string = string.decode('utf-8', 'ignore')
	chars = []
	for c in string:
		chars.append(c if PY3 else c.encode('utf-8'))
		if east_asian_width(c) in WIDE_SYMBOLS:
			chars.append('')
	return chars

コード例 #23

0

ファイルを表示

ファイル: unicode.py プロジェクト: 13768324554/powerline

def strwidth_ucs_4(width_data, string):
	"""Sum the display widths of ``string``.

	``width_data`` maps East Asian Width classes to column counts;
	combining characters contribute nothing.
	"""
	total = 0
	for symbol in string:
		if not combining(symbol):
			total += width_data[east_asian_width(symbol)]
	return total

コード例 #24

0

ファイルを表示

ファイル: utils.py プロジェクト: mmitti/ServerLoginInfo

def mlen(u):
   """Return the display length of ``u``; Wide, Fullwidth and Ambiguous
   characters count as two."""
   wide_chars = u"WFA"
   extra = sum(
      1 for c in u
      if wide_chars.find(unicodedata.east_asian_width(c)) > -1
   )
   return extra + len(u)

コード例 #25

0

ファイルを表示

ファイル: alignment_visualizer.py プロジェクト: philip30/chainn

def count_len(data):
    """Return the display length of ``data``, counting each Wide ('W')
    character as two columns and everything else as one."""
    return sum(
        2 if unicodedata.east_asian_width(c) == 'W' else 1 for c in data
    )

コード例 #26

0

ファイルを表示

ファイル: wgtextboxunicode.py プロジェクト: vanloswang/npyscreen

 def find_width_of_char(self, char):
     """Return the number of columns ``char`` occupies, which is always 1.

     The unreachable statements that followed the unconditional return
     (an ``east_asian_width`` lookup into ``width_mapping``) were removed;
     the observable behavior is unchanged.
     """
     # NOTE(review): width detection looks intentionally disabled here;
     # confirm before wiring the width_mapping lookup back in.
     return 1

コード例 #27

0

ファイルを表示

ファイル: elastic_tabstops.py プロジェクト: ulidtko/ElasticTabstops

def unicode_char_width(c):
	""" Return the number of columns the character ``c`` takes to render.

		Wide chars are Chinese ideographs, Japanese kanji and alike.
		They get two columns of space to render.  Ambiguous ('A') chars
		such as Greek letters are counted as one column; without that
		entry the lookup raised KeyError for them.
	"""
	return {
		'Na': 1, 'N': 1, 'H': 1, 'A': 1,
		'W': 2, 'F': 2
		} [east_asian_width(c)]

コード例 #28

0

ファイルを表示

ファイル: strings.py プロジェクト: timofurrer/lettuce

def column_width(string):
    """Return the number of terminal columns needed to display ``string``.

    The value is first converted to text.  Wide ('W') and Fullwidth ('F')
    East Asian characters take two columns, every other character one.
    """
    # ``unicode`` only exists on Python 2; on Python 3 ``str`` is the text type.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    return sum(
        2 if unicodedata.east_asian_width(c) in "WF" else 1
        for c in text_type(string)
    )

コード例 #29

0

ファイルを表示

ファイル: __init__.py プロジェクト: KTAtkinson/pandas

 def east_asian_len(data, encoding=None, ambiguous_width=1):
     """
     Calculate display width considering unicode East Asian Width.

     Text is measured character by character through ``_EAW_MAP``; classes
     missing from the map count as ``ambiguous_width``.  Non-text values
     are measured with ``len``.  ``encoding`` is not used here.
     """
     if not isinstance(data, text_type):
         return len(data)
     total = 0
     for char in data:
         total += _EAW_MAP.get(east_asian_width(char), ambiguous_width)
     return total

コード例 #30

0

ファイルを表示

ファイル: table.py プロジェクト: kimmobrunfeldt/pieces

 def _width_when_printed(self, mixed):
     """Return the width ``mixed`` takes in a terminal with a fixed-width
     font.

     The value is converted to unicode and stripped of non-printable
     characters first; W and F characters count as two columns (see
     http://unicode.org/reports/tr11/).
     """
     printable = self._strip_nonprintable(self._all_to_unicode(mixed))
     total = 0
     for char in printable:
         total += 2 if unicodedata.east_asian_width(char) in "WF" else 1
     return total

コード例 #31

0

ファイルを表示

ファイル: utils.py プロジェクト: xdemonzz/GenshinArtScanner

def calcFormatWidth(text, target):
    """Return ``target`` reduced by the number of Wide/Fullwidth characters
    in ``text``, i.e. the field width to pass to string formatting so the
    text ends up ``target`` columns wide."""
    wide = 0
    for char in text:
        if unicodedata.east_asian_width(char) in 'WF':
            wide += 1
    return target - wide

コード例 #32

0

ファイルを表示

ファイル: compute-wer.py プロジェクト: placebokkk/wenet

def width(string):
  """Return the display width of ``string``; Ambiguous, Fullwidth and Wide
  characters count as two columns."""
  total = 0
  for char in string:
    total += 2 if unicodedata.east_asian_width(char) in "AFW" else 1
  return total

コード例 #33

0

ファイルを表示

def unicode_width(s):
    """Return the display width of ``s``.

    Unicode text is measured through ``__unicode_width_mapping``; any other
    string-like value is measured with ``len``.
    """
    if not is_unicode(s):
        assert is_strlike(s), 's must be a string, not %s.' % type(s).__name__
        return len(s)

    total = 0
    for char in s:
        total += __unicode_width_mapping[east_asian_width(char)]
    return total

コード例 #34

0

ファイルを表示

    def _wrap(text, width, indent, firstindent):
        """Wrap ``text`` to ``width`` columns and return the joined lines.

        The text is split into tokens (matches of ``rx`` plus
        whitespace-separated words), neighbouring tokens that both start
        with a non-'Na' character are merged, and the tokens are then packed
        into lines.  A token starting with a non-'Na' character counts two
        columns per character.  The first line is prefixed with
        ``firstindent``, later lines with ``indent``.  Lines are collected
        through ``_add_line`` and joined with ``cr``.

        NOTE(review): ``rest / factor`` is used as a slice index, which
        relies on Python 2 integer division - confirm before running on
        Python 3.
        NOTE(review): text consisting only of whitespace leaves ``s`` empty
        after ``strip()``, so ``s[0]`` below would raise IndexError - verify
        callers never pass such input.
        """
        if not text:
            return ''
        text = text.strip()
        # tokenize: keep every ``rx`` match intact, split the rest on whitespace
        s = []
        pos = 0
        for i in rx.finditer(text):
            if i.start() > pos:
                s.extend(text[pos:i.start()].split())
            s.append(i.group())
            pos = i.end()
        if pos < len(text):
            s.extend(text[pos:].split())

        # merge consecutive tokens that both start with a non-'Na' character
        ss = [s[0]]
        # flag: does the first token start with a non-'Na' (East Asian) character?
        flag = unicodedata.east_asian_width(s[0][0]) != 'Na'
        for i in range(1, len(s)):
            f = unicodedata.east_asian_width(s[i][0]) != 'Na'
            if f and f == flag:
                ss[-1] = ss[-1] + s[i]
            else:
                ss.append(s[i])
            flag = f

        s = ss

        t = []    # finished lines
        y = 0     # columns used by the tokens in ``buf``
        buf = []  # tokens of the line being built
        x = 0     # 0 while building the first line, 1 afterwards
        while s:
            i = s.pop(0)
            # columns per character of this token
            if unicodedata.east_asian_width(i[0]) != 'Na':
                factor = 2
            else:
                factor = 1

            if x == 0:
                w = width - len(firstindent)
                step = firstindent
            else:
                w = width - len(indent)
                step = indent
            # len(buf) accounts for the single spaces that will join the tokens
            length = y + len(i) * factor + len(buf)
            #            print 'length', length, s[0].encode('gbk')
            if length == w:
                # the token fills the line exactly
                buf.append(i)
                _add_line(t, step + ' '.join(buf))
                #                t.append(step + ' '.join(buf))
                x = 1
                buf = []
                y = 0
            elif length > w:
                if factor == 2 or (factor == 1 and len(i) * factor >= w):
                    # wide token, or narrow token longer than a whole line:
                    # split it and push the remainder back onto the queue
                    buf_len = len(buf)
                    rest = w - y - buf_len
                    buf.append(i[:rest / factor])
                    #                    print '----', w, y, buf_len, (w-y-buf_len-1), buf
                    _add_line(t, step + ' '.join(buf))
                    #                    t.append(step + ' '.join(buf))
                    x = 1
                    s.insert(0, i[rest / factor:])
                    buf = []
                    y = 0
                    continue
                else:
                    # the token does not fit: flush the line and retry the
                    # token on the next one
                    _add_line(t, step + ' '.join(buf))
                    #                    t.append(step + ' '.join(buf))
                    x = 1
                    s.insert(0, i)
                    buf = []
                    y = 0
                    continue

            else:
                buf.append(i)
                y += factor * len(i)

        # flush the last, partially filled line
        if buf:
            _add_line(t, step + ' '.join(buf), '')


#            t.append(step + ' '.join(buf))
        return cr.join(t)

コード例 #35

0

ファイルを表示

ファイル: rst.py プロジェクト: thomas-nilsson-irfu/sphinx

 def charwidth(char: str, widechars: str) -> int:
     """Return 2 when the East Asian Width class of ``char`` is contained
     in ``widechars``, otherwise 1."""
     return 2 if east_asian_width(char) in widechars else 1

コード例 #36

0

ファイルを表示

def text_len(text):
    """Return the display length of ``text``; Fullwidth, Wide and Ambiguous
    characters count as two columns."""
    return sum(
        2 if unicodedata.east_asian_width(char) in 'FWA' else 1
        for char in text
    )

コード例 #37

0

ファイルを表示

def display_length(text):
    ''' Display length of text: one Chinese character counts as two
    English ones (every character whose class is not 'Na' counts as 2). '''
    total = 0
    for char in text:
        if east_asian_width(char) == 'Na':
            total += 1
        else:
            total += 2
    return total

コード例 #38

0

ファイルを表示

ファイル: jd.py プロジェクト: nju04zq/jd_pc

def chr_width(c):
    """Return 2 for a Fullwidth, Wide or Ambiguous character, else 1."""
    width_class = unicodedata.east_asian_width(c)
    return 2 if width_class in ('F', 'W', 'A') else 1

コード例 #39

0

ファイルを表示

ファイル: DataConverter.py プロジェクト: gh640/SublimeDataConverter

def _countwide(string):
    '''Count the number of Wide ('W') characters in a string.'''
    wide = 0
    for char in string:
        if unicodedata.east_asian_width(char) == 'W':
            wide += 1
    return wide

コード例 #40

0

ファイルを表示

def get_zen_count(text):
	"""Return how many characters of ``text`` are Fullwidth, Wide or
	Ambiguous."""
	return sum(
		1 for c in text if unicodedata.east_asian_width(c) in "FWA"
	)

コード例 #41

0

ファイルを表示

 def get_single_with(base) -> int:
     """Return the column width (2 for Fullwidth/Wide, else 1) of the single
     character obtained by formatting ``base`` as text."""
     width_class = unicodedata.east_asian_width(u"%s" % base)
     return 2 if width_class in ("F", "W") else 1

コード例 #42

0

ファイルを表示

def isfullwidth(token):
    """Return True when every character of ``token`` is Wide, Fullwidth or
    Ambiguous (an empty token yields True)."""
    for char in token:
        if unicodedata.east_asian_width(char) not in ['W', 'F', 'A']:
            return False
    return True

コード例 #43

0

ファイルを表示

ファイル: test.py プロジェクト: kalupas226/RLOther

        blogs.append(BASE_URL + link.attrs['href'])

# 2. Crawl every URL collected in step 1 and extract all full-width characters inside the BODY tag
# Walk through all blogs
contents_lo = ''
for blog in blogs:
    html = req.get(blog).text
    # Parse the HTML with BeautifulSoup
    soup = bs(html, 'html.parser')
    for script in soup(["script", "style"]):
        script.extract()
        # Extract only the inner text of the body content
    contents = soup.find('body').get_text()
    # extract() could not remove everything, so use unicodedata.east_asian_width to decide whether a character is full-width
    for uniStr in contents:
        str_width = unicodedata.east_asian_width(uniStr)
        if str_width == 'W':
            contents_lo += uniStr

# 3. Run the full-width characters extracted in step 2 through morphological analysis and keep only common nouns
# Run the morphological analysis and extract only common nouns
m = mc.Tagger('mecabrc')
mecab_result = m.parse(contents_lo)
info_of_words = mecab_result.split('\n')
words = []
for info in info_of_words:
    if '\t' in info:
        kind = info.split('\t')[1].split(',')[0]
        category = info.split('\t')[1].split(',')[1]
        if kind == '名詞' and category == '一般' and (info.split('\t')[0]
                                                  not in EXCLUDE_STR_LIST):

コード例 #44

0

ファイルを表示

ファイル: bar_chart.py プロジェクト: soohyunc/data_hacks

def run(input_stream, options, encoding='utf8'):
    """Aggregate the rows of ``input_stream`` and print a text bar chart.

    Depending on ``options`` a row is read as "key value"
    (``agg_key_value``), as "value key" (``agg_value_key``) or counted as
    one occurrence of the whole row.  Keys are decoded with ``encoding``
    and the output is encoded with it again.  The process exits with
    status 1 when no data was read.

    NOTE(review): the print statements make this Python 2 only code.
    """
    data = defaultdict(int)
    total = 0
    type_ = float
    for row in input_stream:
        if options.agg_key_value:
            # "key value": the value is the last whitespace separated field
            kv = row.rstrip().rsplit(None, 1)
            if kv[1].isdigit():
                type_ = int
            value = type_(kv[1])
            data[kv[0].decode(encoding)] += value
            total += value
        elif options.agg_value_key:
            # "value key": the value is the first whitespace separated field
            kv = row.lstrip().split(None, 1)
            if kv[0].isdigit():
                type_ = int
            value = type_(kv[0])
            data[kv[1].decode(encoding)] += value
            total += value
        else:
            # plain rows: count occurrences
            row = row.decode(encoding)
            data[row] += 1
            total += 1

    if not data:
        print "Error: no data"
        sys.exit(1)

    # width of the key column, capped by options.max_key_length
    max_length = max([len(key) for key in data.keys()])
    max_length = min(max_length, int(options.max_key_length))
    # columns left for the bar; scale = count represented by one dot (>= 1)
    value_characters = int(options.max_key_length) + 30 - max_length
    max_value = max(data.values())
    scale = int(math.ceil(float(max_value) / value_characters))
    scale = max(1, scale)

    print("# each " + options.dot + " represents a count of %d. total %d" %
          (scale, total)).encode(encoding)

    if options.sort_values:
        data = [[v, k] for k, v in data.items()]
        data.sort(key=lambda x: x[0], reverse=options.reverse_sort)
    else:
        # sort by keys
        data = [[v, k] for k, v in data.items()]
        if options.numeric_sort:
            # keys could be numeric too
            data.sort(key=lambda x: (Decimal(x[1])),
                      reverse=options.reverse_sort)
        else:
            data.sort(key=lambda x: x[1], reverse=options.reverse_sort)

    percentage = ""
    if options.lines:
        nlines = int(options.lines)
    else:
        nlines = None
    for value, key in data[:nlines]:
        if options.percentage:
            percentage = " (%0.2f%%)" % (100 * Decimal(value) / Decimal(total))
        # pair every character of the key with its display width
        # (2 for Wide 'W' characters, otherwise 1)
        name = [(c, len(c) +
                 sum(1 for d in c if unicodedata.east_asian_width(d) == 'W'))
                for c in key]
        # keep as many leading characters as fit into max_length columns
        title = u''
        cum = 0
        for c, l in name:
            cum += l
            if cum <= max_length:
                title += c
            else:
                break

        # NOTE(review): after a ``break`` ``cum`` already includes the
        # character that did not fit, so the padding is then empty - confirm
        # this is the intended alignment for truncated keys.
        pad = u' ' * (max_length - cum)

        if isinstance(value, int):
            formatted_string = "%s%s [%6d] %s%s"
        else:
            formatted_string = "%s%s [%6.2f] %s%s"
        print (formatted_string %
               (pad, title[:max_length], value, int(value / scale) * options.dot, percentage))\
            .encode(encoding)

コード例 #45

0

ファイルを表示

ファイル: python_unicodedata_east_asian_width_count.py プロジェクト: nakanishi-akitaka/python2018_backup

# -*- coding: utf-8 -*-
"""
https://note.nkmk.me/python-unicodedata-east-asian-width-count/
Created on Mon Oct 29 16:17:03 2018

@author: Akitaka
"""
import unicodedata

print(unicodedata.east_asian_width('あ'))  # full-width kana
print(type(unicodedata.east_asian_width('あ')))
# W
# <class 'str'>

print(unicodedata.east_asian_width('a'))  # half-width alphanumeric
# Na

print(unicodedata.east_asian_width('Ａ'))  # full-width alphanumeric
# F

print(unicodedata.east_asian_width('ｱ'))  # half-width katakana
# H

print(unicodedata.east_asian_width('Å'))  # special character (e.g. angstrom)
# A

import unicodedata


def get_east_asian_width_count(text):
    count = 0

コード例 #46

0

ファイルを表示

def strlen(s):
    """Return the display length of ``s``; Wide, Fullwidth and Ambiguous
    characters count as two columns."""
    total = 0
    for char in s:
        total += 2 if unicodedata.east_asian_width(char) in "WFA" else 1
    return total

コード例 #47

0

ファイルを表示

async def end(ctx, boss: str, time: str):
    """Record the time given for ``boss`` and schedule its next reminder.

    ``time`` must be four characters (read as HHMM) without Fullwidth or
    Wide characters.  The boss is looked up in ./data/BossList.csv; when a
    five character cycle is stored in the third column, it is added to the
    given time and a row is appended to ./data/Schedule.csv.  The
    'last time' column of BossList.csv is updated and a confirmation
    message is sent through ``ctx.send``.

    Invalid input is reported through ``ctx.send`` followed by
    ``sys.exit()``.
    """
    global notes
    msg = ""
    target_boss = ""

    # Convert the input into a registered boss name
    target_boss = sub_module.ChangeName(boss)

    # Validate the input command
    if target_boss == "":
        await ctx.send('入力されたボス名が正しくありません :sob:\n再入力してください :pray:')
        sys.exit()
    else:
        msg = '【' + target_boss + '】の登録を受け付けました :memo: '

    if not int(len(time)) == 4:
        await ctx.send('入力時間が4桁ではありません :sob:\n再入力してください :pray:')
        sys.exit()
    # Reject Fullwidth/Wide characters in the time
    for c in time:
        if (unicodedata.east_asian_width(str(c))
                == 'F') or (unicodedata.east_asian_width(str(c)) == 'W'):
            await ctx.send('入力時間に全角文字が含まれています :sob:\n再入力してください :pray:')
            sys.exit()

    # Build the next spawn time
    cyc = ""
    notes = ""
    cnt = 0
    update_row = 0
    target_time = ''

    end_date = datetime.now(pytz.timezone('Asia/Tokyo')).strftime('%Y/%m/%d')
    end_hour = time[:2]
    end_min = time[2:]
    last_time = end_date + ' ' + end_hour + ':' + end_min

    with open("./data/BossList.csv", "r", encoding="utf_8") as read_csv:
        reader = csv.reader(read_csv)
        header = next(reader)
        for row in reader:
            cnt = cnt + 1
            if row[0] == target_boss:
                # zero-based index of the matching data row
                update_row = cnt - 1
                notes = ':map: : ' + row[1]
                # the cycle is only used when it is five characters long;
                # it is split into the first two and the last two characters
                if len(row[2]) == 5:
                    cyc = row[2]
                    cyc_hour = cyc[:2]
                    cyc_min = cyc[3:]
                if row[3] == "o":
                    notes = notes + ' , ランダム出現だよ :cyclone:'

    if cyc:
        # add the cycle to the given time, carrying minutes into hours and
        # wrapping the hour past 23
        end_hour = str(int(end_hour) + int(cyc_hour))
        end_min = str(int(end_min) + int(cyc_min))
        if int(end_min) > 59:
            end_hour = str(int(end_hour) + 1)
            end_min = str(int(end_min) - 60)
        if int(end_hour) > 23:
            end_hour = str(int(end_hour) - 24)
        if len(end_hour) == 1:
            end_hour = '0' + end_hour
        if len(end_min) == 1:
            end_min = '0' + end_min
        target_time = end_hour + ':' + end_min
        msg = msg + '\n次回出現時間の5分前 <' + sub_module.MakeTime(
            target_time) + '> にリマインダーをセットしました :alarm_clock:'

    msg = msg + '\n(' + notes + ')'

    # Update processing
    if not target_time == '':
        with open('./data/Schedule.csv', 'a', newline='',
                  encoding="utf_8") as write_csv:
            writer = csv.writer(write_csv)
            writer.writerow([target_time, target_boss, 'temp', '出現', notes])
        write_csv.close()

    # NOTE(review): update_row starts at 0, so this condition is always true
    # and row 0 is updated when no boss row matched - confirm intent.
    if update_row > -1:
        df = pd.read_csv('./data/BossList.csv', encoding="utf_8")
        df.loc[update_row, 'last time'] = last_time
        df.to_csv('./data/BossList.csv', index=False)

    # Notify that the information was registered and the reminder was set
    await ctx.send(msg)

コード例 #48

0

ファイルを表示

def string_len_dwc(string: str) -> int:
    """Return the length of ``string`` with each double width (Wide or
    Fullwidth) character counted twice."""
    length = 0
    for char in string:
        length += 2 if unicodedata.east_asian_width(char) in "WF" else 1
    return length

コード例 #49

0

ファイルを表示

async def set(ctx, boss: str, time: str):
    """Register a reminder for ``boss`` five minutes after ``time``.

    ``time`` must be four characters (read as HHMM) without Fullwidth or
    Wide characters.  Five minutes are added to it, carrying into the hour
    and wrapping past 23, then a row is appended to ./data/Schedule.csv and
    a confirmation message is sent through ``ctx.send``.

    Invalid input is reported through ``ctx.send`` followed by
    ``sys.exit()``.
    """
    global notes
    msg = ""
    target_boss = ""

    # Convert the input into a registered boss name
    target_boss = sub_module.ChangeName(boss)

    # Validate the input command
    if target_boss == "":
        await ctx.send('入力されたボス名が正しくありません :sob:\n再入力してください :pray:')
        sys.exit()
    else:
        msg = '【' + target_boss + '】の登録を受け付けました :memo: '

    if not int(len(time)) == 4:
        await ctx.send('入力時間が4桁ではありません :sob:\n再入力してください :pray:')
        sys.exit()
    # Reject Fullwidth/Wide characters in the time
    for c in time:
        if unicodedata.east_asian_width(str(c)) in ('F', 'W'):
            await ctx.send('入力時間に全角文字が含まれています :sob:\n再入力してください :pray:')
            sys.exit()

    # Set up the reminder
    target_time = ""
    set_hour = time[:2]
    set_min = time[2:]
    notes = ""
    with open("./data/BossList.csv", "r", encoding="utf_8") as read_csv:
        reader = csv.reader(read_csv)
        next(reader)  # skip the header row
        for row in reader:
            if row[0] == target_boss:
                notes = ':map: : ' + row[1]
                if row[3] == "o":
                    notes = notes + ' , ランダム出現だよ :cyclone:'
                else:
                    # NOTE(review): the message below adds its own closing
                    # parenthesis, so this yields "))" - confirm intent.
                    notes = notes + ')'

    # Add five minutes. A result of exactly 60 must carry into the hour too
    # (otherwise "HH:60" is produced), and the hour wraps past 23.
    set_min = str(int(set_min) + 5)
    if int(set_min) >= 60:
        set_min = str(int(set_min) - 60)
        set_hour = str(int(set_hour) + 1)
        if int(set_hour) > 23:
            set_hour = str(int(set_hour) - 24)
    if len(set_min) == 1:
        set_min = '0' + set_min
    if len(set_hour) == 1:
        set_hour = '0' + set_hour
    target_time = set_hour + ':' + set_min

    msg = msg + '\n <' + sub_module.MakeTime(
        target_time) + '> にリマインダーをセットしました :alarm_clock:\n'
    msg = msg + '(' + notes + ')'

    # Update processing; the ``with`` block closes the file on exit
    with open('./data/Schedule.csv', 'a', newline='',
              encoding="utf_8") as write_csv:
        writer = csv.writer(write_csv)
        writer.writerow([target_time, target_boss, 'temp', '出現', notes])

    # Notify that the reminder was set
    await ctx.send(msg)

コード例 #50

0

ファイルを表示

ファイル: editor.py プロジェクト: velavokr/asyncssh

def _is_wide(ch):
    """Return whether the character is Wide ('W') or Fullwidth ('F')"""

    width_class = east_asian_width(ch)
    return width_class in 'WF'

コード例 #51

0

ファイルを表示

def Check(scr):
    """Return True when ``scr`` contains at least one character whose East
    Asian Width class is not 'Na'."""
    return any(unicodedata.east_asian_width(char) != 'Na' for char in scr)

コード例 #52

0

ファイルを表示

ファイル: aligner.py プロジェクト: suzuki-hoge/table-converter

def _charwidth(c):
    """Return 2 for a Fullwidth, Wide or Ambiguous character, else 1."""
    if unicodedata.east_asian_width(c) in ['F', 'W', 'A']:
        return 2
    return 1

コード例 #53

0

ファイルを表示

def charwidth(c):
    """Return 2 for a Fullwidth or Wide character, else 1."""
    if unicodedata.east_asian_width(c) in ('F', 'W'):
        return 2
    return 1

コード例 #54

0

ファイルを表示

def get_string_display_width(arg_string):
    '''
    Returns a real display width of string; Wide and Fullwidth characters
    count as two columns.
    '''
    display_width = 0
    for char in arg_string:
        display_width += 2 if unicodedata.east_asian_width(char) in "WF" else 1
    return display_width

コード例 #55

0

ファイルを表示

def utf_char_width(string):
    """Map one character to WIDE or NARROW.

    WIDE is returned when the character's East Asian Width class is found
    in WIDE_SYMBOLS; NARROW is returned otherwise.
    """
    is_wide = east_asian_width(string) in WIDE_SYMBOLS
    return WIDE if is_wide else NARROW

コード例 #56

0

ファイルを表示

ファイル: info.py プロジェクト: Knight-Playz-3/Scarecrow

    async def charinfo(self, ctx, *, data: str):
        """Shows information about one or several characters.

        'data' can either be a character, a unicode escape sequence, a unicode character name or a string.
        If 'data' is a string only a summary of each character's info will be displayed.
        """
        # Only the escape-sequence syntax is treated case-insensitively. The
        # input itself is kept exactly as typed so that e.g. 'A' is not
        # silently reported as 'a'.
        lowered = data.lower()

        if lowered.startswith('\\u'):
            # Let's interpret the unicode escape sequence
            hex_values = lowered.split('\\u')[1:]
            try:
                # chr() rejects code points outside range(0x110000), so it is
                # converted inside the try as well: malformed hex and
                # out-of-range values both surface as BadArgument.
                data = ''.join(chr(int(val, 16)) for val in hex_values)
            except (ValueError, OverflowError):
                raise commands.BadArgument('Invalid unicode escape sequence.')
        elif len(data) > 1:
            # Maybe we've been given the character's name ?
            # (unicodedata.lookup matches names case-insensitively.)
            try:
                data = unicodedata.lookup(data)
            except KeyError:
                # Not a character name: fall through and treat it as a string
                pass

        # Normalise the input
        data = unicodedata.normalize('NFC', data)
        url_fmt = '<http://unicode-table.com/en/{:X}>'

        if len(data) == 1:
            # Detailed info on the character
            entries = [('Character', data),
                       ('Name', unicodedata.name(data, 'None')),
                       ('Code point', f'{ord(data):04x}')]
            decomposition = unicodedata.decomposition(data)
            if decomposition != '':
                entries.append(('Decomposition', decomposition))

            combining = unicodedata.combining(data)
            if combining:
                entries.append(('Combining class', combining))

            entries.append(('Category', unicodedata.category(data)))
            bidirectional = unicodedata.bidirectional(data)
            entries.append(('Bidirectional',
                            bidirectional if bidirectional != '' else 'None'))
            entries.append(
                ('Mirrored',
                 'True' if unicodedata.mirrored(data) == 1 else 'False'))
            entries.append(
                ('East asian width', unicodedata.east_asian_width(data)))
            entries.append(('Url', url_fmt.format(ord(data))))

            # Create the message's content and send it
            content = utils.indented_entry_to_str(entries)
            await ctx.send(utils.format_block(content))
        else:
            # Minimal info for each character: the character wrapped in
            # zero-width spaces, its code point, its name and a lookup URL.
            entries = [
                f'`\N{ZERO WIDTH SPACE}{c}\N{ZERO WIDTH SPACE}` | `\\u{ord(c):04x}` | `{unicodedata.name(c, "None")}` | {url_fmt.format(ord(c))}'
                for c in data
            ]
            content = '\n'.join(entries)
            await ctx.send(content)

コード例 #57

0

ファイルを表示

 def __init__(self, char):
     """Store ``char`` together with its Unicode name, category and width class.

     Each property is read from the ``unicodedata`` function of the same
     name and kept on the instance under that name.
     """
     self.char = char
     for prop in ('name', 'category', 'east_asian_width'):
         setattr(self, prop, getattr(unicodedata, prop)(char))

コード例 #58

0

ファイルを表示

def print_errors(response, api_url, print_color=True, rules=False, rule_categories=False, explain_rule=False, use_html=False):
    """Print every match of a check response with its context highlighted.

    For each entry of ``response["matches"]`` this prints the match message,
    the context text with the flagged span coloured, a ruler line that marks
    the span, up to five replacement suggestions and, optionally, the rule
    and rule-category identifiers.

    Args:
        response: Mapping read for the keys "matches", "language" and
            "software" (presumably a LanguageTool-style JSON response --
            TODO confirm against the caller).
        api_url: Not referenced anywhere in this function.
        print_color: When false, text is printed without any colouring.
        rules: When true, print each match's rule id and description.
        rule_categories: When true, print each match's category id and name.
        explain_rule: When true, collect (description, first URL) of rules
            that carry URLs and print them as a list after all matches.
        use_html: When colouring, use ``htmlma.htmlColored`` instead of the
            terminal colour codes.
    """
    matches = response["matches"]
    language = response["language"]
    # NOTE(review): `version` is computed but never used below.
    version = response["software"]["name"] + " " + response["software"]["version"]

    def colored(text, color):
        # Wrap `text` in `color` (terminal codes or HTML), or return it
        # untouched when colouring is disabled.
        if print_color:
            if not use_html:
                init_colors(strip=False)
                return color + text + Fore.RESET
            else:
                return htmlma.htmlColored(color,text)
        else:
            return text

    # if DIAGNOSE:
    #     print(colored(
    #         "{} detected ({:.0f}% confidence)".format(language["detectedLanguage"]["name"],
    #                                                 language["detectedLanguage"]["confidence"] * 100)
    #         , Fore.LIGHTBLACK_EX))
    #     if language["detectedLanguage"]["code"] != language["code"]:
    #         print(colored(
    #             "checking as {} text because of setting".format(language["name"])
    #             , Fore.LIGHTBLACK_EX))
    #     print()
    # 27june2021/sik - Make it more informative.
    # 16dec2021/sik - commented out because very error prone.
    # if language["detectedLanguage"]["code"] != language["code"]:
    #     print(colored(
    #     "{} detected ({:.0f}% confidence)".format(language["detectedLanguage"]["name"],
    #                                             language["detectedLanguage"]["confidence"] * 100)
    #     , Fore.LIGHTBLACK_EX))
    #     print(colored(
    #         "checking as {} text because of setting".format(language["name"])
    #         , Fore.LIGHTBLACK_EX))

    # Line markers: a green check mark for suggestions, a red cross for errors.
    tick = colored(u"\u2713", Fore.LIGHTGREEN_EX) + " "
    cross = colored(u"\u2717", Fore.LIGHTRED_EX) + " "

    # (description, url) pairs gathered while iterating, printed at the end.
    rule_explanations = []

    for error in matches:
        context_object = error["context"]
        context = context_object["text"]
        length = context_object["length"]
        offset = context_object["offset"]
        # NOTE(review): `sentence` is only referenced by commented-out code.
        sentence = error["sentence"]  # use sentence instead
        diag = False
        # Placeholder branch for Japanese input: currently has no effect.
        if language["code"] in ["ja","ja-JP"]:
            if not context.startswith("..."):
                #context = sentence # use sentence instead
                pass
            # recalculate offset value
            #if context[offset:offset+length] ==
            #diag = True

        if DIAGNOSE or diag:
            print(f"length={length} offset={offset} error={error} language={language}")
        count = 0
        ruler = ""
        # NOTE(review): `ruler_len` is assigned but never read.
        ruler_len = length
        space_before_offset = 0
        enable_dbcs = False   # whether DBCS specific logic is needed here or not
        pc = ""
        pstate = ""
        # First pass: walk the context up to slightly past the reported
        # offset and count the places where an ASCII space is adjacent to a
        # wide ('W') character -- either a space directly followed by a wide
        # character, or a wide character directly followed by a space. The
        # counter is reset whenever a "。" is seen. The result is later added
        # to `offset` (presumably the reported offset does not account for
        # these spaces -- TODO confirm).
        for c in context:
            cstate = unicodedata.east_asian_width(c)
            if count <= offset+space_before_offset+2:
                if cstate == "W":
                    if pc == " ":
                        space_before_offset += 1
                        enable_dbcs = True
                else:
                    if c == " "  and pstate== "W":
                        space_before_offset += 1
                        enable_dbcs = True
                if c == "。":
                    space_before_offset = 0
            pstate = cstate
            pc = c
            count += 1

        # Second pass: build the ruler string. Positions inside the (shifted)
        # flagged span get a marker, positions up to and including the end of
        # the span get padding; wide characters use full-width marker/padding
        # so the ruler has one character per context character of matching
        # width class.
        count = 0
        for c in context:
            cstate = unicodedata.east_asian_width(c)
            if count >= offset+space_before_offset and count < offset+space_before_offset+length:
                if cstate == "W":
                    ruler += "〜"
                else:
                    ruler += "^"
            elif count <= offset+space_before_offset+length:
                if cstate == "W":
                    ruler += "　"
                else:
                    ruler += " "

            if DIAGNOSE or diag:
                print(f"{count} {cstate}: {c} - {ruler} {cstate} sbo={space_before_offset}")
            count += 1
        # temporary bug fix?
        if DIAGNOSE or diag:
            print(f"old: length={length} offset={offset} enable_dbcs={enable_dbcs}")
        # Shift the offset by the adjustment computed in the first pass so the
        # slices below line up with the ruler built in the second pass.
        if space_before_offset and enable_dbcs:
            offset += space_before_offset
        if DIAGNOSE or diag:
            print(f"new: length={length} offset={offset}")
        endpostion = offset + length
        print(error["message"])

        # Context line: text before / inside / after the flagged span.
        print(
            indention[:2] +
            cross +
            colored(context[:offset], Fore.LIGHTBLACK_EX) +
            colored(context[offset:endpostion], Fore.LIGHTRED_EX) +
            colored(context[endpostion:], Fore.LIGHTBLACK_EX)
        )
        # print(
        #     indention +
        #     offset * " " +
        #     colored(length * "^", Fore.LIGHTRED_EX)
        # )
        # Ruler line: only the part covering the flagged span is coloured.
        print(
            indention +
            ruler[:offset] +
            colored(ruler[offset:endpostion], Fore.LIGHTRED_EX) +
            ruler[endpostion:]
        )

        if error["replacements"]:
            # only print first 5 replacements
            for replacement in error["replacements"][:5]:
                # Same context line with the flagged span swapped for the
                # suggested replacement.
                print(
                    indention[:2] +
                    tick +
                    colored(context[:offset], Fore.LIGHTBLACK_EX) +
                    colored(replacement["value"], Fore.LIGHTGREEN_EX) +
                    colored(context[endpostion:], Fore.LIGHTBLACK_EX)
                )
        rule = error["rule"]
        if rules:
            print(
                indention[:2] + colored(rule["id"] + ": ", Fore.LIGHTBLACK_EX) + rule["description"]
            )
        if rule_categories:
            category = rule["category"]
            print(
                indention[:2] + colored(category["id"] + ": ", Fore.LIGHTBLACK_EX) + category["name"]
            )
        #sys.stderr.write("Rule=%s\n" % rule)
        if explain_rule:
            rule = error["rule"]
            # Only rules that carry at least one URL are collected.
            if "description" in rule and "urls" in rule and len(rule["urls"]) > 0:
                rule_explanations.append((rule["description"], rule["urls"][0]["value"]))
        print()

    if explain_rule and rule_explanations:
        # Pad descriptions to a common width so the URLs line up in a column.
        col_len = max(len(d) for d,u in rule_explanations) + 1
        for descr, url in rule_explanations:
            print(descr + ":" + " " * (col_len - len(descr)) + url)
        print()

コード例 #59

0

ファイルを表示

ファイル: Pad.py プロジェクト: yottu/yottu

	def addstr(self, string, options=curses.A_NORMAL, indent=0, mentioned=False):  # @UndefinedVariable
		"""Write ``string`` to the pad, wrapping and indenting it when asked.

		Args:
			string: Text to output. When ``indent`` is non-zero it is decoded
				from UTF-8 and written one character at a time; otherwise it
				is handed to the pad unchanged.
			options: curses attributes passed to the pad's ``addstr``.
			indent: Number of spaces written at the start of every line the
				cursor is on at column 0. Zero disables the per-character
				wrapping logic entirely.
			mentioned: When true, flag this window as mentioned in the
				window list properties.

		Exceptions raised while writing are logged through ``self.dlog``; a
		curses "addstr() returned ERR" error additionally replaces the pad
		with a new, larger one. The lock is always released afterwards.
		"""
		try:

			# wait until other threads have finished writing
			self.lock.acquire_lock()

			# check if comment needs to be line wrapped, indent it if so
			if indent:

				# iterate over every character, note that BoardPad sends a string
				# for every word delimited by a space
				for stringpos, character in enumerate(string.decode('utf-8')):
					(self.pposy, self.pposx) = self.mypad.getyx()

					# A wide or fullwidth character does not fit in the last
					# column, so move it to the next line. The previous test
					# `east_asian_width(c) or ... == 'W'` was always true
					# because the width class is a non-empty string, which
					# forced a line break for every character at the last
					# column.
					char_width = unicodedata.east_asian_width(u''.join(character))
					if char_width in ('W', 'F') and self.pposx == self.pwidth-1:
						self.mypad.addstr("\n")

						(self.pposy, self.pposx) = self.mypad.getyx()
						self.size = self.pposy

					# wrap oversized word at the end of the line
					if stringpos == 0:
						space_needed = self.pposx + len(string)
						#indented_space = self.pmaxx - indent

						if space_needed > self.pwidth:
							self.mypad.addstr("\n")

							#self.line += u"\n".decode('utf-8')
							(self.pposy, self.pposx) = self.mypad.getyx()
							self.size = self.pposy



					# indent after line wrap
					if self.pposx == 0:
						self.mypad.addstr(" "*indent)

					# output the character and adjust the pad size
					self.mypad.addstr(character.encode('utf-8'), options)
					(self.pposy, self.pposx) = self.mypad.getyx()
					self.size = self.pposy


			# add string to current position
			else:
				self.mypad.addstr(string, options)
				(self.pposy, self.pposx) = self.mypad.getyx()
				self.size = self.pposy

			if mentioned:
				self.wl.windowListProperties[self]['sb_mentioned'] = True
		except Exception as err:
			self.dlog.excpt(err, msg=">>>in Pad.addstr() - indent != 0", cn=self.__class__.__name__)
			# curses reports a full pad with this exact message
			if str(err) == "addstr() returned ERR":
				self.dlog.msg("Pad full. Reinitializing..")
				self.mypad = curses.newpad(self.pheight+Pad.padbuffersize, self.pwidth)  # @UndefinedVariable
		finally:
			self.lock.release_lock()

		# Increase unread line counter on inactive windows
		if re.search(r'\n', string):
			if not self._active or not self.autoScroll:
				try:
					self.wl.set_property(self, 'sb_unread', True)
					self.wl.windowListProperties[self]['sb_lines'] += 1

					self.generate_unread_window_element()
				except KeyError:
					# window has no entry in the property table; nothing to count
					pass
				except Exception as err:
					self.dlog.excpt(err, msg="Pad.addstr() -> not self._active")
			self.auto_scroll()

コード例 #60

0

ファイルを表示

ファイル: table.py プロジェクト: leduythuccs/VOJ-rebuild-discord-bot

def width(s):
    """Return the rounded total display width of ``s``.

    Each character contributes the WIDTH_MAPPING entry for its East Asian
    Width class; the summed value is rounded to the nearest integer.
    """
    total = 0
    for ch in s:
        total += WIDTH_MAPPING[unicodedata.east_asian_width(ch)]
    return round(total)