def __init__(self, text, aspect_ratio=1, textual=False, textual_strict=False, widechars='F,W'):
    """Store an ASCII-art figure as a rectangular grid, ready for ``recognize``.

    ``widechars`` is a comma-separated list of East Asian Width classes that
    occupy two grid cells; every glyph of such a class is followed by one
    padding space so that string index and grid column stay aligned.
    """
    self.aspect_ratio = float(aspect_ratio)
    self.textual = textual
    self.textual_strict = textual_strict
    # XXX TODO tab expansion

    # Cells occupied by each East Asian Width class.
    wide_classes = widechars.split(',')
    cells_for = {
        width_class: 2 if width_class in wide_classes else 1
        for width_class in ('F', 'H', 'W', 'Na', 'A', 'N')
    }

    # Store the figure line by line, tracking the widest line seen.
    self.image = []
    widest = 0
    for raw_line in text.splitlines():
        cells = [cells_for[east_asian_width(ch)] for ch in raw_line]
        widest = max(widest, sum(cells))
        # One extra space after every two-cell glyph.
        self.image.append(
            ''.join(ch + ' ' * (n - 1) for ch, n in zip(raw_line, cells)))
    self.width = widest
    self.height = len(self.image)

    # Make the image rectangular by right-padding the short lines.
    self.image[:] = [row.ljust(widest) for row in self.image]

    # Remaining bookkeeping structures.
    self.classification = [[None] * self.width for _ in range(self.height)]
    self.shapes = []
    self.nominal_size = NOMINAL_SIZE
def strpad(self, s, width):
    """Fit ``s`` into exactly ``width`` terminal columns.

    Newlines are shown as a literal ``\\n``. Text that is too long is cut and
    ends with ``self.trunc_char``; text that is too short is right-padded
    with spaces. Wide ('W') and Fullwidth ('F') characters count as two
    columns. ``self.trunc_char`` is measured with ``len()``, i.e. it is
    assumed to consist of single-column characters.

    :param s: text to fit
    :param width: target width in columns; values below 1 yield ``''``
    :return: the padded or truncated text
    """
    if width < 1:
        return ''
    s = s.replace('\n', '\\n')

    def columns(ch):
        # Both Wide and Fullwidth glyphs occupy two terminal cells.
        return 2 if unicodedata.east_asian_width(ch) in ('F', 'W') else 1

    kept = []
    used = 0
    for ch in s:
        w = columns(ch)
        if used + w > width:
            break
        kept.append(ch)
        used += w

    if len(kept) < len(s):
        # Truncation occurred: make room for the marker, but never pop from
        # an empty buffer (the marker may be wider than the whole field).
        marker = self.trunc_char
        while kept and used + len(marker) > width:
            used -= columns(kept.pop())
        room = width - used
        marker = marker[:room]
        return ''.join(kept) + ' ' * (room - len(marker)) + marker

    # Padding (possibly zero spaces) when the text fits.
    return ''.join(kept) + ' ' * (width - used)
def truncate_str(s,width):
    """Cut or pad ``s`` so it occupies ``width`` screen columns.

    Fullwidth/Wide characters count as two columns. When a two-column
    character would straddle the right edge it is dropped and replaced by a
    single space so the result still fills the width.
    """
    wide_seen = 0
    for pos, ch in enumerate(s):
        is_wide = unicodedata.east_asian_width(ch) in ('F', 'W')
        if is_wide:
            wide_seen += 1
        reached = wide_seen + pos
        if reached >= width:
            head = s[:pos]
            if is_wide and reached == width:
                # Exactly one free column left: fill it with a blank.
                return head + ' '
            return head
    # The whole string fits: pad it out to the requested width.
    return s + ' ' * (width - len(s) - wide_seen)
def strwidth_ucs_2(width_data, string):
    """Sum the display width of ``string`` when it may contain surrogate pairs.

    ``width_data`` maps East Asian Width classes to column counts. A low
    surrogate is measured together with the preceding code unit; high
    surrogates and combining characters contribute nothing.
    """
    total = 0
    for i, symbol in enumerate(string):
        code = ord(symbol)
        if 0xDC00 <= code <= 0xDFFF:
            # Low surrogate: width of the pair formed with the previous unit.
            total += width_data[east_asian_width(string[i - 1] + symbol)]
        elif combining(symbol) or 0xD800 <= code <= 0xDBFF:
            # Combining marks and high surrogates occupy no column themselves.
            continue
        else:
            total += width_data[east_asian_width(symbol)]
    return total
def string_width(string):
    """Return the number of terminal columns a string occupies.

    Fullwidth and Wide East Asian characters take two columns, everything
    else one.

    From: https://github.com/Robpol86/terminaltables/pull/9

    :param str string: String to measure.

    :return: String's width.
    :rtype: int
    """
    # Colorclass instances expose their text without colour codes.
    if hasattr(string, 'value_no_colors'):
        string = string.value_no_colors

    # Byte strings are decoded; anything without a usable decode() is
    # measured as it is.
    try:
        decoded = string.decode('u8')
    except (AttributeError, UnicodeEncodeError):
        decoded = string

    return sum(
        2 if unicodedata.east_asian_width(char) in ('F', 'W') else 1
        for char in decoded
    )
def cjk_width(text):
    """Return the display width of ``text``; Wide/Fullwidth characters count 2."""
    import sys
    from unicodedata import east_asian_width

    # Python 2 only: byte strings have to be decoded before measuring.
    if sys.version_info[0] < 3 and not isinstance(text, unicode):
        text = text.decode("utf-8")

    total = 0
    for ch in text:
        total += 2 if east_asian_width(ch) in "WF" else 1
    return total
def tt_width(s, index=0, ambiguous_as_wide=False):
    """Return the fixed-width column count of the grapheme cluster at `s[index]`

    The result is ``1`` (halfwidth) or ``2`` (fullwidth) and is decided by the
    East Asian Width class of the cluster's leading code point.

    >>> tt_width('A')
    1
    >>> tt_width('\\u8240')     # U+8240: CJK UNIFIED IDEOGRAPH-8240
    2
    >>> tt_width('g\\u0308')    # U+0308: COMBINING DIAERESIS
    1
    >>> tt_width('\\U00029e3d') # U+29E3D: CJK UNIFIED IDEOGRAPH-29E3D
    2

    With `ambiguous_as_wide` set to ``True``, characters of the Ambiguous
    class (Greek letters, for example) are treated as fullwidth, like
    ideographs.

    >>> tt_width('\\u03b1')     # U+03B1: GREEK SMALL LETTER ALPHA
    1
    >>> tt_width('\\u03b1', ambiguous_as_wide=True)
    2
    """
    width_class = east_asian_width(code_point(s, index))
    if width_class == 'A':
        return 2 if ambiguous_as_wide else 1
    return 2 if width_class in ('W', 'F') else 1
def get_real_width(str):
    """Return the display width of a string, counting double-width characters.

    Characters whose East Asian Width class is Wide ('W') or Fullwidth ('F')
    occupy two columns; all others occupy one.

    The parameter name shadows the ``str`` builtin; it is kept so that
    existing keyword callers continue to work.
    """
    return sum(
        2 if east_asian_width(char) in ('F', 'W') else 1 for char in str
    )
def __new__(cls,s,on_fail='die',msg=None):
    """Create a validated string instance of ``cls`` from ``s``.

    ``s`` is stripped, decoded from UTF-8 if it is not already ``str``, and
    checked against the class attributes ``max_screen_width`` / ``max_len``,
    ``min_len``, ``allowed`` and ``forbidden``. Any exception raised during
    validation (including failed asserts) is passed to ``cls.init_fail``,
    whose result is returned instead.

    ``msg`` is not used in this method.
    """
    # An object that already has exactly this type is returned unchanged.
    if type(s) == cls: return s
    cls.arg_chk(on_fail)
    # Class-level configuration check: both lists must hold single characters.
    for k in cls.forbidden,cls.allowed:
        assert type(k) == list
        for ch in k: assert type(ch) == str and len(ch) == 1
    try:
        s = s.strip()
        if type(s) != str:
            s = s.decode('utf8')
        for ch in s:
            # Allow:    (L)etter,(N)umber,(P)unctuation,(S)ymbol,(Z)space
            # Disallow: (C)ontrol,(M)combining
            # Combining characters create width formatting issues, so disallow them for now
            if unicodedata.category(ch)[0] in 'CM':
                t = { 'C':'control', 'M':'combining' }[unicodedata.category(ch)[0]]
                raise ValueError('{}: {} characters not allowed'.format(ascii(ch),t))
        me = str.__new__(cls,s)
        if cls.max_screen_width:
            # Screen width: one column per character plus one extra for each
            # Fullwidth/Wide character.
            me.screen_width = len(s) + len([1 for ch in s if unicodedata.east_asian_width(ch) in ('F','W')])
            assert me.screen_width <= cls.max_screen_width,(
                'too wide (>{} screen width)'.format(cls.max_screen_width))
        else:
            assert len(s) <= cls.max_len, 'too long (>{} symbols)'.format(cls.max_len)
        assert len(s) >= cls.min_len, 'too short (<{} symbols)'.format(cls.min_len)
        # An empty ``allowed`` / ``forbidden`` list disables the respective check.
        assert not cls.allowed or set(list(s)).issubset(set(cls.allowed)),\
            'contains non-allowed symbols: {}'.format(' '.join(set(list(s)) - set(cls.allowed)))
        assert not cls.forbidden or not any(ch in s for ch in cls.forbidden),\
            "contains one of these forbidden symbols: '{}'".format("', '".join(cls.forbidden))
        return me
    except Exception as e:
        # NOTE(review): validation relies on ``assert``, which is stripped
        # when Python runs with -O — confirm that is acceptable here.
        return cls.init_fail(e,s)
def visible_width(string):
    """Return the number of terminal columns a string occupies.

    ANSI colour sequences are removed before measuring. Fullwidth and Wide
    East Asian characters take two columns, everything else one.

    From: https://github.com/Robpol86/terminaltables/pull/9

    :param str string: String to measure.

    :return: String's width.
    :rtype: int
    """
    if '\033' in string:
        string = RE_COLOR_ANSI.sub('', string)

    # Byte strings are decoded; anything without a usable decode() is
    # measured as it is.
    try:
        string = string.decode('u8')
    except (AttributeError, UnicodeEncodeError):
        pass

    return sum(
        2 if unicodedata.east_asian_width(char) in ('F', 'W') else 1
        for char in string
    )
def charwidth(char):
    """Return 2 for characters of East Asian Width class 'W', otherwise 1."""
    # In the author's environment Full-width, Ambiguous and Not-East-Asian
    # characters were all rendered as half-width, so only 'W' counts as 2.
    return 2 if unicodedata.east_asian_width(char) == 'W' else 1
def unisub(m):
    """Regex substitution callback: wrap the matched glyph in a ``<span>``.

    The span carries the code point in ``data-glyph`` and contains one blank
    per display column. For a Wide ('W') glyph the enclosing object's
    ``line_l`` counter (``self`` comes from the surrounding scope) is
    incremented by one.
    """
    glyph = m.group(1)
    if unicodedata.east_asian_width(glyph) == 'W':
        columns = 2
        self.line_l += 1
    else:
        columns = 1
    template = '<span class="u" data-glyph="&#x{0:x};">{1}</span>'
    return template.format(ord(glyph), ' ' * columns)
def GetCharacterDisplayWidth(char):
  """Returns the monospaced terminal display width of char.

  Assumptions:
    - monospaced display
    - ambiguous or unknown chars default to width 1
    - ASCII control char width is 1 => don't use this for control chars

  Args:
    char: The character to determine the display width of.

  Returns:
    The monospaced terminal display width of char: either 0, 1, or 2.
  """
  # Non-unicode chars have width 1. Don't use this function on control chars.
  if not isinstance(char, unicode):
    return 1

  # Normalize to avoid special cases.
  char = unicodedata.normalize('NFC', char)

  # Combining chars modify the previous character without moving the cursor,
  # and 'Cf' chars are unprintable formatting chars.
  occupies_no_cell = (unicodedata.combining(char) != 0 or
                      unicodedata.category(char) == 'Cf')
  if occupies_no_cell:
    return 0

  # Fullwidth or Wide chars take 2 character positions; everything else 1.
  return 2 if unicodedata.east_asian_width(char) in 'FW' else 1
def textwrap(ustr, width=80, leadingspace=6):
    """Break ``ustr`` into lines of at most ``width`` display columns.

    Every character whose East Asian Width class is not 'Na' counts as two
    columns. Each continuation line starts with ``leadingspace`` blanks,
    which are included in that line's width. Returns a list of lines, or an
    empty list for empty input.
    """
    import unicodedata

    if not ustr:
        return []

    wrapped = []
    current = []
    used = 0
    for ch in ustr:
        columns = 1 if unicodedata.east_asian_width(ch) == 'Na' else 2
        if used + columns > width:
            # Line is full: flush it and start an indented continuation.
            wrapped.append(''.join(current))
            current = [leadingspace * ' ' + ch]
            used = columns + leadingspace
        else:
            current.append(ch)
            used += columns
    if current:
        wrapped.append(''.join(current))
    return wrapped
def width(c):
    """Return the column count of ``c``: 2 for Wide text characters and for
    a carriage return, 1 for everything else."""
    is_text = isinstance(c, type(u""))
    if is_text and unicodedata.east_asian_width(c) == 'W':
        return 2
    return 2 if c == '\r' else 1
def strong_width(s):
    """Return the display width of ``s``, memoised in ``_strong_width_map``.

    Fullwidth/Wide characters count as 2; any other character counts as the
    length of its NFC-normalised form.
    """
    if s in _strong_width_map:
        return _strong_width_map[s]

    total = 0
    for ch in s:
        if unicodedata.east_asian_width(ch) in ("F", "W"):
            total += 2
        else:
            total += len(unicodedata.normalize('NFC', ch))
    _strong_width_map[s] = total
    return _strong_width_map[s]
def get_text_width(text):
    """Return the display width of ``text``; Wide, Fullwidth and Ambiguous
    characters count as two columns."""
    double_width = sum(
        1 for c in text
        if unicodedata.east_asian_width(c) in (u'W', u'F', u'A')
    )
    return len(text) + double_width
def main():
    """Command-line entry point: show the weather for one city.

    With no argument the default city is used. With one argument the city
    name is accepted only if none of its characters has East Asian Width
    class 'Na'. More than one argument prints a usage message.
    """
    # Default city
    city = "深圳"
    argc = len(sys.argv)

    if argc > 2:
        print("\n请输入正确的命令:oneweathert.py [中国城市名称],目前本程序只支持一个城市的天气询查!")
        print("如:oneweathert.py 北京 , 不带城市名称,默认城市:" + city)
        return

    if argc == 1:
        print("\n可输入城市名参数:oneweathert.py [中国城市名称/默认" + city + "],如:oneweathert.py 北京")
        winfo(city)
        return

    if argc != 2:
        return

    # Only Chinese-character city names are supported: reject the argument
    # as soon as any character is of the narrow ('Na') class.
    requested = sys.argv[1]
    if all(unicodedata.east_asian_width(ch) != "Na" for ch in requested):
        winfo(requested)
    else:
        print("\n请检查城市名称是否正确,本程序只支持中国中文城市名称!")
def string_width(string):
    """
    Return the width of <string>, summing the WIDTHMAP entry of each
    character's East Asian Width class.

    E.g. :
        "abc"       ->  3
        "日本語"    ->  6
    """
    total = 0
    for c in string:
        total += WIDTHMAP[unicodedata.east_asian_width(c)]
    return total
def east_asian_column_width(text):
    """Return the column width of ``text``.

    Unicode strings are measured per character through the
    ``east_asian_widths`` table; any other value is measured with ``len()``.
    """
    if not isinstance(text, types.UnicodeType):
        return len(text)
    return sum(
        east_asian_widths[unicodedata.east_asian_width(c)] for c in text
    )
def calc_string_width(s):
    """Return ``(extra_width, total_width)`` for ``s``.

    ``extra_width`` is the number of Fullwidth, Wide or Ambiguous characters
    (each needs one additional column); ``total_width`` is ``len(s)`` plus
    that extra.
    """
    extra_width = sum(
        1 for c in s if unicodedata.east_asian_width(c) in ("F", "W", "A")
    )
    return extra_width, len(s) + extra_width
def string_to_charlist(string):
    """Split a string into a list of characters, inserting an empty string
    after every wide character."""
    # Pure-ASCII input needs no width handling at all.
    if not set(string) - ASCIIONLY:
        return list(string)

    # Pair each character (used for the width lookup) with the value that is
    # stored in the result: the character itself on Python 3, its UTF-8
    # encoding on Python 2.
    if PY3:
        pairs = ((ch, ch) for ch in string)
    else:
        pairs = ((ch, ch.encode('utf-8'))
                 for ch in string.decode('utf-8', 'ignore'))

    result = []
    for ch, stored in pairs:
        result.append(stored)
        if east_asian_width(ch) in WIDE_SYMBOLS:
            result.append('')
    return result
def strwidth_ucs_4(width_data, string):
    """Sum the display width of ``string``.

    ``width_data`` maps East Asian Width classes to column counts;
    combining characters contribute nothing.
    """
    total = 0
    for symbol in string:
        if combining(symbol):
            continue
        total += width_data[east_asian_width(symbol)]
    return total
def mlen(u):
    """Return ``len(u)`` plus one for every Wide, Fullwidth or Ambiguous
    character in ``u``."""
    wide_chars = u"WFA"
    extra = sum(
        1 for c in u if unicodedata.east_asian_width(c) in wide_chars
    )
    return extra + len(u)
def count_len(data):
    """Return the display width of ``data``.

    Characters of East Asian Width class Wide ('W') or Fullwidth ('F')
    occupy two columns; every other character occupies one.
    """
    return sum(
        2 if unicodedata.east_asian_width(c) in ('F', 'W') else 1
        for c in data
    )
def find_width_of_char(self, char):
    """Return the display width of ``char``; currently always 1.

    An East-Asian-width based lookup (class-level ``width_mapping``, with
    Ambiguous characters treated as width 1) used to follow an unconditional
    ``return 1`` and was therefore never executed. That unreachable code has
    been removed; the observable behaviour is unchanged.
    """
    return 1
def unicode_char_width(c):
    """Return the number of columns ``c`` needs: 2 for wide chars, else 1.

    Wide chars are Chinese ideographs, Japanese kanji and alike (East Asian
    Width classes 'W' and 'F'). Every other class, including Ambiguous
    ('A'), which previously raised ``KeyError``, gets one column.
    """
    return 2 if east_asian_width(c) in ('W', 'F') else 1
def column_width(string):
    """Return the display width of ``string`` after converting it with
    ``unicode()``; Wide/Fullwidth characters count as two columns."""
    return sum(
        2 if unicodedata.east_asian_width(c) in "WF" else 1
        for c in unicode(string)
    )
def east_asian_len(data, encoding=None, ambiguous_width=1):
    """
    Calculate display width considering unicode East Asian Width.

    Text values are measured per character through ``_EAW_MAP``; classes
    missing from that map count as ``ambiguous_width``. Non-text values are
    measured with ``len()``. ``encoding`` is not used here.
    """
    if not isinstance(data, text_type):
        return len(data)
    total = 0
    for c in data:
        total += _EAW_MAP.get(east_asian_width(c), ambiguous_width)
    return total
def _width_when_printed(self, mixed): """Counts text's actual width in terminal when fixed-width font is used. http://unicode.org/reports/tr11/ is more information about W and F chars.""" text = self._all_to_unicode(mixed) text = self._strip_nonprintable(text) return sum(1 + (unicodedata.east_asian_width(c) in "WF") \ for c in text)
def calcFormatWidth(text, target):
    """Return ``target`` reduced by the number of Wide/Fullwidth characters
    in ``text``, i.e. the field width to pass to a formatter that counts
    characters rather than columns."""
    double_width = 0
    for c in text:
        if unicodedata.east_asian_width(c) in 'WF':
            double_width += 1
    return target - double_width
def width(string):
    """Return the display width of ``string``; Ambiguous, Fullwidth and Wide
    characters count as two columns."""
    total = 0
    for c in string:
        total += 2 if unicodedata.east_asian_width(c) in "AFW" else 1
    return total
def unicode_width(s):
    """Return the display width of ``s``.

    Unicode strings are measured per character through the module's width
    mapping; other string-like values are measured with ``len()``.
    """
    if not is_unicode(s):
        assert is_strlike(s), 's must be a string, not %s.' % type(s).__name__
        return len(s)
    total = 0
    for c in s:
        total += __unicode_width_mapping[east_asian_width(c)]
    return total
def _wrap(text, width, indent, firstindent): if not text: return '' text = text.strip() s = [] pos = 0 for i in rx.finditer(text): if i.start() > pos: s.extend(text[pos:i.start()].split()) s.append(i.group()) pos = i.end() if pos < len(text): s.extend(text[pos:].split()) ss = [s[0]] #get first element character is asian char flag flag = unicodedata.east_asian_width(s[0][0]) != 'Na' for i in range(1, len(s)): f = unicodedata.east_asian_width(s[i][0]) != 'Na' if f and f == flag: ss[-1] = ss[-1] + s[i] else: ss.append(s[i]) flag = f s = ss t = [] y = 0 buf = [] x = 0 while s: i = s.pop(0) if unicodedata.east_asian_width(i[0]) != 'Na': factor = 2 else: factor = 1 if x == 0: w = width - len(firstindent) step = firstindent else: w = width - len(indent) step = indent length = y + len(i) * factor + len(buf) # print 'length', length, s[0].encode('gbk') if length == w: buf.append(i) _add_line(t, step + ' '.join(buf)) # t.append(step + ' '.join(buf)) x = 1 buf = [] y = 0 elif length > w: if factor == 2 or (factor == 1 and len(i) * factor >= w): buf_len = len(buf) rest = w - y - buf_len buf.append(i[:rest / factor]) # print '----', w, y, buf_len, (w-y-buf_len-1), buf _add_line(t, step + ' '.join(buf)) # t.append(step + ' '.join(buf)) x = 1 s.insert(0, i[rest / factor:]) buf = [] y = 0 continue else: _add_line(t, step + ' '.join(buf)) # t.append(step + ' '.join(buf)) x = 1 s.insert(0, i) buf = [] y = 0 continue else: buf.append(i) y += factor * len(i) if buf: _add_line(t, step + ' '.join(buf), '') # t.append(step + ' '.join(buf)) return cr.join(t)
def charwidth(char: str, widechars: str) -> int:
    """Return 2 if the East Asian Width class of ``char`` occurs in
    ``widechars``, otherwise 1."""
    return 2 if east_asian_width(char) in widechars else 1
def text_len(text):
    """Return the display width of ``text``; Fullwidth, Wide and Ambiguous
    characters count as two columns."""
    return sum(
        2 if unicodedata.east_asian_width(c) in 'FWA' else 1 for c in text
    )
def display_length(text):
    '''
    Display length of a string: a Chinese character counts as two English
    letters (every character not of class 'Na' counts as 2).
    '''
    width = 0
    for char in text:
        width += 1 if east_asian_width(char) == 'Na' else 2
    return width
def chr_width(c):
    """Return 2 for Fullwidth, Wide or Ambiguous characters, otherwise 1."""
    return 2 if unicodedata.east_asian_width(c) in ('F', 'W', 'A') else 1
def _countwide(string): '''Count the numer of wide characters in a string.''' return sum(unicodedata.east_asian_width(char) == 'W' for char in string)
def get_zen_count(text):
    """Return how many characters of ``text`` are Fullwidth, Wide or
    Ambiguous."""
    return sum(1 for c in text if unicodedata.east_asian_width(c) in "FWA")
def get_single_with(base) -> int:
    """Return the column width (2 for Fullwidth/Wide, else 1) of ``base``
    after formatting it as a one-character string."""
    as_text = u"%s" % base
    return 2 if unicodedata.east_asian_width(as_text) in ("F", "W") else 1
def isfullwidth(token):
    """Return True when every character of ``token`` is Wide, Fullwidth or
    Ambiguous (an empty token counts as True)."""
    for char in token:
        if unicodedata.east_asian_width(char) not in ['W', 'F', 'A']:
            return False
    return True
blogs.append(BASE_URL + link.attrs['href']) # 2. 1で取得したURLを全てクローリングし、BODYタグの全ての全角文字を抽出する # 全てのblogを走査 contents_lo = '' for blog in blogs: html = req.get(blog).text # BeautifulSoupでHTMLを解析 soup = bs(html, 'html.parser') for script in soup(["script", "style"]): script.extract() # bodyコンテンツ内のinner textのみを抽出 contents = soup.find('body').get_text() # extractで救いきれなかったので、unicodedata.east_asian_widthで全角かどうか判定 for uniStr in contents: str_width = unicodedata.east_asian_width(uniStr) if str_width == 'W': contents_lo += uniStr # 3. 2で抽出した全角文字を形態素解析に掛けて、一般名詞のみを抽出する # 形態素解析にかけて、一般名詞だけを抽出 m = mc.Tagger('mecabrc') mecab_result = m.parse(contents_lo) info_of_words = mecab_result.split('\n') words = [] for info in info_of_words: if '\t' in info: kind = info.split('\t')[1].split(',')[0] category = info.split('\t')[1].split(',')[1] if kind == '名詞' and category == '一般' and (info.split('\t')[0] not in EXCLUDE_STR_LIST):
def run(input_stream, options, encoding='utf8'):
    """Aggregate rows from ``input_stream`` and print a text histogram.

    Depending on ``options`` each row is either "key value"
    (``agg_key_value``), "value key" (``agg_value_key``) or a bare key that
    is counted once. The result is printed one line per key with a bar of
    ``options.dot`` characters scaled to the largest value.

    Written for Python 2 (``print`` statement, ``str.decode``).
    Exits with status 1 when no data was read.
    """
    data = defaultdict(int)
    total = 0
    # Values are parsed as float until a purely numeric token is seen; from
    # then on all following values are parsed as int.
    type_ = float
    for row in input_stream:
        if options.agg_key_value:
            # "<key> <value>": the value is the last whitespace-separated field.
            kv = row.rstrip().rsplit(None, 1)
            if kv[1].isdigit():
                type_ = int
            value = type_(kv[1])
            data[kv[0].decode(encoding)] += value
            total += value
        elif options.agg_value_key:
            # "<value> <key>": the value is the first whitespace-separated field.
            kv = row.lstrip().split(None, 1)
            if kv[0].isdigit():
                type_ = int
            value = type_(kv[0])
            data[kv[1].decode(encoding)] += value
            total += value
        else:
            row = row.decode(encoding)
            data[row] += 1
            total += 1
    if not data:
        print "Error: no data"
        sys.exit(1)
    # Key column width: longest key (in characters), capped by max_key_length.
    max_length = max([len(key) for key in data.keys()])
    max_length = min(max_length, int(options.max_key_length))
    # Columns left for the bar, and how many counts one dot stands for.
    value_characters = int(options.max_key_length) + 30 - max_length
    max_value = max(data.values())
    scale = int(math.ceil(float(max_value) / value_characters))
    scale = max(1, scale)
    print("# each " + options.dot + " represents a count of %d. total %d" % (scale, total)).encode(encoding)
    if options.sort_values:
        data = [[v, k] for k, v in data.items()]
        data.sort(key=lambda x: x[0], reverse=options.reverse_sort)
    else:
        # sort by keys
        data = [[v, k] for k, v in data.items()]
        if options.numeric_sort:
            # keys could be numeric too
            data.sort(key=lambda x: (Decimal(x[1])), reverse=options.reverse_sort)
        else:
            data.sort(key=lambda x: x[1], reverse=options.reverse_sort)
    percentage = ""
    if options.lines:
        nlines = int(options.lines)
    else:
        nlines = None
    for value, key in data[:nlines]:
        if options.percentage:
            percentage = " (%0.2f%%)" % (100 * Decimal(value) / Decimal(total))
        # Pair every character of the key with its display width
        # (2 for East Asian Width class 'W', otherwise 1).
        name = [(c, len(c) + sum(1 for d in c if unicodedata.east_asian_width(d) == 'W')) for c in key]
        title = u''
        cum = 0
        # Keep characters while the accumulated width fits the key column.
        for c, l in name:
            cum += l
            if cum <= max_length:
                title += c
            else:
                break
        # NOTE(review): when the loop breaks, ``cum`` already includes the
        # character that did not fit, so the padding can come out shorter
        # than the free space — confirm this is intended.
        pad = u' ' * (max_length - cum)
        if isinstance(value, int):
            formatted_string = "%s%s [%6d] %s%s"
        else:
            formatted_string = "%s%s [%6.2f] %s%s"
        print (formatted_string % (pad, title[:max_length], value, int(value / scale) * options.dot, percentage))\
            .encode(encoding)
# -*- coding: utf-8 -*- """ https://note.nkmk.me/python-unicodedata-east-asian-width-count/ Created on Mon Oct 29 16:17:03 2018 @author: Akitaka """ import unicodedata print(unicodedata.east_asian_width('あ')) # 全角かな print(type(unicodedata.east_asian_width('あ'))) # W # <class 'str'> print(unicodedata.east_asian_width('a')) # 半角英数 # Na print(unicodedata.east_asian_width('A')) # 全角英数 # F print(unicodedata.east_asian_width('ア')) # 半角カナ # H print(unicodedata.east_asian_width('Å')) # 特殊文字(例: オングストローム) # A import unicodedata def get_east_asian_width_count(text): count = 0
def strlen(s):
    """Return the display width of ``s``; Wide, Fullwidth and Ambiguous
    characters count as two columns."""
    total = 0
    for c in s:
        total += 2 if unicodedata.east_asian_width(c) in "WFA" else 1
    return total
async def end(ctx, boss: str, time: str):
    # Record that a boss was defeated at ``time`` (4 characters, HHMM),
    # compute its next spawn time from the cycle stored in
    # ./data/BossList.csv, append a reminder row to ./data/Schedule.csv and
    # report the result through ``ctx.send``.
    global notes
    msg = ""
    target_boss = ""
    # Convert the input value to the registered boss name.
    target_boss = sub_module.ChangeName(boss)
    # Validate the command input.
    # NOTE(review): the validation failures below call sys.exit(), which
    # raises SystemExit instead of just ending this command — confirm that
    # terminating is intended rather than a plain ``return``.
    if target_boss == "":
        await ctx.send('入力されたボス名が正しくありません :sob:\n再入力してください :pray:')
        sys.exit()
    else:
        msg = '【' + target_boss + '】の登録を受け付けました :memo: '
    if not int(len(time)) == 4:
        await ctx.send('入力時間が4桁ではありません :sob:\n再入力してください :pray:')
        sys.exit()
    # Reject times that contain Fullwidth or Wide characters.
    for c in time:
        if (unicodedata.east_asian_width(str(c)) == 'F') or (unicodedata.east_asian_width(str(c)) == 'W'):
            await ctx.send('入力時間に全角文字が含まれています :sob:\n再入力してください :pray:')
            sys.exit()
    # Build the next spawn time.
    cyc = ""
    notes = ""
    cnt = 0
    update_row = 0
    target_time = ''
    end_date = datetime.now(pytz.timezone('Asia/Tokyo')).strftime('%Y/%m/%d')
    end_hour = time[:2]
    end_min = time[2:]
    last_time = end_date + ' ' + end_hour + ':' + end_min
    with open("./data/BossList.csv", "r", encoding="utf_8") as read_csv:
        reader = csv.reader(read_csv)
        header = next(reader)
        for row in reader:
            cnt = cnt + 1
            if row[0] == target_boss:
                # Zero-based index of the matching data row (header excluded).
                update_row = cnt - 1
                notes = ':map: : ' + row[1]
                # A 5-character third column is split as HH?MM
                # (characters 0-1 and 3-4) into the respawn cycle.
                if len(row[2]) == 5:
                    cyc = row[2]
                    cyc_hour = cyc[:2]
                    cyc_min = cyc[3:]
                if row[3] == "o":
                    notes = notes + ' , ランダム出現だよ :cyclone:'
    if cyc:
        # Add the cycle to the defeat time, carrying minutes into hours and
        # wrapping hours past midnight.
        # NOTE(review): each carry is applied only once, so sums of 120+
        # minutes or 48+ hours are not fully normalised — confirm the cycle
        # values make that impossible.
        end_hour = str(int(end_hour) + int(cyc_hour))
        end_min = str(int(end_min) + int(cyc_min))
        if int(end_min) > 59:
            end_hour = str(int(end_hour) + 1)
            end_min = str(int(end_min) - 60)
        if int(end_hour) > 23:
            end_hour = str(int(end_hour) - 24)
        # Zero-pad to two digits.
        if len(end_hour) == 1:
            end_hour = '0' + end_hour
        if len(end_min) == 1:
            end_min = '0' + end_min
        target_time = end_hour + ':' + end_min
        msg = msg + '\n次回出現時間の5分前 <' + sub_module.MakeTime( target_time) + '> にリマインダーをセットしました :alarm_clock:'
        msg = msg + '\n(' + notes + ')'
    # Persist the update.
    if not target_time == '':
        with open('./data/Schedule.csv', 'a', newline='', encoding="utf_8") as write_csv:
            writer = csv.writer(write_csv)
            writer.writerow([target_time, target_boss, 'temp', '出現', notes])
            # The explicit close() is redundant inside the ``with`` block.
            write_csv.close()
        # ``update_row`` starts at 0, so this condition is always true here.
        if update_row > -1:
            df = pd.read_csv('./data/BossList.csv', encoding="utf_8")
            df.loc[update_row, 'last time'] = last_time
            df.to_csv('./data/BossList.csv', index=False)
    # Notify that the information was registered and the reminder was set.
    await ctx.send(msg)
def string_len_dwc(string: str) -> int:
    """Return the length of ``string`` with Wide/Fullwidth characters
    counted as two."""
    total = 0
    for c in string:
        total += 2 if unicodedata.east_asian_width(c) in "WF" else 1
    return total
async def set(ctx, boss: str, time: str):
    # Register a reminder for a boss at ``time`` (4 characters, HHMM):
    # look up the boss in ./data/BossList.csv, append a row to
    # ./data/Schedule.csv and report the result through ``ctx.send``.
    global notes
    msg = ""
    target_boss = ""
    # Convert the input value to the registered boss name.
    target_boss = sub_module.ChangeName(boss)
    # Validate the command input.
    # NOTE(review): the validation failures below call sys.exit(), which
    # raises SystemExit instead of just ending this command — confirm that
    # terminating is intended rather than a plain ``return``.
    if target_boss == "":
        await ctx.send('入力されたボス名が正しくありません :sob:\n再入力してください :pray:')
        sys.exit()
    else:
        msg = '【' + target_boss + '】の登録を受け付けました :memo: '
    if not int(len(time)) == 4:
        await ctx.send('入力時間が4桁ではありません :sob:\n再入力してください :pray:')
        sys.exit()
    # Reject times that contain Fullwidth or Wide characters.
    for c in time:
        if (unicodedata.east_asian_width(str(c)) == 'F') or (unicodedata.east_asian_width(str(c)) == 'W'):
            await ctx.send('入力時間に全角文字が含まれています :sob:\n再入力してください :pray:')
            sys.exit()
    # Set up the reminder.
    target_time = ""
    set_hour = time[:2]
    set_min = time[2:]
    notes = ""
    with open("./data/BossList.csv", "r", encoding="utf_8") as read_csv:
        reader = csv.reader(read_csv)
        header = next(reader)
        for row in reader:
            if row[0] == target_boss:
                notes = ':map: : ' + row[1]
                if row[3] == "o":
                    notes = notes + ' , ランダム出現だよ :cyclone:'
                else:
                    notes = notes + ')'
    # The stored time is the requested time plus five minutes.
    # NOTE(review): presumably this offsets a 5-minute lead applied by
    # sub_module.MakeTime — verify against that helper.
    set_min = str(int(set_min) + 5)
    # NOTE(review): a result of exactly 60 minutes is not carried over
    # (``> 60``), and the hour is not wrapped at 24 — confirm.
    if int(set_min) > 60:
        set_min = str(int(set_min) - 60)
        set_hour = str(int(set_hour) + 1)
    # Zero-pad to two digits.
    if len(set_min) == 1:
        set_min = '0' + set_min
    if len(set_hour) == 1:
        set_hour = '0' + set_hour
    target_time = set_hour + ':' + set_min
    msg = msg + '\n <' + sub_module.MakeTime( target_time) + '> にリマインダーをセットしました :alarm_clock:\n'
    msg = msg + '(' + notes + ')'
    # Persist the update.
    with open('./data/Schedule.csv', 'a', newline='', encoding="utf_8") as write_csv:
        writer = csv.writer(write_csv)
        writer.writerow([target_time, target_boss, 'temp', '出現', notes])
        # The explicit close() is redundant inside the ``with`` block.
        write_csv.close()
    # Notify that the reminder was set.
    await ctx.send(msg)
def _is_wide(ch): """Return display width of character""" return east_asian_width(ch) in 'WF'
def Check(scr):
    """Return True if ``scr`` contains any character whose East Asian Width
    class is not 'Na'."""
    return any(unicodedata.east_asian_width(char) != 'Na' for char in scr)
def _charwidth(c): return 2 if unicodedata.east_asian_width(c) in ['F', 'W', 'A'] else 1
def charwidth(c):
    """Return 2 for Fullwidth or Wide characters, otherwise 1."""
    if unicodedata.east_asian_width(c) in ('F', 'W'):
        return 2
    return 1
def get_string_display_width(arg_string):
    '''
    Returns the display width of a string; Wide/Fullwidth characters
    count as two columns.
    '''
    total = 0
    for c in arg_string:
        total += 2 if unicodedata.east_asian_width(c) in "WF" else 1
    return total
def utf_char_width(string):
    """Return the width of a single character: ``WIDE`` when its East Asian
    Width class is in ``WIDE_SYMBOLS``, otherwise ``NARROW``."""
    return WIDE if east_asian_width(string) in WIDE_SYMBOLS else NARROW
async def charinfo(self, ctx, *, data: str):
    """Shows information about one or several characters.

    'data' can either be a character, a unicode escape sequence, a unicode
    character name or a string.
    If 'data' is a string only a summary of each character's info will be
    displayed.
    """
    # The input is lower-cased before any interpretation, so an upper-case
    # character is reported as its lower-case counterpart.
    data = data.lower()

    if data.startswith('\\u'):
        # Let's interpret the unicode escape sequence
        # (one or more "\uXXXX" groups, each parsed as a hexadecimal number).
        hex_values = data.split('\\u')[1:]
        try:
            code_points = [int(val, 16) for val in hex_values]
        except ValueError:
            raise commands.BadArgument('Invalid unicode escape sequence.')
        else:
            data = ''.join(chr(cp) for cp in code_points)
    elif len(data) > 1:
        # Maybe we've been given the character's name ?
        # If the lookup fails the text is treated as a plain string.
        try:
            data = unicodedata.lookup(data)
        except KeyError:
            pass

    # Normalise the input
    data = unicodedata.normalize('NFC', data)

    url_fmt = '<http://unicode-table.com/en/{:X}>'
    if len(data) == 1:
        # Detailed info on the character
        entries = [('Character', data), ('Name', unicodedata.name(data, 'None')), ('Code point', f'{ord(data):04x}')]
        # Decomposition and combining class are listed only when present.
        decomposition = unicodedata.decomposition(data)
        if decomposition != '':
            entries.append(('Decomposition', decomposition))
        combining = unicodedata.combining(data)
        if combining:
            entries.append(('Combining class', combining))
        entries.append(('Category', unicodedata.category(data)))
        bidirectional = unicodedata.bidirectional(data)
        entries.append(('Bidirectional', bidirectional if bidirectional != '' else 'None'))
        entries.append( ('Mirrored', 'True' if unicodedata.mirrored(data) == 1 else 'False'))
        entries.append( ('East asian width', unicodedata.east_asian_width(data)))
        entries.append(('Url', url_fmt.format(ord(data))))

        # Create the message's content and send it
        content = utils.indented_entry_to_str(entries)
        await ctx.send(utils.format_block(content))
    else:
        # Minimal info for each character: the character wrapped in
        # zero-width spaces, its code point, its name and a reference URL.
        entries = [ f'`\N{ZERO WIDTH SPACE}{c}\N{ZERO WIDTH SPACE}` | `\\u{ord(c):04x}` | `{unicodedata.name(c, "None")}` | {url_fmt.format(ord(c))}' for c in data ]
        content = '\n'.join(entries)
        await ctx.send(content)
def __init__(self, char): self.char = char self.name = unicodedata.name(char) self.category = unicodedata.category(char) self.east_asian_width = unicodedata.east_asian_width(char)
def print_errors(response, api_url, print_color=True, rules=False, rule_categories=False, explain_rule=False, use_html=False):
    """Print every match of a grammar-check ``response`` to stdout.

    For each match the message, the context with the offending span
    highlighted, a ruler line marking that span and up to five suggested
    replacements are printed. Optionally the rule id/description
    (``rules``), the rule category (``rule_categories``) and, at the end, a
    table of rule descriptions with their first URL (``explain_rule``) are
    added. ``api_url`` is not used in this function.
    """
    matches = response["matches"]
    language = response["language"]
    version = response["software"]["name"] + " " + response["software"]["version"]

    def colored(text, color):
        # Wrap ``text`` in ``color``: terminal escape codes by default, the
        # htmlma helper when ``use_html`` is set, plain text when colouring
        # is disabled.
        if print_color:
            if not use_html:
                init_colors(strip=False)
                return color + text + Fore.RESET
            else:
                return htmlma.htmlColored(color,text)
        else:
            return text

    # if DIAGNOSE:
    #     print(colored(
    #         "{} detected ({:.0f}% confidence)".format(language["detectedLanguage"]["name"],
    #                                                     language["detectedLanguage"]["confidence"] * 100)
    #         , Fore.LIGHTBLACK_EX))
    #     if language["detectedLanguage"]["code"] != language["code"]:
    #         print(colored(
    #             "checking as {} text because of setting".format(language["name"])
    #             , Fore.LIGHTBLACK_EX))
    #     print()
    # 27june2021/sik - Make it more informative.
    # 16dec2021/sik - commented out because very error prone.
    # if language["detectedLanguage"]["code"] != language["code"]:
    #     print(colored(
    #         "{} detected ({:.0f}% confidence)".format(language["detectedLanguage"]["name"],
    #                                                     language["detectedLanguage"]["confidence"] * 100)
    #         , Fore.LIGHTBLACK_EX))
    #     print(colored(
    #         "checking as {} text because of setting".format(language["name"])
    #         , Fore.LIGHTBLACK_EX))

    # Check mark and cross used as line prefixes.
    tick = colored(u"\u2713", Fore.LIGHTGREEN_EX) + " "
    cross = colored(u"\u2717", Fore.LIGHTRED_EX) + " "

    rule_explanations = []
    for error in matches:
        context_object = error["context"]
        context = context_object["text"]
        length = context_object["length"]
        offset = context_object["offset"]
        sentence = error["sentence"] # use sentence instead
        diag = False
        if language["code"] in ["ja","ja-JP"]:
            if not context.startswith("..."):
                #context = sentence # use sentence instead
                pass
            # recalculate offset value
            #if context[offset:offset+length] ==
            #diag = True
        if DIAGNOSE or diag:
            print(f"length={length} offset={offset} error={error} language={language}")
        count = 0
        ruler = ""
        ruler_len = length
        space_before_offset = 0
        enable_dbcs = False # whether DBCS-specific logic is needed here or not
        pc = ""       # previous character
        pstate = ""   # East Asian Width class of the previous character
        # First pass: count spaces adjacent to Wide characters that occur up
        # to shortly after the reported offset; the count is later added to
        # the offset. A "。" resets the count.
        for c in context:
            cstate = unicodedata.east_asian_width(c)
            if count <= offset+space_before_offset+2:
                if cstate == "W":
                    if pc == " ":
                        space_before_offset += 1
                        enable_dbcs = True
                else:
                    if c == " " and pstate== "W":
                        space_before_offset += 1
                        enable_dbcs = True
            if c == "。":
                space_before_offset = 0
            pstate = cstate
            pc = c
            count += 1
        count = 0
        # Second pass: build the ruler, marking the characters of the
        # (shifted) error span and blanking the positions before it.
        for c in context:
            cstate = unicodedata.east_asian_width(c)
            if count >= offset+space_before_offset and count < offset+space_before_offset+length:
                if cstate == "W":
                    ruler += "〜"
                else:
                    ruler += "^"
            elif count <= offset+space_before_offset+length:
                if cstate == "W":
                    ruler += " "
                else:
                    ruler += " "
            if DIAGNOSE or diag:
                print(f"{count} {cstate}: {c} - {ruler} {cstate} sbo={space_before_offset}")
            count += 1
        # temporary bug fix?
        if DIAGNOSE or diag:
            print(f"old: length={length} offset={offset} enable_dbcs={enable_dbcs}")
        if space_before_offset and enable_dbcs:
            offset += space_before_offset
        if DIAGNOSE or diag:
            print(f"new: length={length} offset={offset}")
        endpostion = offset + length
        print(error["message"])
        # Context line: text before / inside / after the error span.
        print( indention[:2] + cross + colored(context[:offset], Fore.LIGHTBLACK_EX) + colored(context[offset:endpostion], Fore.LIGHTRED_EX) + colored(context[endpostion:], Fore.LIGHTBLACK_EX) )
        # print(
        #     indention +
        #     offset * " " +
        #     colored(length * "^", Fore.LIGHTRED_EX)
        # )
        # Ruler line, with the span itself highlighted.
        print( indention + ruler[:offset] + colored(ruler[offset:endpostion], Fore.LIGHTRED_EX) + ruler[endpostion:] )
        if error["replacements"]:
            # only print first 5 replacements
            for replacement in error["replacements"][:5]:
                print( indention[:2] + tick + colored(context[:offset], Fore.LIGHTBLACK_EX) + colored(replacement["value"], Fore.LIGHTGREEN_EX) + colored(context[endpostion:], Fore.LIGHTBLACK_EX) )
        rule = error["rule"]
        if rules:
            print( indention[:2] + colored(rule["id"] + ": ", Fore.LIGHTBLACK_EX) + rule["description"] )
        if rule_categories:
            category = rule["category"]
            print( indention[:2] + colored(category["id"] + ": ", Fore.LIGHTBLACK_EX) + category["name"] )
        #sys.stderr.write("Rule=%s\n" % rule)
        if explain_rule:
            rule = error["rule"]
            # Collect (description, first URL) for the summary table below.
            if "description" in rule and "urls" in rule and len(rule["urls"]) > 0:
                rule_explanations.append((rule["description"], rule["urls"][0]["value"]))
        print()
    if explain_rule and rule_explanations:
        # Align the URLs in a column one past the longest description.
        col_len = max(len(d) for d,u in rule_explanations) + 1
        for descr, url in rule_explanations:
            print(descr + ":" + " " * (col_len - len(descr)) + url)
        print()
def addstr(self, string, options=curses.A_NORMAL, indent=0, mentioned=False): # @UndefinedVariable
    """Write ``string`` to the curses pad, optionally wrapped and indented.

    With a non-zero ``indent`` the UTF-8 decoded string is written character
    by character so that line breaks can be inserted and continuation lines
    indented; otherwise it is written in one call. ``mentioned`` flags the
    window in the window list. Afterwards the unread counters of inactive
    windows are updated and ``auto_scroll`` is called.

    Written for Python 2 (``string.decode``).
    """
    try:
        # wait until other threads have finished writing
        # NOTE(review): the lock is acquired inside the ``try``; if the
        # acquire itself raised, ``finally`` would release a lock that is
        # not held — confirm.
        self.lock.acquire_lock()

        # check if comment needs to be line wrapped, indent it if so
        if indent:
            # iterate over every character, note that BoardPad sends a string
            # for every word delimited by a space
            for stringpos, character in enumerate(string.decode('utf-8')):
                (self.pposy, self.pposx) = self.mypad.getyx()
                # FIXME: also == 'F'
                # NOTE(review): the first operand of ``or`` is the width
                # class string itself, which is never empty, so the
                # parenthesised test is always true and every character in
                # the last column triggers a line break — confirm intent.
                if (unicodedata.east_asian_width(u''.join(character)) or unicodedata.east_asian_width(u''.join(character)) == 'W') and self.pposx == self.pwidth-1:
                    self.mypad.addstr("\n")
                    (self.pposy, self.pposx) = self.mypad.getyx()
                    self.size = self.pposy

                # wrap oversized word at the end of the line
                if stringpos == 0:
                    space_needed = self.pposx + len(string)
                    #indented_space = self.pmaxx - indent
                    if space_needed > self.pwidth:
                        self.mypad.addstr("\n")
                        #self.line += u"\n".decode('utf-8')
                        (self.pposy, self.pposx) = self.mypad.getyx()
                        self.size = self.pposy

                # indent after line wrap
                if self.pposx == 0:
                    self.mypad.addstr(" "*indent)

                # output the character and adjust the pad size
                self.mypad.addstr(character.encode('utf-8'), options)
                (self.pposy, self.pposx) = self.mypad.getyx()
                self.size = self.pposy

        # add string to current position
        else:
            self.mypad.addstr(string, options)
            (self.pposy, self.pposx) = self.mypad.getyx()
            self.size = self.pposy

        if mentioned:
            self.wl.windowListProperties[self]['sb_mentioned'] = True

    except Exception as err:
        self.dlog.excpt(err, msg=">>>in Pad.addstr() - indent != 0", cn=self.__class__.__name__)
        # curses reports a failed write with this message; the pad is then
        # replaced by a new, taller one.
        if str(err) == "addstr() returned ERR":
            self.dlog.msg("Pad full. Reinitializing..")
            self.mypad = curses.newpad(self.pheight+Pad.padbuffersize, self.pwidth) # @UndefinedVariable
    finally:
        self.lock.release_lock()

    # Increase unread line counter on inactive windows
    if re.search(r'\n', string):
        if not self._active or not self.autoScroll:
            try:
                self.wl.set_property(self, 'sb_unread', True)
                self.wl.windowListProperties[self]['sb_lines'] += 1
                self.generate_unread_window_element()
            except KeyError:
                # A missing window-list entry is silently ignored.
                pass
            except Exception as err:
                self.dlog.excpt(err, msg="Pad.addstr() -> not self._active")
    self.auto_scroll()
def width(s):
    """Return the rounded sum of the ``WIDTH_MAPPING`` entries for the East
    Asian Width class of every character in ``s``."""
    total = 0
    for c in s:
        total += WIDTH_MAPPING[unicodedata.east_asian_width(c)]
    return round(total)