def wrapper_jp(string, width):
    """Wrap Japanese text at token boundaries.

    ``string`` is split into tokens with ``TinySegmenter`` and the tokens
    are packed greedily, in order, into lines of at most ``width``
    characters.  Tokens are never split.

    Args:
        string: The text to wrap.
        width: Maximum number of characters per line.

    Returns:
        The wrapped text, with lines separated by a newline character and
        no trailing newline.  An input that yields no tokens gives an
        empty string.
    """
    tokens = TinySegmenter().tokenize(string)

    lines = []
    current = ""
    for token in tokens:
        # Start a new line only when the current one already holds text and
        # the next token would overflow it.  A token that is longer than
        # ``width`` on its own is therefore still consumed (on a line of its
        # own, exceeding the width) instead of stalling the loop forever.
        if current and len(current) + len(token) > width:
            lines.append(current)
            current = token
        else:
            current += token

    if current:
        lines.append(current)

    return "\n".join(lines)
def on_status(self, status):
    """Print an incoming status and read it aloud.

    Statuses that carry a ``retweeted_status`` attribute, and statuses whose
    author's screen name equals an entry of ``exception_ids``, are ignored.

    The text to read is the author's name, 'さん ' and the status text (each
    passed through ``str_replace``), tokenized with ``TinySegmenter``.
    Consecutive tokens are grouped into runs: runs classified as Japanese
    are spoken with ``SayKotoeri2 -s 110`` and the remaining runs with
    ``say -v Victoria -r 200``.

    Any exception raised while printing or speaking is reported on stderr
    and swallowed, so one bad status does not propagate to the caller.

    Args:
        status: Status object; ``author.name``, ``author.screen_name``,
            ``text``, ``source`` and ``created_at`` are read from it.
    """
    import os
    import subprocess

    # Same equality test as a manual loop over exception_ids.
    excluded = any(status.author.screen_name == buff for buff in exception_ids)
    if excluded or hasattr(status, 'retweeted_status'):
        return

    # A token counts as "Japanese" when it starts with a non-ASCII
    # character, starts with a digit or '+', or is exactly one ASCII letter
    # other than 's' ...
    japanese_token = re.compile(u'(?:[^\u0000-\u007F]|[\\d+]|^[A-Za-rt-z]{1}$)')
    # ... unless it is a lone 'a' / 'A'.
    lone_article = re.compile(u'^[aA]$')
    leading_space = re.compile(r'^\s+')

    def to_bytes(text):
        # Encode explicitly so the child process receives UTF-8 no matter
        # what the interpreter's default encoding is.
        return text if isinstance(text, bytes) else text.encode('utf-8')

    def speak_english(text, devnull):
        # Nothing buffered: do not spawn a process just to say nothing.
        if not text:
            return
        # The text is fed on stdin (as the former `echo ... | say` did) and
        # never passes through a shell, so quotes, `$(...)` or backticks in
        # a status cannot be executed.
        proc = subprocess.Popen(['say', '-v', 'Victoria', '-r', '200'],
                                stdin=subprocess.PIPE,
                                stdout=devnull, stderr=devnull)
        proc.communicate(to_bytes(text) + b'\n')

    def speak_japanese(text, devnull):
        if not text:
            return
        # Argument list, no shell: the text is passed verbatim as one argv
        # entry.
        subprocess.call(['SayKotoeri2', '-s', '110', to_bytes(text)],
                        stdout=devnull, stderr=devnull)

    try:
        print(u'\n---{name}/@{screen}---\n {text}\nvia {src} {created}'.format(
            name=status.author.name,
            screen=status.author.screen_name,
            # NOTE(review): the previous code replaced '&' with '&' (a
            # no-op); presumably it was meant to undo the '&amp;' escaping
            # of the status text -- confirm.
            text=status.text.replace('&amp;', '&'),
            src=status.source,
            created=status.created_at))

        read_text = (str_replace(status.author.name.decode('utf-8'))
                     + 'さん '
                     + str_replace(status.text.decode('utf-8')))

        with open(os.devnull, 'wb') as devnull:
            string_jp = ''
            string_en = ''
            for seg in TinySegmenter().tokenize(read_text):
                seg = leading_space.sub('', seg)
                if japanese_token.match(seg) and not lone_article.match(seg):
                    # Contains Japanese: flush the pending English run first.
                    speak_english(string_en, devnull)
                    string_en = ''
                    string_jp += seg
                else:
                    # Flush the pending Japanese run, then buffer the token.
                    speak_japanese(string_jp, devnull)
                    string_jp = ''
                    string_en += ' ' + seg

            # At most one buffer is non-empty here; speak whatever is left.
            if string_jp:
                speak_japanese(string_jp, devnull)
            else:
                speak_english(string_en, devnull)
    except Exception as e:
        # Best effort: report the failure and keep going.
        sys.stderr.write('Encountered Exception: {0}\n'.format(e))
def demo():
    """Tokenize a sample Japanese sentence and print the result.

    The tokens produced by ``TinySegmenter`` are joined with ' | ' and the
    joined text is printed UTF-8 encoded.
    """
    sample = u"私の名前は中野です"
    pieces = TinySegmenter().tokenize(sample)
    joined = u' | '.join(pieces)
    print(joined.encode('utf-8'))