Example #1
0
def wrapper_jp(string, width):
    """Wrap a Japanese string by inserting newlines between tokens.

    The text is tokenized with ``TinySegmenter`` and the tokens are packed
    greedily into lines of at most ``width`` characters, so line breaks
    normally fall on token boundaries.

    A single token that is longer than ``width`` is hard-split into pieces
    of ``width`` characters (at least one character per piece).  Without
    this, such a token could never be placed on a line and the wrapping
    loop would never terminate.

    Args:
        string: Text to wrap.
        width: Maximum number of characters per output line.

    Returns:
        The wrapped lines joined with ``'\\n'``, without a trailing
        newline.  If tokenizing yields no tokens, ``''`` is returned.
    """
    from collections import deque

    segmenter = TinySegmenter()
    # deque gives O(1) removal (and re-insertion) at the front.
    tokens = deque(segmenter.tokenize(string))
    lines = []
    while tokens:
        line = ""
        # Accumulate tokens for as long as the line stays within width.
        while tokens and len(line) + len(tokens[0]) <= width:
            line += tokens.popleft()
        if not line and tokens:
            # The next token does not fit even on an empty line: cut off
            # what fits and push the remainder back, guaranteeing progress.
            oversized = tokens.popleft()
            cut = max(width, 1)
            line = oversized[:cut]
            remainder = oversized[cut:]
            if remainder:
                tokens.appendleft(remainder)
        lines.append(line)
    return "\n".join(lines)
Example #2
0
    def on_status(self, status):
        """Print an incoming status and read it aloud via external commands.

        Statuses that carry a ``retweeted_status`` attribute, and statuses
        whose author's screen name equals an entry of ``exception_ids``,
        are ignored.  Otherwise the status is printed, and the author's
        name (followed by an honorific suffix) plus the status text is
        tokenized with ``TinySegmenter``.  Runs of segments classified as
        Japanese are spoken with ``SayKotoeri2``; the remaining runs are
        spoken with ``say -v Victoria``.

        The speech commands are started from argument lists, never through
        a shell, so characters such as quotes, ``$`` or backticks in the
        status text cannot be interpreted as shell syntax.

        Any exception raised while printing or speaking is reported on
        stderr and swallowed.

        Args:
            status: Status object.  This method reads ``author.name``,
                ``author.screen_name``, ``text``, ``source`` and
                ``created_at`` from it.

        Returns:
            None.
        """
        import os

        is_excluded = any(status.author.screen_name == buff
                          for buff in exception_ids)
        if is_excluded or hasattr(status, 'retweeted_status'):
            return

        try:
            print(u'\n---{name}/@{screen}---\n   {text}\nvia {src} {created}'.format(
                name=status.author.name,
                screen=status.author.screen_name,
                text=status.text.replace('&amp;', '&'),
                src=status.source,
                created=status.created_at))
            # NOTE(review): calling .decode('utf-8') on these values only
            # works on Python 2 text handling -- confirm before porting.
            read_text = (str_replace(status.author.name.decode('utf-8'))
                         + 'さん '
                         + str_replace(status.text.decode('utf-8')))

            japanese_command = ['SayKotoeri2', '-s', '110']
            english_command = ['say', '-v', 'Victoria', '-r', '200']

            with open(os.devnull, 'w') as devnull:

                def speak(command, text):
                    """Run ``command`` with ``text`` as its last argument."""
                    if not text.strip():
                        # Nothing audible to say; do not spawn a process.
                        return
                    if not isinstance(text, bytes):
                        text = text.encode('utf-8')
                    try:
                        call(command + [text], stdout=devnull, stderr=devnull)
                    except OSError as err:
                        # A missing speech tool must not stop the other one.
                        sys.stderr.write('Could not run {0}: {1}\n'.format(
                            command[0], err))

                string_jp = ''
                string_en = ''
                for seg in TinySegmenter().tokenize(read_text):
                    seg = re.sub(r'^\s+', '', seg)
                    # Treated as Japanese: a segment that starts with a
                    # non-ASCII character, a digit or '+', or that is a
                    # single ASCII letter other than 's', 'a' and 'A'.
                    if (re.match(u'(?:[^\u0000-\u007F]|[\d+]|^[A-Za-rt-z]{1}$)', seg)
                            and not re.match(u'^[aA]$', seg)):
                        # Switching language: flush the pending English run.
                        speak(english_command, string_en)
                        string_en = ''
                        string_jp = string_jp + seg
                    else:
                        # Switching language: flush the pending Japanese run.
                        speak(japanese_command, string_jp)
                        string_jp = ''
                        # The leading space separates words and also keeps
                        # the text from ever starting with '-' (an option).
                        string_en = string_en + ' ' + seg

                # At most one of the two buffers is non-empty here.
                speak(japanese_command, string_jp)
                speak(english_command, string_en)

        except Exception as e:
            sys.stderr.write('Encountered Exception: {0}\n'.format(e))
def demo():
    """Tokenize a sample Japanese sentence and print the result.

    The tokens produced by ``TinySegmenter`` are joined with ``' | '`` and
    the joined text is printed after being encoded as UTF-8.
    """
    sample = u"私の名前は中野です"
    pieces = TinySegmenter().tokenize(sample)
    joined = u' | '.join(pieces)
    print(joined.encode('utf-8'))
def demo():
    """Print the ``TinySegmenter`` tokens of a sample sentence.

    The tokens are separated by ``' | '`` and the resulting text is
    UTF-8 encoded before being passed to ``print``.
    """
    tokenizer = TinySegmenter()
    tokens = tokenizer.tokenize(u"私の名前は中野です")
    encoded = u' | '.join(tokens).encode('utf-8')
    print(encoded)