Example 1
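# Lazily build the shared vocabulary and word segmentor on the first call.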
def init(vocab_path=None, append=None):
    global vocab, Segmentor
    if vocab is None:
        vocabulary.init(vocab_path, append=append)
        print('ENCODE_UNK', ENCODE_UNK, file=sys.stderr)
        vocab = vocabulary.get_vocab()
        Segmentor = gezi.Segmentor()
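
A minimal usage sketch for this lazy initializer, assuming the enclosing module defines vocab = None and Segmentor = None at top level (as the global statement implies); the segment_text helper below is hypothetical, not part of the source:

def segment_text(text):
    # Hypothetical helper: initialize on first use, then segment.
    init()
    return Segmentor.Segment(text)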
Example 2
import sys, os

# Assumption: flags/FLAGS come from TensorFlow's tf.app.flags (TF 1.x),
# as in the other examples in this codebase.
import tensorflow as tf
flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_string('seg_method', 'default', 'segmentation method passed to Segmentor.Segment')

import nowarning
from libsegment import *
import conf
from conf import WORDS_SEP
# Requires ./data and ./conf in the working directory.
#Segmentor.Init()

print('seg_method:', FLAGS.seg_method, file=sys.stderr)

sys.path.append('../')
import gezi 
Segmentor = gezi.Segmentor()

for line in open(sys.argv[1]):
  l = line.rstrip().split('\t')
  # Skip malformed lines: need an image field plus at least one comment.
  if len(l) < 3:
    continue
  # Keep only the image file name, dropping its directory path.
  img = os.path.basename(l[0])
  index = 0
  for comment in l[2:]:
    if len(comment) == 0:
      continue
    #words = Segmentor.Segment(comment, ' ')
    words = WORDS_SEP.join(Segmentor.Segment(comment, FLAGS.seg_method))
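
For context, a hypothetical invocation and input line for this script; the file name and field contents are illustrative only, not from the source:

# python seg_comments.py comments.txt    (script name hypothetical)
# where each line of comments.txt looks like:
# path/to/123.jpg<TAB>meta<TAB>美女一定要支持<TAB>another comment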
Example 3
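# Variant of Example 1: assumes the vocabulary module was already initialized elsewhere.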
def init():
    global vocab, Segmentor
    if vocab is None:
        print('ENCODE_UNK', ENCODE_UNK, file=sys.stderr)
        vocab = vocabulary.get_vocab()
        Segmentor = gezi.Segmentor()
Example 4
#        \author   chenghuige  
#          \date   2016-09-05 11:48:05.006754
#   \Description  
# ==============================================================================

  
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import gezi
import libsegment

seg = gezi.Segmentor()
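# Exercise the Segmentor API: the default Segment() call, named methods
# ('phrase_single', 'phrase'), lowercase wrapper methods, the module-level
# gezi.seg instance, and character-level segmentation of mixed-language text.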

print('\t'.join(seg.Segment('美女一定要支持')))
print('\x01'.join(seg.Segment('Oh q the same thing to me')))
print('\x01'.join(seg.Segment('Oh q the same thing to me', 'phrase_single')))
print('\x01'.join(seg.Segment('Oh q the same thing to me', 'phrase')))
print('\t'.join(seg.Segment('绿鹭')))
print('\t'.join(seg.segment('绿鹭')))
print('\t'.join(seg.segment_phrase('绿鹭')))
print('\t'.join(gezi.seg.Segment('绿鹭', libsegment.SEG_NEWWORD)))
print('\t'.join(gezi.seg.Segment('绿鹭')))

print('|'.join(gezi.segment_char('a baby is looking at 我的小伙伴oh 我不no no没关系 是不是   tian, that not ')))


from libword_counter import Vocabulary
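
The excerpt ends at this import. A purely hypothetical sketch of pairing such a vocabulary with the segmentor above, assuming a path-taking constructor and an id() lookup method (neither signature is confirmed by the source):

# Hypothetical: the Vocabulary constructor and id() are assumed signatures.
vocab = Vocabulary('./data/vocab.txt')
ids = [vocab.id(w) for w in seg.Segment('美女一定要支持')]
print(ids)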