Example #1
0
# Python 2-only workaround: switch the interpreter-wide default string
# encoding to UTF-8. The sys module is reloaded first because
# sys.setdefaultencoding is removed from the namespace at interpreter
# start-up (see the Python 2 `sys` documentation).
reload(sys)  # brings sys.setdefaultencoding back into the module
sys.setdefaultencoding('utf8')

# Make modules located in the current working directory importable.
sys.path.append("./")

import string
from collections import defaultdict

import jiebav2
import jiebav2.posseg

# Mapping whose missing keys default to 0.
seg_dict = defaultdict(int)
# NOTE(review): not used in this section; presumably a minimum-count cutoff
# applied further down — confirm against the rest of the file.
threshold = 1

# Common-word dictionary
jiebav2.load_userdict("./userdict.dic")

# Brand dictionary
jiebav2.load_userdict("./brands.dic")

# Product dictionaries: load every *.dic file found anywhere under
# ./product_dict, printing each path as it is loaded.
list_dirs = os.walk("./product_dict")
for root, dirs, files in list_dirs:
	for f in files:
		# Test the extension rather than a substring, so names such as
		# "x.dict", "x.dic.bak" or "x.dic~" are not loaded by accident.
		if not f.endswith(".dic"):
			continue
		dict_path = os.path.join(root, f)
		# Single-argument print(...) behaves the same on Python 2 and 3.
		print(dict_path)
		jiebav2.load_userdict(dict_path)


def trans_string(s):
	table = string.maketrans("","")
Example #2
0
# Register the remaining command-line options on the existing parser,
# then parse the process arguments.
parser.add_argument(
    "-q", "--quiet",
    action="store_true",
    default=False,
    help="don't print loading messages to stderr",
)

# -V / --version prints the banner below and exits (argparse "version" action).
version_banner = "Jieba " + jiebav2.__version__
parser.add_argument(
    "-V", "--version",
    action="version",
    version=version_banner,
)

# Optional positional argument: the file to segment.
parser.add_argument(
    "filename",
    nargs="?",
    help="input file",
)

args = parser.parse_args()

# Apply --quiet before anything is loaded. 60 is above logging.CRITICAL (50);
# presumably this mutes every record jiebav2 logs — confirm against
# jiebav2.setLogLevel.
if args.quiet:
    jiebav2.setLogLevel(60)

# Segmentation settings taken straight from the parsed arguments.
delim = text_type(args.delimiter)
cutall, hmm = args.cutall, args.hmm

# Input comes from the named file when one was given, otherwise from stdin.
if args.filename:
    fp = open(args.filename, "r")
else:
    fp = sys.stdin

# Initialise with the dictionary passed on the command line, or with no
# argument at all when none was supplied.
init_args = (args.dict,) if args.dict else ()
jiebav2.initialize(*init_args)

# An additional user dictionary is optional.
if args.user_dict:
    jiebav2.load_userdict(args.user_dict)

# Segment the input one line at a time: strip the line terminator, cut the
# text, join the pieces with the delimiter and print the result.
# readline() is still called explicitly so each line is handled as soon as
# it has been read; an empty string marks end of input.
try:
    ln = fp.readline()
    while ln:
        # Strip the terminator once and reuse it (the original computed it
        # twice and left the first copy unused).
        line = ln.rstrip("\r\n")
        result = delim.join(jiebav2.cut(line, cutall, hmm))
        if PY2:
            # On Python 2 the joined text is encoded explicitly before
            # it is printed.
            result = result.encode(default_encoding)
        print(result)
        ln = fp.readline()
finally:
    # Close the input even when segmentation or printing raises.
    fp.close()