# Python 2 only: reload() re-exposes sys.setdefaultencoding (removed from the
# module at interpreter startup) so implicit str/unicode conversions use UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')
# Make modules in the current working directory importable; this must happen
# before the jiebav2 imports below.
sys.path.append("./")

import string
from collections import defaultdict

import jiebav2
import jiebav2.posseg

# Counter-style mapping: missing keys start at 0.
seg_dict = defaultdict(int)
threshold = 1

# Common-words dictionary.
jiebav2.load_userdict("./userdict.dic")
# Brand dictionary.
jiebav2.load_userdict("./brands.dic")

# Product dictionaries: every *.dic file anywhere under ./product_dict.
list_dirs = os.walk("./product_dict")
for root, dirs, files in list_dirs:
    for f in files:
        # Test the extension, not a substring, so editor backups and swap
        # files such as "x.dic~" or ".x.dic.swp" are not loaded as dictionaries.
        if f.endswith(".dic"):
            dict_path = os.path.join(root, f)
            print(dict_path)
            jiebav2.load_userdict(dict_path)


def trans_string(s):
    table = string.maketrans("","")
parser.add_argument("-q", "--quiet", action="store_true", default=False,
                    help="don't print loading messages to stderr")
parser.add_argument("-V", "--version", action="version",
                    version="Jieba " + jiebav2.__version__)
parser.add_argument("filename", nargs="?", help="input file")

args = parser.parse_args()

# Quiet mode: raise the segmenter's log level to 60 so loading messages are
# not printed.
if args.quiet:
    jiebav2.setLogLevel(60)

delim = text_type(args.delimiter)
cutall = args.cutall
hmm = args.hmm
# Read from the named file when one is given, otherwise from standard input.
fp = open(args.filename, "r") if args.filename else sys.stdin

# Initialise with a custom main dictionary when requested, else the default.
if args.dict:
    jiebav2.initialize(args.dict)
else:
    jiebav2.initialize()
if args.user_dict:
    jiebav2.load_userdict(args.user_dict)

try:
    # Pull one line at a time with readline() so each input line is segmented
    # and printed before the next one is read.
    ln = fp.readline()
    while ln:
        # Strip the line terminator once and reuse the result.
        line = ln.rstrip("\r\n")
        result = delim.join(jiebav2.cut(line, cutall, hmm))
        if PY2:
            # On Python 2 the joined result is encoded to bytes before printing.
            result = result.encode(default_encoding)
        print(result)
        ln = fp.readline()
finally:
    # Close the input even if segmentation or printing raises.
    fp.close()