verb_terms_arr = [] words_use = pseg.cut(test_sent) for word_use in words_use: if re.match("n", word_use.flag) != None and word_use.word not in noun_terms_arr: noun_terms_arr.append(word_use.word) if re.match("v", word_use.flag) != None and word_use.word not in verb_terms_arr: verb_terms_arr.append(word_use.word) result_arr.extend(noun_terms_arr) result_arr.extend(verb_terms_arr) print("no ckip") print(result_arr) return (result_arr, noun_terms_arr, verb_terms_arr) segmenter = CKIPSegmenter('gcsn', 'rb303147258') parser = CKIPParser('gcsn', 'rb303147258') client = MongoClient('mongodb://localhost:27017/') db = client['councilor'] collection = db['ntp_bills'] # collection_save = db['test'] bills = list(collection.find()) for bill in bills: description_verb = [] description_nonu = [] description_term = [] try: result = segmenter.process(bill["description"]) if result['status_code'] != '0': print('Process Failure: ' + result['status'])
if 'child' in root: for child in root['child']: for leaf in traverse(child): yield leaf else: yield root # Usage example of the CKIPSegmenter class segmenter = CKIPSegmenter('gcsn', 'rb303147258') # result = segmenter.process('這是一隻可愛的小花貓') result = segmenter.process( '一、在地子弟商管博士林世宗主張「焚化爐輪燒或停爐」、「受污染地區垃圾袋補助」!為您監督市政、看緊荷包,打造士林、北投幸福家園!') if result['status_code'] != '0': print('Process Failure: ' + result['status']) for sentence in result['result']: for term in sentence: print(term['term'].encode('utf-8'), term['pos']) print("") # Usage example of the CKIPParser class parser = CKIPParser('gcsn', 'rb303147258') result = parser.process('這是一隻可愛的小花貓') if result['status_code'] != '0': print('Process Failure: ' + result['status']) for sentence in result['result']: for term in traverse(sentence['tree']): print(term['term'].encode('utf-8'), term['pos'])
def traverse(root):
    """Yield every leaf node of the tree rooted at ``root``.

    A node that has a ``'child'`` key is treated as an internal node and the
    value under that key is iterated as its children. A node without that
    key is a leaf and is yielded unchanged. Leaves are produced depth-first,
    in the order the children are listed.

    Args:
        root: A mapping-like node supporting ``'child' in root`` and, for
            internal nodes, ``root['child']``.

    Yields:
        Each leaf node reachable from ``root``.
    """
    if 'child' in root:
        for child in root['child']:
            for leaf in traverse(child):
                yield leaf
    else:
        yield root


# The examples below call the CKIP service, so they run only when this file
# is executed as a script. Importing the module (e.g. to reuse ``traverse``)
# performs no requests.
if __name__ == '__main__':
    # Usage example of the CKIPSegmenter class
    segmenter = CKIPSegmenter('YOUR USERNAME', 'YOUR PASSWORD')
    result = segmenter.process('這是一隻可愛的小花貓')
    if result['status_code'] != '0':
        # Report the failure and skip the result loop for this request.
        print('Process Failure: ' + result['status'])
    else:
        for sentence in result['result']:
            for term in sentence:
                print(term['term'], term['pos'])

    # Usage example of the CKIPParser class
    parser = CKIPParser('YOUR USERNAME', 'YOUR PASSWORD')
    result = parser.process('這是一隻可愛的小花貓')
    if result['status_code'] != '0':
        # Report the failure and skip the result loop for this request.
        print('Process Failure: ' + result['status'])
    else:
        for sentence in result['result']:
            # Each sentence carries a parse tree; print only its leaves.
            for term in traverse(sentence['tree']):
                print(term['term'], term['pos'])