def CkipReturn(in_text):
    """Segment ``in_text`` with the CKIP service and return the raw result.

    Args:
        in_text: The text to segment (a string). It is converted with
            ``unicode()`` before being handed to the segmenter.

    Returns:
        Whatever ``CKIPSegmenter.process`` returns for ``in_text``. If that
        call raises, the result of processing the placeholder text
        ``'got an error'`` is returned instead.
    """
    # NOTE(review): the account name and password are hard-coded in source;
    # consider loading them from configuration instead.
    segmenter = CKIPSegmenter('changchengtu', 'asd')
    try:
        segmented_in_text_result = segmenter.process(unicode(in_text))
    except Exception:
        # Best-effort fallback kept from the original. Narrowed from a bare
        # ``except:`` so KeyboardInterrupt/SystemExit are no longer swallowed.
        segmented_in_text_result = segmenter.process(unicode('got an error'))
    return segmented_in_text_result
def CkipReturn(in_text):
    """Segment ``in_text`` with the CKIP service and return the raw result.

    Args:
        in_text: The text to segment (a string). It is converted with
            ``unicode()`` before being handed to the segmenter.

    Returns:
        Whatever ``CKIPSegmenter.process`` returns for ``in_text``. If that
        call raises, the result of processing the placeholder text
        ``'got an error'`` is returned instead.
    """
    # NOTE(review): the account name and password are hard-coded in source;
    # consider loading them from configuration instead.
    segmenter = CKIPSegmenter('changcheng.tu', 'a10206606')
    try:
        segmented_in_text_result = segmenter.process(unicode(in_text))
    except Exception:
        # Best-effort fallback kept from the original. Narrowed from a bare
        # ``except:`` so KeyboardInterrupt/SystemExit are no longer swallowed.
        segmented_in_text_result = segmenter.process(unicode('got an error'))
    return segmented_in_text_result
def CkipReturn(in_text):
    """Segment ``in_text`` with the CKIP service and return the raw result.

    Args:
        in_text: The text to segment (a string). It is converted with
            ``unicode()`` before being handed to the segmenter.

    Returns:
        Whatever ``CKIPSegmenter.process`` returns for ``in_text``. If that
        call raises, the result of processing the placeholder text
        ``'got an error'`` is returned instead.
    """
    # NOTE(review): the account name and password are hard-coded in source;
    # consider loading them from configuration instead.
    segmenter = CKIPSegmenter('_dehao', 'dehao')
    try:
        segmented_in_text_result = segmenter.process(unicode(in_text))
    except Exception:
        # Best-effort fallback kept from the original. Narrowed from a bare
        # ``except:`` so KeyboardInterrupt/SystemExit are no longer swallowed.
        segmented_in_text_result = segmenter.process(unicode('got an error'))
    return segmented_in_text_result
def to_ckip(inp):
    """Segment ``inp`` with CKIP and return its terms and POS tags.

    Args:
        inp: Text passed unchanged to ``CKIPSegmenter.process``.

    Returns:
        A list ``[terms, tags, combined]`` where ``terms`` holds the
        segmented words, ``tags`` the parallel part-of-speech tags, and
        ``combined`` is the space-joined string of ``word(pos)`` items.
        Returns ``False`` if the request or the result handling raises.
    """
    # CKIP account credentials.
    # NOTE(review): hard-coded in source; consider moving to configuration.
    segmenter = CKIPSegmenter('Bolin', 'Bolin')
    try:
        result = segmenter.process(inp)  # segmentation result
        if result['status_code'] != '0':  # segmentation failed
            # Only reported; as in the original, processing still continues.
            print('Process Failure: ' + result['status'])

        terms = []     # segmented words
        tags = []      # part-of-speech tag for each word
        labelled = []  # "word(pos)" strings
        for sentence in result['result']:
            for term in sentence:
                word = term['term']
                pos = term['pos']
                terms.append(word)
                tags.append(pos)
                labelled.append(word + '(' + pos + ')')

        combine = ' '.join(labelled)
        return [terms, tags, combine]
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # propagate; callers still get ``False`` on ordinary failures.
        print('error :', inp)
        return False
def to_ckip(inp):
    """Segment ``inp`` with CKIP and return its terms and POS tags.

    Args:
        inp: Text passed unchanged to ``CKIPSegmenter.process``.

    Returns:
        A list ``[terms, tags, combined]`` where ``terms`` holds the
        segmented words, ``tags`` the parallel part-of-speech tags, and
        ``combined`` is the space-joined string of ``word(pos)`` items.
        Returns ``False`` if the request or the result handling raises.
    """
    # CKIP account credentials.
    # NOTE(review): hard-coded in source; consider moving to configuration.
    segmenter = CKIPSegmenter('Bolin', 'Bolin')
    try:
        result = segmenter.process(inp)  # segmentation result
        if result['status_code'] != '0':  # segmentation failed
            # Only reported; as in the original, processing still continues.
            print('Process Failure: ' + result['status'])

        terms = []     # segmented words
        tags = []      # part-of-speech tag for each word
        labelled = []  # "word(pos)" strings
        for sentence in result['result']:
            for term in sentence:
                word = term['term']
                pos = term['pos']
                terms.append(word)
                tags.append(pos)
                labelled.append(word + '(' + pos + ')')

        combine = ' '.join(labelled)
        return [terms, tags, combine]
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # propagate; callers still get ``False`` on ordinary failures.
        print('error :', inp)
        return False
# Script fragment (line structure collapsed; truncated right after `finally:`).
# Opens a MongoClient on localhost:27018, reads at most 5 documents from
# ntp_councilor.ntp_crs and, for every entry of each document's "platform"
# field, starts a plat_save record (cr_id, cr_name, plat_origin) and sends the
# text to segmenter.process(). When the status code is '0', every term whose
# pos is not one of the four listed *CATEGORY tags is printed and appended to
# platforms_term as a {"pos", "term"} dict. When the status code is not '0',
# or when an exception is raised, platforms_term comes from cuttest(plat).
# NOTE(review): `segmenter`, `cuttest` and `sleep` are defined outside this
# fragment. `except Exception, e` is Python 2-only syntax. The exact nesting
# of `sleep(2)` cannot be recovered from the collapsed line - presumably a
# pause between requests; confirm against the original file.
client = MongoClient('mongodb://localhost:27018/') db = client['ntp_councilor'] collection = db['ntp_crs'] collection_save = db['ntp_platform_example'] crs = list(collection.find().limit(5)) for cr in crs: for plat in cr["platform"]: platforms_term = [] plat_save = {} plat_save["cr_id"] = cr["_id"] plat_save["cr_name"] = cr["name"] plat_save["plat_origin"] = plat try: result = segmenter.process(plat) if result['status_code'] != '0': print('Process Failure: ' + result['status']) platforms_term = cuttest(plat) else: for sentence in list(result['result']): for term in sentence: if term['pos'] != u"PERIODCATEGORY" and term['pos'] != u"COMMACATEGORY" and term['pos'] != u"PAUSECATEGORY" and term['pos'] != u"PARENTHESISCATEGORY": print(term['term'].encode('utf-8'), term['pos']) platforms_term.append({"pos":term['pos'], "term":term['term']}) sleep(2) except Exception, e: print("error") print(e) platforms_term = cuttest(plat) finally:
# Script fragment (line structure collapsed; truncated inside the term loop).
# Creates a CKIPSegmenter and a CKIPParser, connects to MongoDB on
# localhost:27017 and loads every document of councilor.ntp_bills. For each
# bill, bill["description"] is sent to segmenter.process(). If the status
# code is not '0', the term/noun/verb lists are taken from elements 0, 1 and
# 2 of cuttest(...). Otherwise each term is printed; terms whose pos starts
# with "N" and that are not yet in description_nonu are appended to
# description_term and description_nonu, and terms whose pos starts with "V"
# and that are not yet in description_verb are appended to description_term.
# NOTE(review): the CKIP account name and password are hard-coded here.
# NOTE(review): re.match("V", ...) only matches at the start of the string,
# so the extra `term['pos'] != 'ADV'` test can never change the outcome.
# NOTE(review): as stored on one physical line, everything after the first
# `#` is lexically a comment; the original line breaks need restoring.
segmenter = CKIPSegmenter('gcsn', 'rb303147258') parser = CKIPParser('gcsn', 'rb303147258') client = MongoClient('mongodb://localhost:27017/') db = client['councilor'] collection = db['ntp_bills'] # collection_save = db['test'] bills = list(collection.find()) for bill in bills: description_verb = [] description_nonu = [] description_term = [] try: result = segmenter.process(bill["description"]) if result['status_code'] != '0': print('Process Failure: ' + result['status']) tupleUse = cuttest(bill["description"]) description_term = tupleUse[0] description_nonu = tupleUse[1] description_verb = tupleUse[2] else: for sentence in list(result['result']): for term in sentence: print(term['term'].encode('utf-8'), term['pos']) if re.match("N", term['pos']) != None and term['term'] not in description_nonu: description_term.append(term['term']) description_nonu.append(term['term']) if re.match("V", term['pos']) != None and term['pos'] != 'ADV' and term['term'] not in description_verb: description_term.append(term['term'])
# Script fragment (line structure collapsed; truncated inside the term loop;
# `news` and `segmenter` come from outside this fragment).
# Only runs when news['story'] is longer than 3 characters. The story is
# split on '\n' and every non-empty line is sent to segmenter.process(); a
# status code other than '0' is printed but the result is still iterated.
# Terms whose pos starts with "N" go to the *_all and *_noun lists; terms
# whose pos starts with "V" are handled in the verb branch, which is cut off.
# The *_tc_* lists are filled only while ind <= 3, i.e. for the first four
# lines of the story.
# NOTE(review): `dic_news_save = news` binds a second name to the same dict;
# it does not copy it.
# NOTE(review): which appends sit inside `if ind <=3:` cannot be recovered
# from the collapsed line - presumably only the two *_tc_* appends; confirm.
if(len(news['story'])>3): try: dic_news_save = news story_term_ckip_all = [] story_term_ckip_noun = [] story_term_ckip_verb = [] story_term_ckip_tc_all = [] story_term_ckip_tc_noun = [] story_term_ckip_tc_verb = [] story = news['story'].split('\n') for ind, sy in enumerate(story): one_sentence = sy # print(one_sentence.encode('utf-8')) if len(one_sentence) > 0: result = segmenter.process(one_sentence) if result['status_code'] != '0': print('Process Failure: ' + result['status']) for sentence in list(result['result']): for term in sentence: # print(term['term'].encode('utf-8'), term['pos']) if re.match("N", term['pos']) != None: if ind <=3: story_term_ckip_tc_all.append(term['term']) story_term_ckip_tc_noun.append(term['term']) story_term_ckip_all.append(term['term']) story_term_ckip_noun.append(term['term']) if re.match("V", term['pos']) != None and term['pos'] != 'ADV': if ind <=3: story_term_ckip_tc_all.append(term['term']) story_term_ckip_tc_verb.append(term['term'])
# Demo script fragment (line structure collapsed; truncated at the final
# `for sentence in result['result']:` header, whose body is not visible).
# traverse(root) is a recursive generator: a node that has a 'child' key
# yields the leaves of each of its children; any other node is yielded itself.
# The rest shows CKIPSegmenter usage (process a sentence, print the failure
# status when status_code is not '0', then print each term and its pos) and
# the start of the equivalent CKIPParser usage.
# 'YOUR USERNAME' / 'YOUR PASSWORD' are placeholder credentials.
from ckip import CKIPSegmenter, CKIPParser def traverse(root): """Helper function to traverse all leaf nodes of the given tree root.""" if 'child' in root: for child in root['child']: for leaf in traverse(child): yield leaf else: yield root # Usage example of the CKIPSegmenter class segmenter = CKIPSegmenter('YOUR USERNAME', 'YOUR PASSWORD') result = segmenter.process('這是一隻可愛的小花貓') if result['status_code'] != '0': print('Process Failure: ' + result['status']) for sentence in result['result']: for term in sentence: print(term['term'], term['pos']) # Usage example of the CKIPParser class parser = CKIPParser('YOUR USERNAME', 'YOUR PASSWORD') result = parser.process('這是一隻可愛的小花貓') if result['status_code'] != '0': print('Process Failure: ' + result['status']) for sentence in result['result']:
# Script fragment (line structure collapsed; it starts mid-context, so
# `content`, `part_sentence`, `num_part_sentence` and `f2` are defined
# outside it).
# Reads the lines of "ckip_account.txt" and passes lines 0 and 1, each with
# its last character removed, to CKIPSegmenter. It then walks every line of
# every entry in `content`, skips lines equal to u"\n", prints a j/total
# progress counter and the line, segments it, and collects each term's text
# in `words`. Finally the space-joined words are written to f2.
# NOTE(review): f_account is never closed in the visible code.
# NOTE(review): `[:-1]` presumably strips the trailing newline; if the second
# line has no trailing newline it would drop a real character - confirm.
# NOTE(review): the nesting of `time.sleep(3)` cannot be recovered from the
# collapsed line - presumably one pause per request; confirm.
content.append(part_sentence) f_account = codecs.open("ckip_account.txt", 'r') account_info = f_account.readlines() segmenter = CKIPSegmenter(account_info[0][:-1], account_info[1][:-1]) j = 0 words = [] for sentence in content: for line in sentence: j += 1 if line == u"\n": continue print(str(j) + "/" + str(num_part_sentence)) print(line) result = segmenter.process(line) if result['status_code'] != '0': print('Process Failure: ' + result['status']) for sent in result['result']: for term in sent: words.append(term['term']) time.sleep(3) f2.write(u' '.join(words))
# Demo script fragment (line structure collapsed; truncated at the final
# `for sentence in result['result']:` header, whose body is not visible).
# traverse(root) is a recursive generator: a node that has a 'child' key
# yields the leaves of each of its children; any other node is yielded itself.
# The rest segments one sample sentence with CKIPSegmenter, prints the
# failure status when status_code is not '0', prints each term (UTF-8
# encoded) with its pos, and then starts the equivalent CKIPParser call.
# NOTE(review): the CKIP account name and password are hard-coded here.
# NOTE(review): as stored on one physical line, everything after the first
# `#` is lexically a comment; the original line breaks need restoring.
def traverse(root): """Helper function to traverse all leaf nodes of the given tree root.""" if 'child' in root: for child in root['child']: for leaf in traverse(child): yield leaf else: yield root # Usage example of the CKIPSegmenter class segmenter = CKIPSegmenter('gcsn', 'rb303147258') # result = segmenter.process('這是一隻可愛的小花貓') result = segmenter.process( '一、在地子弟商管博士林世宗主張「焚化爐輪燒或停爐」、「受污染地區垃圾袋補助」!為您監督市政、看緊荷包,打造士林、北投幸福家園!') if result['status_code'] != '0': print('Process Failure: ' + result['status']) for sentence in result['result']: for term in sentence: print(term['term'].encode('utf-8'), term['pos']) print("") # Usage example of the CKIPParser class parser = CKIPParser('gcsn', 'rb303147258') result = parser.process('這是一隻可愛的小花貓') if result['status_code'] != '0': print('Process Failure: ' + result['status']) for sentence in result['result']:
# Demo script fragment (line structure collapsed; the visible part ends after
# the CKIPParser status check).
# traverse(root) is a recursive generator: a node that has a 'child' key
# yields the leaves of each of its children; any other node is yielded itself.
# The rest segments one sample sentence with CKIPSegmenter, prints the
# failure status when status_code is not '0', prints each term (UTF-8
# encoded) with its pos, then parses a second sentence with CKIPParser and
# prints its failure status when status_code is not '0'.
# NOTE(review): the CKIP account name and password are hard-coded here.
# NOTE(review): the nesting of `print("")` cannot be recovered from the
# collapsed line (per sentence or once after the loop) - confirm.
def traverse(root): """Helper function to traverse all leaf nodes of the given tree root.""" if 'child' in root: for child in root['child']: for leaf in traverse(child): yield leaf else: yield root # Usage example of the CKIPSegmenter class segmenter = CKIPSegmenter('gcsn', 'rb303147258') # result = segmenter.process('這是一隻可愛的小花貓') result = segmenter.process('一、在地子弟商管博士林世宗主張「焚化爐輪燒或停爐」、「受污染地區垃圾袋補助」!為您監督市政、看緊荷包,打造士林、北投幸福家園!') if result['status_code'] != '0': print('Process Failure: ' + result['status']) for sentence in result['result']: for term in sentence: print(term['term'].encode('utf-8'), term['pos']) print("") # Usage example of the CKIPParser class parser = CKIPParser('gcsn', 'rb303147258') result = parser.process('這是一隻可愛的小花貓') if result['status_code'] != '0': print('Process Failure: ' + result['status'])
# Script fragment (line structure collapsed; truncated inside `re.match(`;
# `client`, `segmenter` and `cuttest` are defined outside this fragment).
# Loads every document of councilor.ntp_crs and, for every entry of each
# document's "platform" field, starts a plat_save record (cr_id, cr_name,
# plat_origin) and sends the text to segmenter.process(). If the status code
# is not '0', the term/noun/verb lists are taken from elements 0, 1 and 2 of
# cuttest(plat). Otherwise each term is printed, and terms whose pos starts
# with "N" are appended to platforms_term and platforms_nonu; the verb
# branch is cut off.
# NOTE(review): collection_save is assigned but not used in the visible part.
db = client['councilor'] collection = db['ntp_crs'] collection_save = db['ntp_platform'] crs = list(collection.find()) for cr in crs: for plat in cr["platform"]: platforms_verb = [] platforms_nonu = [] platforms_term = [] plat_save = {} plat_save["cr_id"] = cr["_id"] plat_save["cr_name"] = cr["name"] plat_save["plat_origin"] = plat try: result = segmenter.process(plat) if result['status_code'] != '0': print('Process Failure: ' + result['status']) tupleUse = cuttest(plat) platforms_term = tupleUse[0] platforms_nonu = tupleUse[1] platforms_verb = tupleUse[2] else: for sentence in list(result['result']): for term in sentence: print(term['term'].encode('utf-8'), term['pos']) if re.match("N", term['pos']) != None: platforms_term.append(term['term']) platforms_nonu.append(term['term']) if re.match( "V",