Beispiel #1
0
def CkipReturn(in_text):
    """Segment ``in_text`` with the CKIP service and return the raw result.

    Args:
        in_text: The text to segment (a string); it is passed through
            ``unicode()`` before being sent.

    Returns:
        Whatever ``CKIPSegmenter.process`` returns.  If processing the input
        raises, the result of segmenting the placeholder text
        ``'got an error'`` is returned instead.
    """
    # NOTE(review): the account name and password are hard-coded in source;
    # consider loading them from configuration.
    segmenter = CKIPSegmenter('changchengtu', 'asd')
    try:
        segmented_in_text_result = segmenter.process(unicode(in_text))
    except Exception:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate instead of triggering
        # a second request to the service.
        segmented_in_text_result = segmenter.process(unicode('got an error'))
    return segmented_in_text_result
Beispiel #2
0
def CkipReturn(in_text):
    """Segment ``in_text`` with the CKIP service and return the raw result.

    Args:
        in_text: The text to segment (a string); it is passed through
            ``unicode()`` before being sent.

    Returns:
        Whatever ``CKIPSegmenter.process`` returns.  If processing the input
        raises, the result of segmenting the placeholder text
        ``'got an error'`` is returned instead.
    """
    # NOTE(review): the account name and password are hard-coded in source;
    # consider loading them from configuration.
    segmenter = CKIPSegmenter('changcheng.tu', 'a10206606')
    try:
        segmented_in_text_result = segmenter.process(unicode(in_text))
    except Exception:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate instead of triggering
        # a second request to the service.
        segmented_in_text_result = segmenter.process(unicode('got an error'))
    return segmented_in_text_result
Beispiel #3
0
def CkipReturn(in_text):
    """Segment ``in_text`` with the CKIP service and return the raw result.

    Args:
        in_text: The text to segment (a string); it is passed through
            ``unicode()`` before being sent.

    Returns:
        Whatever ``CKIPSegmenter.process`` returns.  If processing the input
        raises, the result of segmenting the placeholder text
        ``'got an error'`` is returned instead.
    """
    # NOTE(review): the account name and password are hard-coded in source;
    # consider loading them from configuration.
    segmenter = CKIPSegmenter('_dehao', 'dehao')
    try:
        segmented_in_text_result = segmenter.process(unicode(in_text))
    except Exception:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate instead of triggering
        # a second request to the service.
        segmented_in_text_result = segmenter.process(unicode('got an error'))
    return segmented_in_text_result
Beispiel #4
0
def to_ckip(inp):
    """Segment ``inp`` with CKIP and return its terms, POS tags and a summary.

    Args:
        inp: The text handed to ``CKIPSegmenter.process``.

    Returns:
        A three-item list ``[terms, pos_tags, combined]`` where ``terms`` and
        ``pos_tags`` are parallel lists over every term of every sentence and
        ``combined`` is the space-joined ``term(pos)`` string.  Returns
        ``False`` if anything raises while calling the service or reading
        its result.
    """
    # CKIP account credentials.
    # NOTE(review): hard-coded in source; consider moving to configuration.
    segmenter = CKIPSegmenter('Bolin', 'Bolin')
    try:
        result = segmenter.process(inp)  # segmentation result
        if result['status_code'] != '0':  # the service reported a failure
            print('Process Failure: ' + result['status'])
        terms = [term for sentence in result['result'] for term in sentence]
        SaveList = [term['term'] for term in terms]  # words
        SaveSeg = [term['pos'] for term in terms]  # part-of-speech tags
        combine = ' '.join(
            word + '(' + pos + ')' for word, pos in zip(SaveList, SaveSeg))
        return [SaveList, SaveSeg, combine]
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed and turned into a
        # ``False`` result.
        print('error :', inp)
        return False
def to_ckip(inp):
    """Segment ``inp`` with CKIP and return its terms, POS tags and a summary.

    Args:
        inp: The text handed to ``CKIPSegmenter.process``.

    Returns:
        A three-item list ``[terms, pos_tags, combined]`` where ``terms`` and
        ``pos_tags`` are parallel lists over every term of every sentence and
        ``combined`` is the space-joined ``term(pos)`` string.  Returns
        ``False`` if anything raises while calling the service or reading
        its result.
    """
    # CKIP account credentials.
    # NOTE(review): hard-coded in source; consider moving to configuration.
    segmenter = CKIPSegmenter('Bolin', 'Bolin')
    try:
        result = segmenter.process(inp)  # segmentation result
        if result['status_code'] != '0':  # the service reported a failure
            print('Process Failure: ' + result['status'])
        SaveList = []  # words
        SaveSeg = []  # part-of-speech tags
        sen_all = []  # "word(pos)" strings
        for sentence in result['result']:
            for term in sentence:
                word, pos = term['term'], term['pos']
                SaveList.append(word)
                SaveSeg.append(pos)
                sen_all.append(word + '(' + pos + ')')
        return [SaveList, SaveSeg, ' '.join(sen_all)]
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed and turned into a
        # ``False`` result.
        print('error :', inp)
        return False
# Script fragment: segment each councilor's platform entries with CKIP.
# NOTE(review): this excerpt is cut off at the bare `finally:` below, and
# `segmenter`, `cuttest` and `sleep` are defined outside the visible code.
client = MongoClient('mongodb://localhost:27018/')
db = client['ntp_councilor']
collection = db['ntp_crs']
collection_save = db['ntp_platform_example']
# Only the first five documents of the collection are loaded.
crs = list(collection.find().limit(5))

for cr in crs:
    for plat in cr["platform"]:
        # Fresh term list and output record for every platform entry.
        platforms_term = []
        plat_save = {}
        plat_save["cr_id"] = cr["_id"]
        plat_save["cr_name"] = cr["name"]
        plat_save["plat_origin"] = plat
        try:
            result = segmenter.process(plat)
            if result['status_code'] != '0':
                print('Process Failure: ' + result['status'])
                # The service reported a failure: use `cuttest` instead.
                platforms_term = cuttest(plat)
            else:
                for sentence in list(result['result']):
                    for term in sentence:
                        # Keep only terms whose POS tag is none of the four
                        # categories named in this condition.
                        if term['pos'] != u"PERIODCATEGORY" and term['pos'] != u"COMMACATEGORY" and term['pos'] != u"PAUSECATEGORY" and term['pos'] != u"PARENTHESISCATEGORY":
                            print(term['term'].encode('utf-8'), term['pos'])
                            platforms_term.append({"pos":term['pos'], "term":term['term']})
            # Runs after either branch when no exception was raised
            # (presumably `time.sleep`, i.e. a two-second pause -- confirm).
            sleep(2)
        # NOTE(review): `except Exception, e` is Python 2-only syntax.
        except Exception, e:
            print("error")
            print(e)
            # Any exception also falls back to `cuttest`.
            platforms_term = cuttest(plat)
        finally:
Beispiel #7
0
# Script fragment: segment the "description" of every bill and collect terms.
# NOTE(review): the `try` below has no visible handler -- the excerpt is cut
# off.  `cuttest` is defined outside the visible code.
# NOTE(review): service credentials are hard-coded in source; consider moving
# them to configuration.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
parser = CKIPParser('gcsn', 'rb303147258')

client = MongoClient('mongodb://localhost:27017/')
db = client['councilor']
collection = db['ntp_bills']
# collection_save = db['test']
bills = list(collection.find())

for bill in bills:
    # Per-bill accumulators ("nonu" is the name used for the noun list).
    description_verb = []
    description_nonu = []
    description_term = []
    try:
        result = segmenter.process(bill["description"])
        if result['status_code'] != '0':
            print('Process Failure: ' + result['status'])
            # Fallback: `cuttest` returns an indexable whose items 0, 1 and 2
            # are used as the term, noun and verb lists respectively.
            tupleUse = cuttest(bill["description"])
            description_term = tupleUse[0]
            description_nonu = tupleUse[1]
            description_verb = tupleUse[2]
        else:
            for sentence in list(result['result']):
                for term in sentence:
                    print(term['term'].encode('utf-8'), term['pos'])
                    # POS tags starting with "N": record the term once,
                    # de-duplicated against description_nonu.
                    if re.match("N", term['pos']) != None and term['term'] not in description_nonu:
                        description_term.append(term['term'])
                        description_nonu.append(term['term'])
                    # POS tags starting with "V".
                    # NOTE(review): nothing in the visible code appends to
                    # description_verb in this branch, so the membership test
                    # cannot filter anything here -- possibly truncated.
                    if re.match("V", term['pos']) != None and term['pos'] != 'ADV' and term['term'] not in description_verb:
                        description_term.append(term['term'])
            # NOTE(review): `news` is not defined anywhere in the visible
            # code, and this fragment is cut off at both ends; it appears to
            # belong to a different snippet than the lines before it.
            # Only stories longer than three characters are processed.
            if(len(news['story'])>3):
                try:
                    dic_news_save = news
                    # Terms from the whole story ...
                    story_term_ckip_all = []
                    story_term_ckip_noun = []
                    story_term_ckip_verb = []
                    # ... and terms from the first four lines only (see the
                    # `ind <=3` checks below).
                    story_term_ckip_tc_all = []
                    story_term_ckip_tc_noun = []
                    story_term_ckip_tc_verb = []
                    story = news['story'].split('\n')

                    for ind, sy in enumerate(story):
                        one_sentence = sy
                        # print(one_sentence.encode('utf-8'))
                        # Empty lines are not sent to the segmenter.
                        if len(one_sentence) > 0:
                            result = segmenter.process(one_sentence)
                            if result['status_code'] != '0':
                                print('Process Failure: ' + result['status'])
                            for sentence in list(result['result']):
                                for term in sentence:
                                    # print(term['term'].encode('utf-8'), term['pos'])
                                    # POS tags starting with "N".
                                    if re.match("N", term['pos']) != None:
                                        if ind <=3:
                                            story_term_ckip_tc_all.append(term['term'])
                                            story_term_ckip_tc_noun.append(term['term'])
                                        story_term_ckip_all.append(term['term'])
                                        story_term_ckip_noun.append(term['term'])
                                    # POS tags starting with "V".
                                    if re.match("V", term['pos']) != None and term['pos'] != 'ADV':
                                        if ind <=3:
                                            story_term_ckip_tc_all.append(term['term'])
                                            story_term_ckip_tc_verb.append(term['term'])
Beispiel #9
0
from ckip import CKIPSegmenter, CKIPParser


def traverse(root):
    """Yield every leaf of the tree rooted at ``root``, left to right.

    A node is a leaf when it has no ``'child'`` key; otherwise the items of
    its ``'child'`` entry are walked in order.
    """
    # Stack of child iterators: the top one belongs to the node currently
    # being expanded.
    pending = [iter((root,))]
    while pending:
        try:
            node = next(pending[-1])
        except StopIteration:
            pending.pop()
            continue
        if 'child' in node:
            pending.append(iter(node['child']))
        else:
            yield node


# Usage example of the CKIPSegmenter class
# The credentials below are placeholders.
segmenter = CKIPSegmenter('YOUR USERNAME', 'YOUR PASSWORD')
result = segmenter.process('這是一隻可愛的小花貓')
# A status code other than '0' is reported; execution continues regardless.
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# Print every term together with its POS tag, sentence by sentence.
for sentence in result['result']:
    for term in sentence:
        print(term['term'], term['pos'])


# Usage example of the CKIPParser class
parser = CKIPParser('YOUR USERNAME', 'YOUR PASSWORD')
result = parser.process('這是一隻可愛的小花貓')
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# NOTE(review): the body of this loop is not visible in this excerpt.
for sentence in result['result']:
Beispiel #10
0
    content.append(part_sentence)

# Read the CKIP credentials from a two-line text file (first line and second
# line are passed to CKIPSegmenter in that order).  The ``with`` block closes
# the handle as soon as the lines are read instead of leaving it open.
with codecs.open("ckip_account.txt", 'r') as f_account:
    account_info = f_account.readlines()

# Strip only line terminators: slicing off the last character would drop a
# real character when a line has no trailing newline.
segmenter = CKIPSegmenter(account_info[0].rstrip('\r\n'),
                          account_info[1].rstrip('\r\n'))

j = 0  # running count of lines seen, used for the progress display
words = []  # every segmented term, in input order
for sentence in content:

    for line in sentence:
        j += 1
        # Bare newline entries are counted but not sent to the service.
        if line == u"\n":
            continue

        print(str(j) + "/" + str(num_part_sentence))
        print(line)

        result = segmenter.process(line)
        if result['status_code'] != '0':
            print('Process Failure: ' + result['status'])

        for sent in result['result']:
            for term in sent:
                words.append(term['term'])

        # Pause between requests.
        time.sleep(3)

# Write all collected terms as one space-separated string.
f2.write(u' '.join(words))
Beispiel #11
0

def traverse(root):
    """Generate the leaf nodes below ``root`` in depth-first order.

    A node without a ``'child'`` key is itself a leaf and is yielded as-is;
    otherwise each entry of ``root['child']`` is traversed in turn.
    """
    if 'child' not in root:
        # Leaf: nothing to descend into.
        yield root
        return
    for subtree in root['child']:
        for leaf_node in traverse(subtree):
            yield leaf_node


# Usage example of the CKIPSegmenter class
# NOTE(review): service credentials are hard-coded in source; consider moving
# them to configuration.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
# result = segmenter.process('這是一隻可愛的小花貓')
result = segmenter.process(
    '一、在地子弟商管博士林世宗主張「焚化爐輪燒或停爐」、「受污染地區垃圾袋補助」!為您監督市政、看緊荷包,打造士林、北投幸福家園!')
# A status code other than '0' is reported; execution continues regardless.
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# Print every term (UTF-8 encoded) with its POS tag, then an empty line.
for sentence in result['result']:
    for term in sentence:
        print(term['term'].encode('utf-8'), term['pos'])
print("")

# Usage example of the CKIPParser class
parser = CKIPParser('gcsn', 'rb303147258')
result = parser.process('這是一隻可愛的小花貓')
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# NOTE(review): the body of this loop is not visible in this excerpt.
for sentence in result['result']:
Beispiel #12
0

def traverse(root):
    """Yield every leaf of the tree rooted at ``root``, left to right.

    A node is a leaf when it has no ``'child'`` key; otherwise the items of
    its ``'child'`` entry are walked in order.
    """
    # Stack of child iterators: the top one belongs to the node currently
    # being expanded.
    pending = [iter((root,))]
    while pending:
        try:
            node = next(pending[-1])
        except StopIteration:
            pending.pop()
            continue
        if 'child' in node:
            pending.append(iter(node['child']))
        else:
            yield node


# Usage example of the CKIPSegmenter class
# NOTE(review): service credentials are hard-coded in source; consider moving
# them to configuration.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
# result = segmenter.process('這是一隻可愛的小花貓')
result = segmenter.process('一、在地子弟商管博士林世宗主張「焚化爐輪燒或停爐」、「受污染地區垃圾袋補助」!為您監督市政、看緊荷包,打造士林、北投幸福家園!')
# A status code other than '0' is reported; execution continues regardless.
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# Print every term (UTF-8 encoded) with its POS tag, then an empty line.
for sentence in result['result']:
    for term in sentence:
        print(term['term'].encode('utf-8'), term['pos'])
print("")


# Usage example of the CKIPParser class
parser = CKIPParser('gcsn', 'rb303147258')
result = parser.process('這是一隻可愛的小花貓')
# NOTE(review): the parser result is only status-checked in the visible code;
# whatever consumes it is not shown in this excerpt.
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])
Beispiel #13
0
# Script fragment: segment each councilor's platform entries and split the
# terms into noun and verb lists.
# NOTE(review): `client`, `segmenter` and `cuttest` are defined outside the
# visible code, and the excerpt is cut off inside the final `re.match(` call.
db = client['councilor']
collection = db['ntp_crs']
collection_save = db['ntp_platform']
crs = list(collection.find())

for cr in crs:
    for plat in cr["platform"]:
        # Per-platform accumulators ("nonu" is the name used for the noun
        # list) and the output record for this entry.
        platforms_verb = []
        platforms_nonu = []
        platforms_term = []
        plat_save = {}
        plat_save["cr_id"] = cr["_id"]
        plat_save["cr_name"] = cr["name"]
        plat_save["plat_origin"] = plat
        try:
            result = segmenter.process(plat)
            if result['status_code'] != '0':
                print('Process Failure: ' + result['status'])
                # Fallback: `cuttest` returns an indexable whose items 0, 1
                # and 2 are used as the term, noun and verb lists.
                tupleUse = cuttest(plat)
                platforms_term = tupleUse[0]
                platforms_nonu = tupleUse[1]
                platforms_verb = tupleUse[2]
            else:
                for sentence in list(result['result']):
                    for term in sentence:
                        print(term['term'].encode('utf-8'), term['pos'])
                        # POS tags starting with "N" go to both the term list
                        # and the noun list.
                        if re.match("N", term['pos']) != None:
                            platforms_term.append(term['term'])
                            platforms_nonu.append(term['term'])
                        if re.match(
                                "V",
Beispiel #14
0
from ckip import CKIPSegmenter, CKIPParser


def traverse(root):
    """Generate the leaf nodes below ``root`` in depth-first order.

    A node without a ``'child'`` key is itself a leaf and is yielded as-is;
    otherwise each entry of ``root['child']`` is traversed in turn.
    """
    if 'child' not in root:
        # Leaf: nothing to descend into.
        yield root
        return
    for subtree in root['child']:
        for leaf_node in traverse(subtree):
            yield leaf_node


# Usage example of the CKIPSegmenter class
# The credentials below are placeholders.
segmenter = CKIPSegmenter('YOUR USERNAME', 'YOUR PASSWORD')
result = segmenter.process('這是一隻可愛的小花貓')
# A status code other than '0' is reported; execution continues regardless.
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# Print every term together with its POS tag, sentence by sentence.
for sentence in result['result']:
    for term in sentence:
        print(term['term'], term['pos'])


# Usage example of the CKIPParser class
parser = CKIPParser('YOUR USERNAME', 'YOUR PASSWORD')
result = parser.process('這是一隻可愛的小花貓')
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# NOTE(review): the body of this loop is not visible in this excerpt.
for sentence in result['result']: