Beispiel #1
0
def CkipReturn(in_text): #in_text is string
    """Segment ``in_text`` with the CKIP service and return the raw result.

    A new ``CKIPSegmenter`` is created on every call. If converting or
    segmenting ``in_text`` raises, the placeholder text ``'got an error'`` is
    segmented instead, so the caller still gets a segmenter result.

    Args:
        in_text: Text to segment; it is coerced to a text string first.

    Returns:
        Whatever ``segmenter.process`` returns for the text (or for the
        placeholder when the first attempt failed).

    Raises:
        Exception: Anything raised by the fallback ``process`` call.
    """
    # ``unicode`` only exists on Python 2; on Python 3 ``str`` is the text
    # type. Without this the function raised NameError on Python 3.
    try:
        to_text = unicode
    except NameError:
        to_text = str

    # NOTE(review): the account credentials are hard-coded in source.
    segmenter = CKIPSegmenter('changchengtu', 'asd')
    try:
        return segmenter.process(to_text(in_text))
    except Exception:
        # ``Exception`` (not a bare ``except``) so KeyboardInterrupt and
        # SystemExit still propagate.
        return segmenter.process(to_text('got an error'))
Beispiel #2
0
def CkipReturn(in_text):  #in_text is string
    """Return the CKIP segmentation of ``in_text``.

    The text is coerced to a text string and sent to a freshly created
    ``CKIPSegmenter``. When that attempt raises, the literal text
    ``'got an error'`` is segmented and returned instead.

    Args:
        in_text: Text to segment.

    Returns:
        The value returned by ``segmenter.process``.

    Raises:
        Exception: Anything raised by the fallback ``process`` call.
    """
    # Pick the text type for the running interpreter: ``unicode`` on
    # Python 2, ``str`` on Python 3 (where ``unicode`` is undefined).
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # NOTE(review): the account credentials are hard-coded in source.
    segmenter = CKIPSegmenter('changcheng.tu', 'a10206606')
    try:
        segmented_in_text_result = segmenter.process(text_type(in_text))
    except Exception:
        # Narrowed from a bare ``except`` so interpreter-exit signals
        # (KeyboardInterrupt, SystemExit) are not swallowed.
        segmented_in_text_result = segmenter.process(text_type('got an error'))
    return segmented_in_text_result
Beispiel #3
0
def CkipReturn(in_text): #in_text is string
    """Run CKIP word segmentation on ``in_text``.

    If the first attempt raises (conversion or service call), the
    placeholder text ``'got an error'`` is segmented instead and that result
    is returned.

    Args:
        in_text: Text to segment; coerced to a text string before sending.

    Returns:
        The value returned by ``segmenter.process``.

    Raises:
        Exception: Anything raised by the fallback ``process`` call.
    """
    # ``unicode`` is Python 2 only; use ``str`` where it does not exist so
    # the function also works on Python 3.
    try:
        as_text = unicode
    except NameError:
        as_text = str

    # NOTE(review): the account credentials are hard-coded in source.
    segmenter = CKIPSegmenter('_dehao', 'dehao')
    try:
        return segmenter.process(as_text(in_text))
    except Exception:
        # Do not use a bare ``except``: it would also trap
        # KeyboardInterrupt and SystemExit.
        return segmenter.process(as_text('got an error'))
Beispiel #4
0
def to_ckip(inp):
    """Segment ``inp`` with CKIP and return terms, POS tags and a combined string.

    Args:
        inp: Text passed to ``CKIPSegmenter.process``.

    Returns:
        ``[SaveList, SaveSeg, combine]`` on success, where ``SaveList`` holds
        every term of every sentence, ``SaveSeg`` the matching POS tags, and
        ``combine`` the space-joined ``term(pos)`` string. ``False`` when
        processing the result raises.
    """
    segmenter = CKIPSegmenter('Bolin', 'Bolin')  # CKIP account credentials
    try:
        result = segmenter.process(inp)  # segmentation result
        if result['status_code'] != '0':  # segmentation failed
            # Only reported; a missing/invalid payload below ends in the
            # ``except`` branch and yields ``False``.
            print('Process Failure: ' + result['status'])

        terms = [term for sentence in result['result'] for term in sentence]
        SaveList = [term['term'] for term in terms]  # terms
        SaveSeg = [term['pos'] for term in terms]  # part-of-speech tags
        combine = ' '.join(
            word + '(' + pos + ')' for word, pos in zip(SaveList, SaveSeg))

        return [SaveList, SaveSeg, combine]
    except Exception:
        # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not swallowed.
        print('error :', inp)
        return False
def to_ckip(inp):
    """Return CKIP terms, POS tags and a ``term(pos)`` string for ``inp``.

    Args:
        inp: Text passed to ``CKIPSegmenter.process``.

    Returns:
        A three-item list ``[terms, pos_tags, combined]``: two parallel lists
        covering every term of every sentence, plus the terms rendered as
        ``term(pos)`` and joined with single spaces. ``False`` if handling
        the service response raises.
    """
    segmenter = CKIPSegmenter('Bolin', 'Bolin')  # CKIP account credentials
    try:
        result = segmenter.process(inp)  # segmentation result
        if result['status_code'] != '0':  # segmentation failed
            # The failure is only printed; processing continues as before.
            print('Process Failure: ' + result['status'])

        flat_terms = [
            term for sentence in result['result'] for term in sentence
        ]
        SaveList = [term['term'] for term in flat_terms]  # terms
        SaveSeg = [term['pos'] for term in flat_terms]  # part-of-speech tags
        sen_all = [word + '(' + pos + ')' for word, pos in zip(SaveList, SaveSeg)]
        combine = ' '.join(sen_all)

        return [SaveList, SaveSeg, combine]
    except Exception:
        # Narrowed from a bare ``except`` so interpreter-exit signals
        # propagate instead of being reported as a segmentation error.
        print('error :', inp)
        return False
from bson.objectid import ObjectId
from ckip import CKIPSegmenter, CKIPParser
from time import sleep



def cuttest(sent):
    """Segment ``sent`` with ``pseg.cut`` and return one dict per token.

    Prints ``"no ckip"`` as a marker each time it runs.

    Args:
        sent: Sentence to segment.

    Returns:
        A list of ``{"pos": <flag>, "term": <word>}`` dicts, in the order
        the tokens are produced by ``pseg.cut``.
    """
    # Segment the argument itself: the body previously read ``test_sent``,
    # which is not a parameter of this function, and ignored ``sent``.
    result_arr = [
        {"pos": word_use.flag, "term": word_use.word}
        for word_use in pseg.cut(sent)
    ]
    print("no ckip")
    return result_arr


# CKIP clients shared by the rest of the script.
# NOTE(review): the account credentials are hard-coded in source — consider
# loading them from configuration or the environment.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
parser = CKIPParser('gcsn', 'rb303147258')

# Local MongoDB connection (port 27018 here).
client = MongoClient('mongodb://localhost:27018/')
db = client['ntp_councilor']
collection = db['ntp_crs']
# presumably the collection results are written to — its use is not visible here
collection_save = db['ntp_platform_example']
# Materialise at most five documents from ``collection``.
crs = list(collection.find().limit(5))

for cr in crs:
    for plat in cr["platform"]:
        platforms_term = []
        plat_save = {}
        plat_save["cr_id"] = cr["_id"]
        plat_save["cr_name"] = cr["name"]
        plat_save["plat_origin"] = plat
Beispiel #7
0
    noun_terms_arr = []
    verb_terms_arr = []
    words_use = pseg.cut(test_sent)
    for word_use in words_use:
        if re.match("n", word_use.flag) != None and word_use.word not in  noun_terms_arr:
            noun_terms_arr.append(word_use.word)
        if re.match("v", word_use.flag) != None and word_use.word not in  verb_terms_arr:
            verb_terms_arr.append(word_use.word)
    result_arr.extend(noun_terms_arr)
    result_arr.extend(verb_terms_arr)
    print("no ckip")
    print(result_arr)
    return (result_arr, noun_terms_arr, verb_terms_arr)


# CKIP clients shared by the rest of the script.
# NOTE(review): the account credentials are hard-coded in source — consider
# loading them from configuration or the environment.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
parser = CKIPParser('gcsn', 'rb303147258')

# Local MongoDB connection; bills are read from ``councilor.ntp_bills``.
client = MongoClient('mongodb://localhost:27017/')
db = client['councilor']
collection = db['ntp_bills']
# collection_save = db['test']
# Load every document of the collection into memory before processing.
bills = list(collection.find())

for bill in bills:
    description_verb = []
    description_nonu = []
    description_term = []
    try:
        result = segmenter.process(bill["description"])
        if result['status_code'] != '0':
from pymongo import MongoClient
from bson.objectid import ObjectId
from ckip import CKIPSegmenter, CKIPParser
from time import sleep
import datetime

def traverse(root):
    """Yield each leaf under ``root``, depth first and left to right.

    A node is a leaf when it has no ``'child'`` key; a leaf ``root`` is
    yielded as-is.
    """
    if 'child' not in root:
        yield root
        return
    for subtree in root['child']:
        for leaf_node in traverse(subtree):
            yield leaf_node

# CKIP clients shared by the rest of the script.
# NOTE(review): the account credentials are hard-coded in source — consider
# loading them from configuration or the environment.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
parser = CKIPParser('gcsn', 'rb303147258')

# Local MongoDB connection; news documents are read from ``collection``.
client = MongoClient('mongodb://localhost:27017/')
db = client['councilor']
collection = db['news_url_list']
# presumably the collection results are written to — its use is not visible here
collection_save = db['news_url_list_ckip_all']

# news_list = collection.find({"cr":u"吳碧珠"})
# Fetch all documents and materialise the cursor into a list.
news_list = collection.find()
news_list = list(news_list)
for news in news_list:
    d = datetime.datetime.now()
    h = d.hour + d.minute / 60. + d.second / 3600.
    if h < 5.4 or h > 7.3:
        if "story" in news:
Beispiel #9
0
from ckip import CKIPSegmenter, CKIPParser


def traverse(root):
    """Generate the leaf nodes of the tree rooted at ``root``.

    Nodes carrying a ``'child'`` key are expanded recursively in order;
    every other node is treated as a leaf and yielded.
    """
    is_leaf = 'child' not in root
    if is_leaf:
        yield root
    else:
        for branch in root['child']:
            for descendant in traverse(branch):
                yield descendant


# Usage example of the CKIPSegmenter class
# The credentials below are placeholders and must be replaced with a real
# CKIP account before running.
segmenter = CKIPSegmenter('YOUR USERNAME', 'YOUR PASSWORD')
result = segmenter.process('這是一隻可愛的小花貓')
# Any status code other than '0' is reported as a failure.
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# NOTE(review): this loop still runs after a reported failure — confirm that
# result['result'] is present in that case.
# Print every term of every sentence together with its POS tag.
for sentence in result['result']:
    for term in sentence:
        print(term['term'], term['pos'])


# Usage example of the CKIPParser class
# Same placeholder credentials; ``result`` is rebound to the parser response.
parser = CKIPParser('YOUR USERNAME', 'YOUR PASSWORD')
result = parser.process('這是一隻可愛的小花貓')
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])
Beispiel #10
0
        sentence = sentence[pos + 1:]
        pos = sentence.find(u"。")
        if pos == -1:
            pos = sentence.find(u"?")

    if len(part_sentence) == 0:
        part_sentence.append(u"".join(sentence))
        num_part_sentence += 1

    content.append(part_sentence)

# Read the CKIP account file (user name on the first line, password on the
# second) and close it as soon as the lines are loaded instead of leaking
# the handle.
with codecs.open("ckip_account.txt", 'r') as f_account:
    account_info = f_account.readlines()

# Strip only line terminators: slicing off the last character would drop a
# real character when the final line has no trailing newline.
segmenter = CKIPSegmenter(account_info[0].rstrip('\r\n'),
                          account_info[1].rstrip('\r\n'))

j = 0
words = []
for sentence in content:

    for line in sentence:
        j += 1
        if line == u"\n":
            continue

        print(str(j) + "/" + str(num_part_sentence))
        print(line)

        result = segmenter.process(line)
        if result['status_code'] != '0':
from pymongo import MongoClient
from bson.objectid import ObjectId
from ckip import CKIPSegmenter, CKIPParser
from time import sleep
import datetime

def traverse(root):
    """Walk the tree under ``root`` and yield its leaves in order.

    Leaves are the nodes that do not contain a ``'child'`` key.
    """
    if 'child' not in root:
        # Nothing to descend into: the node itself is the leaf.
        yield root
        return
    for child_node in root['child']:
        for found_leaf in traverse(child_node):
            yield found_leaf

# CKIP clients shared by the rest of the script.
# NOTE(review): the account credentials are hard-coded in source — consider
# loading them from configuration or the environment.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
parser = CKIPParser('gcsn', 'rb303147258')

# Local MongoDB connection and the two collections used by this script.
client = MongoClient('mongodb://localhost:27017/')
db = client['councilor']
collection = db['ntp_news_url_list']
# presumably the collection results are written to — its use is not visible here
collection_save = db['ntp_news_url_list_ckip']

def returnFile():
    """Return the lines of ``./diff_ids.txt`` as a list, line endings kept."""
    with open("./diff_ids.txt") as id_file:
        return id_file.readlines()
def parse():
    news_list = returnFile()
    # news_list = collection.find().skip(3441)
    # news_list = list(news_list)
Beispiel #12
0
from ckip import CKIPSegmenter, CKIPParser


def traverse(root):
    """Yield all leaf nodes reachable from ``root``.

    A node with a ``'child'`` key is an inner node whose children are
    visited in order; any other node is yielded as a leaf.
    """
    has_children = 'child' in root
    if not has_children:
        yield root
    else:
        for sub_root in root['child']:
            for leaf_item in traverse(sub_root):
                yield leaf_item


# Usage example of the CKIPSegmenter class
# NOTE(review): the account credentials are hard-coded in source — consider
# loading them from configuration or the environment.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
# result = segmenter.process('這是一隻可愛的小花貓')
result = segmenter.process(
    '一、在地子弟商管博士林世宗主張「焚化爐輪燒或停爐」、「受污染地區垃圾袋補助」!為您監督市政、看緊荷包,打造士林、北投幸福家園!')
# Any status code other than '0' is reported as a failure.
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# NOTE(review): this loop still runs after a reported failure — confirm that
# result['result'] is present in that case.
# Print each term UTF-8 encoded, followed by its POS tag.
for sentence in result['result']:
    for term in sentence:
        print(term['term'].encode('utf-8'), term['pos'])
print("")

# Usage example of the CKIPParser class
parser = CKIPParser('gcsn', 'rb303147258')
result = parser.process('這是一隻可愛的小花貓')
if result['status_code'] != '0':
Beispiel #13
0
from ckip import CKIPSegmenter, CKIPParser


def traverse(root):
    """Iterate over the leaves of the tree whose top node is ``root``.

    Descends through every ``'child'`` list in order and yields the nodes
    that have no ``'child'`` key.
    """
    if 'child' not in root:
        yield root
        return
    for inner in root['child']:
        for terminal in traverse(inner):
            yield terminal


# Usage example of the CKIPSegmenter class
# NOTE(review): the account credentials are hard-coded in source — consider
# loading them from configuration or the environment.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
# result = segmenter.process('這是一隻可愛的小花貓')
result = segmenter.process('一、在地子弟商管博士林世宗主張「焚化爐輪燒或停爐」、「受污染地區垃圾袋補助」!為您監督市政、看緊荷包,打造士林、北投幸福家園!')
# Any status code other than '0' is reported as a failure.
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# NOTE(review): this loop still runs after a reported failure — confirm that
# result['result'] is present in that case.
# Print each term UTF-8 encoded, followed by its POS tag.
for sentence in result['result']:
    for term in sentence:
        print(term['term'].encode('utf-8'), term['pos'])
print("")


# Usage example of the CKIPParser class
parser = CKIPParser('gcsn', 'rb303147258')
result = parser.process('這是一隻可愛的小花貓')
if result['status_code'] != '0':
Beispiel #14
0
 def log_in(self):
     """Create ``self.segmenter`` from the account in ``ckip_account.txt``.

     The first line of the file is passed as the first ``CKIPSegmenter``
     argument and the second line as the second one (user name and
     password, going by the usage examples of ``CKIPSegmenter``).
     """
     # Close the file deterministically instead of leaking the handle.
     with codecs.open("ckip_account.txt", 'r') as f_account:
         account_info = f_account.readlines()
     # Strip only line terminators: slicing off the last character would
     # drop a real character when a line has no trailing newline.
     self.segmenter = CKIPSegmenter(account_info[0].rstrip('\r\n'),
                                    account_info[1].rstrip('\r\n'))
Beispiel #15
0
    noun_terms_arr = []
    verb_terms_arr = []
    words_use = pseg.cut(test_sent)
    for word_use in words_use:
        if re.match("n", word_use.flag) != None:
            noun_terms_arr.append(word_use.word)
        if re.match("v", word_use.flag) != None:
            verb_terms_arr.append(word_use.word)
    result_arr.extend(noun_terms_arr)
    result_arr.extend(verb_terms_arr)
    print("no ckip")
    print(result_arr)
    return (result_arr, noun_terms_arr, verb_terms_arr)


# CKIP clients shared by the rest of the script.
# NOTE(review): the account credentials are hard-coded in source — consider
# loading them from configuration or the environment.
segmenter = CKIPSegmenter('gcsn', 'rb303147258')
parser = CKIPParser('gcsn', 'rb303147258')

# Local MongoDB connection; documents are read from ``councilor.ntp_crs``.
client = MongoClient('mongodb://localhost:27017/')
db = client['councilor']
collection = db['ntp_crs']
# presumably the collection results are written to — its use is not visible here
collection_save = db['ntp_platform']
# Load every document of the collection into memory before processing.
crs = list(collection.find())

for cr in crs:
    for plat in cr["platform"]:
        platforms_verb = []
        platforms_nonu = []
        platforms_term = []
        plat_save = {}
        plat_save["cr_id"] = cr["_id"]
Beispiel #16
0
from ckip import CKIPSegmenter, CKIPParser


def traverse(root):
    """Produce every leaf of the tree under ``root`` in visiting order.

    The presence of a ``'child'`` key marks an inner node; its children are
    traversed recursively. Nodes without that key are yielded.
    """
    node_is_leaf = 'child' not in root
    if node_is_leaf:
        yield root
    else:
        for kid in root['child']:
            for leaf_below in traverse(kid):
                yield leaf_below


# Usage example of the CKIPSegmenter class
# The credentials below are placeholders and must be replaced with a real
# CKIP account before running.
segmenter = CKIPSegmenter('YOUR USERNAME', 'YOUR PASSWORD')
result = segmenter.process('這是一隻可愛的小花貓')
# Any status code other than '0' is reported as a failure.
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])

# NOTE(review): this loop still runs after a reported failure — confirm that
# result['result'] is present in that case.
# Print every term of every sentence together with its POS tag.
for sentence in result['result']:
    for term in sentence:
        print(term['term'], term['pos'])


# Usage example of the CKIPParser class
# Same placeholder credentials; ``result`` is rebound to the parser response.
parser = CKIPParser('YOUR USERNAME', 'YOUR PASSWORD')
result = parser.process('這是一隻可愛的小花貓')
if result['status_code'] != '0':
    print('Process Failure: ' + result['status'])