import sys sys.path.append("../..") from ArticutAPI import Articut from pprint import pprint import json if __name__ == "__main__": try: #使用自己的斷詞額度。 with open("../../account.info", "r") as f: userDICT = json.loads(f.read()) username = userDICT["email"] apikey = userDICT["apikey"] atc = Articut(username=userDICT["email"], apikey=userDICT["apikey"]) except: #使用公用的斷詞額度。(每小時 2000 字) atc = Articut() downSample = '''台指選擇權盤後-年線保衛戰失利 外資續賣台股 台股期權盤後-華為拖累電子 台股摔破支撐 華為風暴持續擴大 三王領跌重挫148點 創波段新低 華為風暴 大跌退守10300點 美商禁令聲聲催,台股電子三王大跌,指數再破年線 跌148.85點 華為風暴擴大 一度破10300點 外資狂撤,台股欲振乏力 利空燒不盡,台股挫百點失年線 華為風暴!台股摜破前低,將往半年線測試 電子股恐慌殺盤出籠,可成慘破200元大關
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

from ArticutAPI import Articut
from pprint import pprint

# Credentials handed to the Articut constructor below.
username = ""
apikey = ""
articut = Articut(username=username, apikey=apikey)


def main(inputSTR):
    """Parse inputSTR with the module-level Articut client and return the result."""
    return articut.parse(inputSTR)


def tfidf(resultDICT):
    """Return the tags extracted from a parse result, limited to topK=3."""
    return articut.analyse.extractTags(resultDICT, topK=3)


def crime(resultDICT):
    """Return what LawsToolkit.getCrime yields for a parse result."""
    return articut.LawsToolkit.getCrime(resultDICT)


def penalty(resultDICT):
    """Return what LawsToolkit.getPenalty yields for a parse result."""
    return articut.LawsToolkit.getPenalty(resultDICT)
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

from ArticutAPI import Articut
from pprint import pprint
from requests import post

# Credentials handed to the Articut constructor below.
username = ""
apikey = ""
articut = Articut(username=username, apikey=apikey)


def main(inputSTR):
    """Parse inputSTR with the module-level Articut client and return the result."""
    resultDICT = articut.parse(inputSTR)
    return resultDICT


if __name__ == "__main__":
    # Spaces and newlines are stripped so the parser receives one unbroken string.
    inputSTR = """位於台南的50年老房子, 地址是台南市東區長榮路3段30巷3號。 這裡承載著曾經是學生宿舍的使命。 我們繼續把這樣的溫度傳遞, 讓往來的人都把這裡當做家, 用緩慢的步調體驗台南的在地生活。""".replace(" ", "").replace("\n", "")

    resultDICT = main(inputSTR)
    #pprint(resultDICT)

    # Get the full addresses.
    addTWLIST = articut.getAddTWLIST(resultDICT)
    #pprint(addTWLIST)
from ArticutAPI import Articut except: from ArticutAPi import Articut import json from pprint import pprint if __name__ == "__main__": try: #使用自己的斷詞額度。 with open("../../account.info", "r") as f: userDICT = json.loads(f.read()) username = userDICT["email"] apikey = userDICT["apikey"] atc = Articut(username=userDICT["email"], apikey=userDICT["apikey"]) except: #使用免費的斷詞額度。 #實體化 Articut() atc = Articut() #載入 Demo 用的文字 with open("./PengHu.txt", encoding="utf-8") as f: contentLIST = [l.replace("\n", "") for l in f.readlines()] resultLIST = [] for c in contentLIST: print("Processing:{}/{} >> {}".format( contentLIST.index(c) + 1, len(contentLIST), c)) resultDICT = atc.parse(c, openDataPlaceAccessBOOL=True) locationLIST = atc.getLocationStemLIST(resultDICT)
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

from ArticutAPI import Articut


def main(inputSTR, nlptool):
    """Parse inputSTR at level "lv3" with the supplied NLP tool.

    Args:
        inputSTR: Text to analyse.
        nlptool: Object exposing ``parse(text, level=...)``; the script below
            passes an Articut instance.

    Returns:
        Whatever ``nlptool.parse`` returns; the script below reads its
        "event" key.
    """
    # Use the injected tool rather than a module-level ``articut`` name, which
    # only exists when this file is executed as a script.
    return nlptool.parse(inputSTR, level="lv3")


if __name__ == "__main__":
    inputSTR = "他關上大門,把窗戶也關上了。"
    articut = Articut()
    resultDICT = main(inputSTR, articut)
    eventLIST = resultDICT["event"]
    print(eventLIST)
import sys
sys.path.append("../..")

from ArticutAPI import Articut
import json
from pprint import pprint

if __name__ == "__main__":
    try:
        # Use your own word-segmentation quota.
        with open("../../account.info", "r") as f:
            userDICT = json.loads(f.read())
        username = userDICT["email"]
        apikey = userDICT["apikey"]
        atc = Articut(username=username, apikey=apikey)
    except Exception:
        # Best-effort fallback: any failure above (missing file, bad JSON,
        # missing key, constructor error) falls back to the free quota by
        # instantiating Articut() without credentials. ``Exception`` is caught
        # instead of a bare ``except`` so Ctrl-C and SystemExit still propagate.
        atc = Articut()

    # Demo text: before loading the government agency names.
    inputSTR = "國軍退除役官兵輔導委員會簡稱退輔會。 "
    resultDICT = atc.parse(inputSTR)
    print("1. 政府機構名稱直接「斷詞」處理:")
    pprint(resultDICT["result_pos"])
    print("=====================")

    # Same text again, this time parsed with the user-defined dictionary file.
    inputSTR = "國軍退除役官兵輔導委員會簡稱退輔會。 "
    resultDICT = atc.parse(inputSTR, userDefinedDictFILE="../../Public_UserDefinedDict/KNOWLEDGE_govTW.json")
    print("2. 政府機構名稱用「自定字典」處理:")
# Installed via git clone
import sys
sys.path.append("../..")

from ArticutAPI import Articut
import json

if __name__ == "__main__":
    try:
        # Use your own word-segmentation quota.
        with open("../../account.info", "r") as f:
            userDICT = json.loads(f.read())
        username = userDICT["email"]
        apikey = userDICT["apikey"]
        atc = Articut(username=username, apikey=apikey)
    except Exception:
        # Best-effort fallback: any failure above falls back to the free quota
        # by instantiating Articut() without credentials. ``Exception`` is
        # caught instead of a bare ``except`` so Ctrl-C and SystemExit still
        # propagate.
        atc = Articut()

    # Load the demo text; the context manager closes the file handle instead
    # of leaving it to the garbage collector.
    with open("./InputString.txt", "r") as f:
        text = f.read()
    sentLIST = text.split("\n")

    print("ArticutAPI Term Extraction Demo")
    for sentence in sentLIST:
        # Skip blank / whitespace-only lines.
        if not sentence.strip():
            continue
        result = atc.parse(sentence)
def jsonTextReader(jsonFilePath):
    """Return the full text content of the file at jsonFilePath, read as UTF-8."""
    with open(jsonFilePath, encoding="utf-8") as f:
        Content = f.read()
    return Content


def jsonFileWriter(jsonDICT, jsonFileName):
    """Serialize jsonDICT as JSON into jsonFileName.

    Non-ASCII characters are written unescaped (``ensure_ascii=False``).
    Always returns None.
    """
    # Raw non-ASCII output needs an explicit UTF-8 encoding: the platform
    # default may be unable to encode it, and jsonTextReader reads UTF-8.
    with open(jsonFileName, mode="w", encoding="utf-8") as f:
        json.dump(jsonDICT, f, ensure_ascii=False)
    return None


def EventAnalysis(inputSTR, nlptool):
    """Parse inputSTR at level "lv3" with the supplied NLP tool.

    Args:
        inputSTR: Text to analyse.
        nlptool: Object exposing ``parse(text, level=...)``; the script below
            passes an Articut instance.

    Returns:
        Whatever ``nlptool.parse`` returns; the script below reads its
        "event" key.
    """
    # Use the injected tool rather than the ``articut`` global, which only
    # exists when this file is executed as a script.
    resultDICT = nlptool.parse(inputSTR, level="lv3")
    return resultDICT


if __name__ == "__main__":
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from a config file or environment variable instead.
    articut = Articut(username="******", apikey="yfYwawQRAvuCkPR#W2uug+bpZoN7cEwyfYwawQRAvuCkPR#W2uug+bpZoN7cEw")
    fileTUPLE = ("../example/text.txt", "./A. forsteri.txt")
    MouseSTR = jsonTextReader(fileTUPLE[0])
    #print(MouseSTR)
    PenguinSTR = jsonTextReader(fileTUPLE[1])
    #print(PenguinSTR)

    MouseDICT_lv3 = EventAnalysis(MouseSTR, articut)
    MouseLIST = MouseDICT_lv3["event"]
    MouseDICT_lv2 = articut.parse(MouseSTR, level="lv2")
    MouseLIST_lv2 = articut.getVerbStemLIST(MouseDICT_lv2)
    #for item in MouseLIST_lv2:
    #    if item != '\n' and item != []:
    #        print(item)
args str[], resultDICT dict Output: resultDICT dict """ from ArticutAPI import Articut import json try: accountDICT = json.loads(open("./account.info", encoding="utf-8").read()) except: accountDICT = {"username":"", "apikey":""} articut = Articut(username=accountDICT["username"], apikey=accountDICT["apikey"]) DEBUG_Cashier = True userDefinedDICT = {"硬幣": ["銅板"]} class CreditCard: def __init__(self): self.balance = 87 self.owner = "Peter" def callToRaiseCredit(self, amount): self.balance = self.balance + amount # 將符合句型的參數列表印出。這是 debug 或是開發用的。 def debugInfo(inputSTR, utterance): if DEBUG_Cashier: print("[Cashier] {} ===> {}".format(inputSTR, utterance))
def main(inputSTR):
    """Parse inputSTR with a newly constructed Articut client and return the result.

    The client is built on every call from the module-level ``username`` and
    ``apikey`` values.
    """
    client = Articut(username=username, apikey=apikey)
    return client.parse(inputSTR)