def make_keywords(sentence):
    # API ID issued by goo Lab
    with open('apikey.json', 'r') as f:
        api_data = json.load(f)
    app_id = api_data['keyword_api_key']
    api = GoolabsAPI(app_id)

    # See sample response below.
    # As an example, text copy-pasted from the half-title page of
    # "Python for Data Analysis" was used as the body for keyword extraction:
    # template = u'1.2なぜPythonはデータ分析者におすすめなのか\n私自身を含む多くの人にとって、Pythonという言語は恋に落ちやすい言語です。1991年の登場の時から、\nPythonは、PerlやRubyなどの言語と並び、最も有名な動的プログラミング言語の1つでしたPythonと\nRubyは最近では、多数のWebフレームワーク(たとえば、RubyではRails, PythonではDjango)を使ったWebサイト構築で特に有名です。これらの言語はよくスクリプト言語と呼ばれます。これは、汚くてもすぐに書ける短いプログラム、つまり、スクリプトを書くのに使えるからです。'
    template = sentence
    sample_response = api.keyword(title="photo01", body=template, max_num=5)

    # pprint prints the response in a formatted way (sample_response is a dict).
    # pprint.pprint(sample_response)

    # Output max_num keywords as a list. The number returned with each
    # keyword appears to represent its importance.
    keywords_list = sample_response['keywords']
    data = []
    for keyword in keywords_list:
        data.extend(list(keyword.keys()))
    return data
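A minimal usage sketch, assuming an apikey.json file of the form {"keyword_api_key": "..."} sits next to the script and that json and GoolabsAPI are imported at module level; the sample sentence is arbitrary:

import json
from goolabs import GoolabsAPI

keywords = make_keywords(u'Pythonはデータ分析で広く使われる動的プログラミング言語です。')
print(keywords)  # up to five extracted keywords, e.g. ['Python', 'データ分析', ...]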
def morph(ctx, app_id, sentence_file, json_flag, sentence,
          info_filter, pos_filter, request_id):
    # type: (Context, unicode, Optional[IO], bool, unicode, unicode, unicode, unicode) -> None  # NOQA
    """Morphological analysis for Japanese."""
    app_id = clean_app_id(app_id)
    sentence = clean_sentence(sentence, sentence_file)

    if info_filter:
        info_filter = info_filter.replace(',', '|')

    if pos_filter:
        pos_filter = pos_filter.replace(',', '|')

    api = GoolabsAPI(app_id)
    ret = api.morph(
        sentence=sentence,
        info_filter=info_filter,
        pos_filter=pos_filter,
        request_id=request_id,
    )

    if json_flag:
        click.echo(format_json(api.response.json()))
        return

    for words in ret['word_list']:
        for word in words:
            click.echo(','.join(word))
def keyword(ctx, app_id, body_file, json_flag, title, body,
            max_num, forcus, request_id):
    # type: (Context, unicode, Optional[IO], bool, unicode, unicode, int, unicode, unicode) -> None  # NOQA
    """Extract "keywords" from an input document."""
    app_id = clean_app_id(app_id)
    body = clean_body(body, body_file)

    api = GoolabsAPI(app_id)
    ret = api.keyword(
        title=title,
        body=body,
        max_num=max_num,
        forcus=forcus,
        request_id=request_id,
    )

    if json_flag:
        click.echo(format_json(api.response.json()))
        return

    for k in ret['keywords']:
        k = dict((key.encode('utf-8'), k[key]) for key in k.keys())
        for keyword, score in six.iteritems(k):
            click.echo(u'{0},{1}'.format(text(keyword), score))
def judge(word):
    app_id = "39bc88fcf7da5a2e42e311dbf872353f8a23960f7d4f021b20fefc7504ec76c6"
    api = GoolabsAPI(app_id)

    # See sample response below.
    # Note: favoriteword is assumed to be defined at module level.
    ret = api.similarity(query_pair=[word, favoriteword])
    print(ret['score'])
    return ret['score']
def get_posChecker(keyword_list):
    app_id = "72557413b523d38db2d1de26f8095928d43d6d0882707ed41249f9edb643db45"
    api = GoolabsAPI(app_id)

    # Collect the part of speech of the first morpheme of each keyword.
    check_list = []
    for keyword in keyword_list:
        check_list.append(
            str(api.morph(sentence=keyword)["word_list"][0][0][1]))

    # True when every keyword shares the same part of speech.
    return len(set(check_list)) == 1
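A minimal usage sketch for get_posChecker; the keyword list here is illustrative:

# Hypothetical input: keywords extracted elsewhere.
if get_posChecker(['東京', '大阪', '横浜']):
    print('all keywords share the same part of speech')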
def similarity(ctx, app_id, json_flag, query_pair, request_id):
    # type: (Context, unicode, bool, List[unicode], unicode) -> None
    """Scoring the similarity of two words."""
    app_id = clean_app_id(app_id)

    api = GoolabsAPI(app_id)
    ret = api.similarity(query_pair=query_pair, request_id=request_id)

    if json_flag:
        click.echo(format_json(api.response.json()))
        return

    click.echo('{0:.16f}'.format(ret['score']))
def hiragana(ctx, app_id, sentence_file, json_flag, sentence,
             output_type, request_id):
    # type: (Context, unicode, Optional[IO], bool, unicode, unicode, unicode) -> None  # NOQA
    """Convert the Japanese to Hiragana or Katakana."""
    app_id = clean_app_id(app_id)
    sentence = clean_sentence(sentence, sentence_file)

    api = GoolabsAPI(app_id)
    ret = api.hiragana(sentence=sentence,
                       output_type=output_type,
                       request_id=request_id)

    if json_flag:
        click.echo(format_json(api.response.json()))
        return

    click.echo(ret['converted'])
def shortsum(ctx, app_id, review_file, json_flag, review, length, request_id):
    # type: (Context, unicode, Optional[IO], bool, unicode, unicode, unicode) -> None  # NOQA
    """Summarize reviews into a short summary."""
    app_id = clean_app_id(app_id)
    review_list = clean_review(review, review_file)
    length_int = clean_length(length)  # type: Optional[int]

    api = GoolabsAPI(app_id)
    ret = api.shortsum(
        review_list=review_list,
        length=length_int,
        request_id=request_id,
    )

    if json_flag:
        click.echo(format_json(api.response.json()))
        return

    click.echo(ret['summary'])
def chrono(ctx, app_id, sentence_file, json_flag, sentence, doc_time, request_id):
    # type: (Context, unicode, Optional[IO], bool, unicode, unicode, unicode) -> None  # NOQA
    """Extract expressions of date and time and normalize their values."""
    app_id = clean_app_id(app_id)
    sentence = clean_sentence(sentence, sentence_file)

    api = GoolabsAPI(app_id)
    ret = api.chrono(
        sentence=sentence,
        doc_time=doc_time,
        request_id=request_id,
    )

    if json_flag:
        click.echo(format_json(api.response.json()))
        return

    for pair in ret['datetime_list']:
        click.echo(u'{0}: {1}'.format(text(pair[0]), pair[1]))
def entity(ctx, app_id, sentence_file, json_flag, sentence,
           class_filter, request_id):
    # type: (Context, unicode, Optional[IO], bool, unicode, unicode, unicode) -> None  # NOQA
    """Extract named entities from a sentence."""
    app_id = clean_app_id(app_id)
    sentence = clean_sentence(sentence, sentence_file)

    if class_filter:
        class_filter = class_filter.replace(',', '|')

    api = GoolabsAPI(app_id)
    ret = api.entity(sentence=sentence,
                     class_filter=class_filter,
                     request_id=request_id)

    if json_flag:
        click.echo(format_json(api.response.json()))
        return

    for ne in ret['ne_list']:
        click.echo(','.join(ne))
class QuestionGenerator(object):

    def __init__(self, text: str, goolab_api_key: str):
        """
        Generates questions from a string using the Goolabs API.
        :param text: source string for question generation
        :param goolab_api_key: Goolabs API key
        """
        self.text = text
        self.goolab = GoolabsAPI(goolab_api_key)
        self.keywords = self.__get_keywords()
        self.questions = self.__create_questions()

    def __get_keywords(self):
        """
        Extract keywords from the text parameter via the Goolabs entity API.
        :return: list of (keyword, class) tuples
        """
        keywords = []
        if not self.text:
            return []
        ret = self.goolab.entity(sentence=self.text,
                                 class_filter=u"PSN|ORG|ART|DAT")
        for idx in range(len(ret['ne_list'])):
            key = (ret['ne_list'][idx][0], ret['ne_list'][idx][1])
            keywords.append(key)
        return keywords

    def __create_questions(self):
        """
        Build question sentences by appending phrases to the extracted
        keywords. Note that the generated questions are returned as a *list*.
        :return questions: generated question sentences
        """
        # Create questions per entity class.
        questions = []
        for key in self.keywords:
            if key[1] == 'PSN':
                questions.append(key[0] + 'は何をしたか?')
                questions.append(key[0] + 'について説明せよ。')
            elif key[1] == 'ART':
                questions.append(key[0] + 'とは何か?')
                questions.append(key[0] + 'は誰が考案したか?')
            elif key[1] == 'DAT':
                questions.append(key[0] + 'には何が起こったか?')
            elif key[1] == 'ORG':
                questions.append(key[0] + 'はいつ作られたか?')
                questions.append(key[0] + 'について説明せよ。')
        return questions

    def get_questions(self, max_questions: int = 3):
        """
        Return randomly chosen questions from the question list.
        :param max_questions: number of questions
        :return: randomly selected list of questions
        """
        n_ques = len(self.questions)
        if max_questions > n_ques:
            max_questions = n_ques
        return random.sample(self.questions, max_questions)
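A usage sketch for QuestionGenerator, assuming a valid key in place of the placeholder; the sample text is arbitrary:

import random

from goolabs import GoolabsAPI

generator = QuestionGenerator(u'夏目漱石は1905年に『吾輩は猫である』を発表した。',
                              'YOUR_GOOLAB_API_KEY')  # placeholder key
for question in generator.get_questions(max_questions=2):
    print(question)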
from goolabs import GoolabsAPI
import configparser

config = configparser.ConfigParser()
config.read('config.ini')
app_id = config['gooAPI']['id']

api = GoolabsAPI(app_id)
ret = api.entity(sentence=u"鈴木さんがきょうの9時30分に横浜に行きます。")
print(ret)
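For reference, ne_list in the response holds [surface, class] pairs, as the other snippets above iterate over; the values below illustrate the shape and are not verbatim output:

# Roughly:
# {'ne_list': [['鈴木', 'PSN'], ['きょう', 'DAT'],
#              ['9時30分', 'TIM'], ['横浜', 'LOC']],
#  'request_id': '...'}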
def get_request(text):
    # `key` is assumed to be a module-level Goolabs app id defined elsewhere.
    api = GoolabsAPI(key)
    response = api.morph(sentence=text)
    return response
def get_promise_content(letter):
    api = GoolabsAPI(GOO_API_KEY)
    index = 0
    # letter = "クリスマスに岩見と一緒に東京駅に来て"

    # Time normalization API
    # chrono_response = api.chrono(sentence=letter)
    # Named entity extraction API
    entity_response = api.entity(sentence=letter)
    # Morphological analysis API
    # morph_response = api.morph(sentence=letter)

    date = ''
    hour = ''
    min = ''
    place = ''

    for i in range(len(entity_response["ne_list"])):
        # Extract the date
        if entity_response["ne_list"][i][1] == "DAT":
            day_response = api.chrono(sentence=entity_response["ne_list"][i][0])
            if len(day_response['datetime_list']) != 0:
                date = day_response["datetime_list"][0][1]
            # Compute the index where the promise content starts
            day = entity_response["ne_list"][i][0]
            index = letter.find(day) + len(day)

        # Extract the time
        elif entity_response["ne_list"][i][1] == "TIM":
            time_len = len(entity_response["ne_list"][i][0])
            for j in range(time_len):
                if entity_response["ne_list"][i][0][j] == u"時":
                    hour = entity_response["ne_list"][i][0][0:j]
                    hour_n = j
                    # Compute the index where the promise content starts
                    time = entity_response["ne_list"][i][0]
                    index = letter.find(time) + len(time)
                    if j != time_len - 1:
                        if entity_response["ne_list"][i][0][j + 1] == u"半":
                            min = "30"
                elif entity_response["ne_list"][i][0][j] == u"分":
                    min = entity_response["ne_list"][i][0][hour_n + 1:j]

        elif entity_response["ne_list"][i][1] == "LOC":
            place = entity_response["ne_list"][i][0]

    # Output the promise content.
    # Check whether the content starts with a case particle or a comma.
    morph_response = api.morph(sentence=letter[index:len(letter)])
    if morph_response["word_list"][0][0][1] == u"格助詞" \
            or morph_response["word_list"][0][0][1] == u"読点":
        par_len = len(morph_response["word_list"][0][0][0])
        content = letter[index + par_len:len(letter)]
    else:
        content = letter[index:len(letter)]

    return {'date': date, 'hour': hour, 'min': min,
            'content': content, 'place': place}
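A usage sketch, assuming GOO_API_KEY holds a valid app id; the letter is the example from the comment in the function:

promise = get_promise_content(u'クリスマスに岩見と一緒に東京駅に来て')
print(promise)  # dict with 'date', 'hour', 'min', 'place', 'content'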
APP_ROOT = os.path.dirname(os.path.abspath(__file__))

# Load the config file
config = configparser.ConfigParser()
config.read(APP_ROOT + '/config_local.ini')

# Read goo Lab API settings from the config file
Goo_API_APPLICATION_ID = config.get("Goo_API", "ApplicationId")

# Read RECRUIT API settings from the config file
RECRUITE_API_PROOFREADING_API_KEY = config.get("RECRUITE_API", "ProofreadingAPIKey")
RECRUITE_API_PROOFREADING_URL = config.get("RECRUITE_API", "ProofreadingURL")

# Headers for sending JSON
headers = {"Content-type": "application/json"}

# API client for the goo Lab API
gooAPI = GoolabsAPI(Goo_API_APPLICATION_ID)

# Load dictionary data
json_open = open(APP_ROOT + '/dict.json', 'r')
HumbleLangDict = json.load(json_open)

# Load noun dictionary data
json_open = open(APP_ROOT + '/noun.json', 'r')
HumbleNounDict = json.load(json_open)

# Parts of speech that can be skipped during the search
Posto = ['句点', '読点', '空白', '格助詞', '終助詞', '括弧', '助数詞', '助助数詞', '冠数詞']


# Returns people and company names as lists
def get_list_people_companies(sentence):
""" データベースに接続する""" connector = MySQLdb.connect( unix_socket="/Applications/MAMP/tmp/mysql/mysql.soc\ k", host="localhost", db="Wordrop", user="******", passwd="root", charset="utf8") cur = connector.cursor() cur.execute("select * from home;") app_id = "2d84d0d734ebefeb1f4dcf8ae106ec9d2f3b72a5be084a1014d6e27a9002ffef" api = GoolabsAPI(app_id) response = api.morph(pos_filter="名詞", sentence=tex) #sentenceにある言葉を形態素解析する value = response["word_list"] #形態素解析の結果を代入 \ response = api.entity(sentence=value) #名詞の固有表現を求める \ list = response.values() pp(value) response = api.entity(sentence=value[0][0][0]) print type(value) ss = []
from goolabs import GoolabsAPI
import configparser

config = configparser.ConfigParser()
config.read('config.ini')
app_id = config['gooAPI']['id']

api = GoolabsAPI(app_id)
ret = api.keyword(
    request_id="keyword-req001",
    title="「和」をコンセプトとする 匿名性コミュニケーションサービス「MURA」",
    body="NTTレゾナント株式会社(本社:東京都港区、代表取締役社長:若井 昌宏",
    max_num=10,
    forcus="ORG",
)
print(ret)
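For reference, the keywords entry of the response is a list of single-pair {keyword: score} dicts, which is the shape the earlier snippets iterate over; the values below are illustrative, not verbatim output:

# Roughly:
# {'keywords': [{'NTTレゾナント': 0.6}, {'MURA': 0.54}, ...],
#  'request_id': 'keyword-req001'}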
import io
import os
import subprocess
import sys

from goolabs import GoolabsAPI

sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')

# Read the text file generated on the chat screen
chat_text = ""
with open('./data/chat_text.txt', newline='', encoding='utf-8') as f:
    for row in f:
        chat_text = row
        print(row)

app_id = "13346dab6a67b41a884de45bcbf4bdc523ced4da711c35346f0a8501ef0b23fe"
api = GoolabsAPI(app_id)

echat_text = chat_text.encode('shift-jis')
uchat_text = echat_text.decode('shift-jis')

# # See sample response below.
# ret = api.entity(sentence=uchat_text)
# with open('./data/entity_response.json', 'w', newline='', encoding='utf-8') as f:
#     f.write(str(ret))

# See sample response below.
ret = api.morph(sentence=uchat_text)
with open('./data/morph_response.json', 'w', newline='', encoding='utf-8') as f:
    f.write(str(ret))

# print(json.dumps(ret, sort_keys=True, indent=4))
# {'word_list': [[['トイレットペーパー', '名詞', 'トイレットペーパー'],
from goolabs import GoolabsAPI
import json

# Set up the API client
app_id = "9707a9ca41154956524fe5ef01ba774b4305ccc701adfb6be574a87ba4a5687b"
api = GoolabsAPI(app_id)

# Original text data
f = open('before.txt', 'r', encoding='UTF-8')
data = f.read()
OriginalText = data


# Word search function
def SearchForWords(sentence):
    for start in range(len(sentence)):
        for end in range(len(sentence) - 1, start - 1, -1):
            testKey = ''
            for check in range(start, end + 1):
                testKey += sentence[check][0]
            if testKey in HumbleLangDict:
                if testKey not in HitWordList:
                    HitWordList.append(testKey)


# Word replacement function
def ChangeWord(text, HitWordList):
    ConvertedText = text
    for word in HitWordList:
        ConvertedText = ConvertedText.replace(word, HumbleLangDict[word])
    return ConvertedText
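A sketch of how these pieces could fit together, assuming HumbleLangDict maps plain forms to humble forms and HitWordList is a module-level list (both belong to the surrounding project):

HitWordList = []

# Morphemes of the first sentence: each item is [surface, pos, reading].
morphemes = api.morph(sentence=OriginalText)["word_list"][0]
SearchForWords(morphemes)
print(ChangeWord(OriginalText, HitWordList))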
def multiple_replace(text, adict):
    rx = re.compile('|'.join(map(re.escape, adict)))

    def dedictkey(text):
        '''
        Return the dictionary key that the matched string came from.
        '''
        for key in adict.keys():
            if re.search(key, text):
                return key

    def one_xlat(match):
        return adict[dedictkey(match.group(0))]

    return rx.sub(one_xlat, text)


original_text = '大変だ!抽選10回もできるぞ!(基本5回、ツイートで5回)'

# Build a dictionary from the csv file
with open('dict.csv', 'r') as f:
    csvdata = csv.reader(f)
    data = [x for x in csvdata]
datadic = dict(data)

APPID = sys.argv[1]
api = GoolabsAPI(APPID)
req = api.hiragana(sentence=original_text, output_type='hiragana')
after = multiple_replace(req['converted'], datadic)

print('Before:\t' + original_text)
print('After:\t' + after)