Example #1
def NLP_SDK(text,
            method='depParser',
            APP_ID=_APP_ID,
            API_KEY=_API_KEY,
            SECRET_KEY=_SECRET_KEY,
            **options):
    '''
    Parameter names follow the official Baidu SDK docs unchanged: <https://ai.baidu.com/docs#/NLP-Python-SDK/top>
    :param text: the document(s) to tag; some methods expect a pair given as a list
    :param method: name of the NLP function to call
    :param APP_ID: project account credential
    :param API_KEY: project account credential
    :param SECRET_KEY: project account credential
    :param options: additional optional parameters
    :return: the raw result returned by the Baidu SDK
    Full list of method names:
    lexical analysis: lexer; lexical analysis (custom): lexerCustom; dependency parsing: depParser;
    word embeddings: wordEmbedding; DNN language model: dnnlm; word similarity: wordSimEmbedding; short-text similarity: simnet;
    comment opinion extraction: commentTag; sentiment analysis: sentimentClassify; article tagging: keyword; article classification: topic
    '''
    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    # lexical analysis
    if method == 'lexer':
        result = client.lexer(text, **options)
    # lexical analysis (custom)
    elif method == 'lexerCustom':
        result = client.lexerCustom(text, **options)
    # dependency parsing
    elif method == 'depParser':
        result = client.depParser(text, **options)
    # word embeddings
    elif method == 'wordEmbedding':
        result = client.wordEmbedding(text, **options)
    # DNN language model
    elif method == 'dnnlm':
        result = client.dnnlm(text, **options)
    # word similarity
    elif method == 'wordSimEmbedding':
        word1, word2 = text[0], text[1]
        result = client.wordSimEmbedding(word1, word2, **options)
    # short-text similarity
    elif method == 'simnet':
        text1, text2 = text[0], text[1]
        result = client.simnet(text1, text2, **options)
    # comment opinion extraction
    elif method == 'commentTag':
        result = client.commentTag(text, **options)
    # sentiment analysis
    elif method == 'sentimentClassify':
        result = client.sentimentClassify(text, **options)
    # article tagging
    elif method == 'keyword':
        title, content = text[0], text[1]
        result = client.keyword(title, content, **options)
    # article classification
    elif method == 'topic':
        title, content = text[0], text[1]
        result = client.topic(title, content, **options)
    return result
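A minimal usage sketch (credentials fall back to the module-level defaults; single-text methods take a string, pair methods take a two-element list — the sample inputs are illustrative):

print(NLP_SDK('百度是一家高科技公司', method='lexer'))           # single text
print(NLP_SDK(['茶壶', '水瓶'], method='wordSimEmbedding'))      # word pair
print(NLP_SDK(['标题', '正文内容'], method='keyword'))           # (title, content) pair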
Example #2
class BaiduNLP:
    def __init__(self):
        self.APP_ID = '9519234'
        self.API_KEY = 'CIwEvSR9m9hEWnQp2GK7LGKI'
        self.SECRET_KEY = 's4hA4YTO1SjqIkRzTCT5uHSa715BKHFL'
        self.baiduNlp = AipNlp(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    '''----------分词-----------'''

    def wordseg(self, words):
        return self.baiduNlp.wordseg(words)

    '''---------- POS tagging ----------'''

    def wordpos(self, words):
        return self.baiduNlp.wordpos(words)

    '''---------- word embeddings ----------'''

    def wordembedding(self, words1, words2=''):
        return self.baiduNlp.wordembedding(words1, words2)

    '''---------- comment opinion extraction; type defaults to 7 (education) ----------'''

    def commenttag(self, words, type=7):
        # the SDK expects optional arguments as a dict, e.g. {'type': 7}
        comment = self.baiduNlp.commentTag(words, {'type': type})  # raw response
        commentTags = comment[u'tags']  # the extracted opinions; there may be several
        validComment = []
        for temp in commentTags:
            abstract = self.__deleteUnvalid(temp[u'abstract'])
            tempComement = {
                u'abstract': abstract,
                u'adj': temp[u'adj'],
                u'fea': temp[u'fea'],
                u'type': temp[u'type']
            }
            validComment.append(tempComement)
        return validComment

    '''---------- DNN language model ----------'''

    def dnnlm(self, words):
        return self.baiduNlp.dnnlm(words)

    '''---------- short-text similarity ----------'''

    def simnet(self, essay1, essay2):
        return self.baiduNlp.simnet(essay1, essay2)

    def __deleteUnvalid(self, sentence):  # strip <span> markup
        abstract = sentence.replace("<span>", "")
        abstract = abstract.replace("</span>", "")
        return abstract
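A quick usage sketch; the type code passed to commenttag follows Baidu's commentTag industry list (7 = education, per the divider above):

nlp = BaiduNLP()
print(nlp.simnet('穿衣裳', '穿衣服'))            # short-text similarity
for opinion in nlp.commenttag('老师讲得很好'):    # opinions with <span> markup stripped
    print(opinion[u'abstract'], opinion[u'adj'])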
Example #3
class BaiduDnnLM(LanguageModel):
    def __init__(self):
        super(BaiduDnnLM, self).__init__()
        conn_baidu_ai = ConnectBaiduAI()
        self._client = AipNlp(conn_baidu_ai.app_id, conn_baidu_ai.api_key,
                              conn_baidu_ai.secret_key)

    def __repr__(self):
        return "<This is Baidu NLP API>"

    def get_reply(self, text):
        """  获取接口回复
        :param text: 输入文本内容
        :return: 百度API返回结果,dict
        """
        return self._client.dnnlm(text=text)

    def get_ppl(self, text):
        """  获取句子混淆度
        :param text: 输入文本内容
        :return: 句子混淆度,混淆度越小,句子越符合语言逻辑
        """
        return self._client.dnnlm(text=text)["ppl"]
Example #4
File: nlp.py Project: newszeng/ai_writer
    def dnn(self, text):
        # print(config.get('site', 'name'))
        APP_ID = config.get('baidu', 'app_id')
        API_KEY = config.get('baidu', 'app_key')
        SECRET_KEY = config.get('baidu', 'secret_key')

        client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
        # result = client.synthesis(text, 'zh', 1, {
        #     'vol': 11,
        # })
        # text = "床前明月光"

        # """ 调用DNN语言模型 """
        # print(client)
        return client.dnnlm(text)
Example #5
class BaiDuNlp(BaseAPI):
    """docstring for BaiDuNlp"""
    def __init__(self):
        super(BaiDuNlp, self).__init__()
        self.APP_ID = '10323015'
        self.API_KEY = 'zYbYSDZxIFvH4I53ye2jp8qf'
        self.SECRET_KEY = '3os02bOi9hxZC9775MbKVcYo4BP7GTSm'
        self.client = AipNlp(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    def cutWord(self, text, times, errormessage):
        if times > 5:
            print("error:" + text + " in BaiDuNlp\nMessage: " + errormessage)
            jb = JieBa()
            return jb.cutWord(text)

        result = []
        res = self.client.dnnlm(text)
        if 'error_code' not in res:
            for r in res['items']:
                result.append(r['word'])
            return result
        else:
            return self.cutWord(text, times + 1, res['error_msg'])

    def cutWordByCSVFile(self, filenamein, filenameout):
        rawdata = fileutil.readFileFromCSV(filenamein)
        fileutil.deleteFileIfExist(filenameout)
        result = ""
        totalProcess = len(rawdata)
        process = 0
        for rd in rawdata:
            text = rd.text
            res = self.cutWord(text, 0, "")
            process = process + 1
            self.showProcess(process, totalProcess)
            for r in res:
                result = result + r + " "
            result = result + "\n"
        fileutil.writeFile(filenameout, result)
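A usage sketch (the file names are hypothetical; JieBa and fileutil come from the surrounding project):

nlp = BaiDuNlp()
print(nlp.cutWord('百度是一家高科技公司', 0, ''))  # token list, falling back to jieba after 5 failed attempts
nlp.cutWordByCSVFile('raw.csv', 'segmented.txt')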
Example #6
class NLP():
    def __init__(self):
        """ 你的 APPID AK SK """
        APP_ID = '16043979'
        API_KEY = 'vr2XhyMVrjW7dWZOZjqeLsae'
        SECRET_KEY = 'RypiqTeFnVIED0zpKOxRIZHbc5a8a2wE'

        self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

    def lexical_analysis(self):
        '''The lexical analysis endpoint offers segmentation, POS tagging and named-entity
        recognition: it identifies the basic tokens in the text (segmentation), regroups
        them, tags the POS of the combined tokens, and further recognizes named entities.'''
        text = "百度是一家高科技公司"
        """ 调用词法分析 """
        resp = self.client.lexer(text)
        print(resp)

    def Interdependent(self):
        '''Dependency parsing
        The dependency parsing endpoint automatically analyzes the dependency structure of a text,
        using word-to-word dependency relations (e.g. subject-verb, verb-object, attribute-head)
        to describe the syntax, and represents the whole sentence as a tree.'''
        text = '小情歌'
        resp = self.client.depParser(text)
        print(resp)

    def vector(self):
        '''Word vectors
        The word embedding endpoint looks up Chinese word vectors.'''
        word = '张飞'
        resp = self.client.wordEmbedding(word)
        print(resp)

    def Dnn(self):
        '''The Chinese DNN language model endpoint returns the segmentation with each token's in-sentence probability, judging whether a sentence reads naturally.'''
        # word = '你饭吃在哪?'
        word = '你今天上班了吗'
        resp = self.client.dnnlm(word)
        # ppl (float) measures fluency: the lower the value, the more natural the sentence
        print(resp)
        print(resp['ppl'])

    def compare(self):
        # the two words should have the same number of characters
        '''Word similarity'''
        word1 = '茶壶'
        word2 = '水瓶'
        resp = self.client.wordSimEmbedding(word1, word2)
        print(resp)
        # score: similarity score; the closer to 1, the more similar
        print(resp['score'])

    def text_compare(self):
        # short-text similarity
        text1 = "穿衣裳"

        text2 = "穿衣服"
        """ 调用短文本相似度 """
        resp = self.client.simnet(text1, text2)
        print(resp)

    def comment(self):
        '''Comment opinion extraction'''
        text = '苹果笔记本后盖不好看'
        """ 如果有可选参数 """
        options = {}
        options["type"] = 13
        """ 带参数调用评论观点抽取 """
        resp = self.client.commentTag(text, options)
        print(resp)
        print(resp['items'])

    def emotion(self):
        # sentiment analysis
        text = '今天天气不错'
        resp = self.client.sentimentClassify(text)
        print(resp)
        print(resp['items'])
        print('positive probability: %s' % resp['items'][0]['positive_prob'])
        print('negative probability: %s' % resp['items'][0]['negative_prob'])

    def Tag(self):
        '''Article tagging'''
        # The article tagging service quickly understands all kinds of media articles: given an
        # article with a title, it returns several content tags with confidence scores, for
        # personalized recommendation, clustering similar articles, and text content analysis.
        title = "iphone手机出现“白苹果”原因及解决办法,用苹果手机的可以看下"

        content = "如果下面的方法还是没有解决你的问题建议来我们门店看下成都市锦江区红星路三段99号银石广场24层01室。"
        """ 调用文章标签 """
        resp = self.client.keyword(title, content)
        print(resp)

    def Ar_classification(self):
        '''Article classification'''
        title = "美男齐聚!吴彦祖冯德伦谢霆锋一起颁奖"

        content = "今晚的金像奖,《特警新人类》主演吴彦祖、冯德伦、谢霆锋、李璨琛一起颁奖,今年是电影上映二十年。一开始只有冯德伦、李璨琛上台,说“他们两个有事来不了”,随后吴彦祖和谢霆锋也从VCR中“走”到了台上,他们现场问大家想不想看《特警新人类3》,气氛热烈。"
        """ 调用文章分类 """
        # 可能一个文章有多个分类
        resp = self.client.topic(title, content)
        print(resp)
        print(resp['item'])

    def modify(self):
        '''Text correction'''
        text = "只能门锁"
        """ 调用文本纠错 """
        resp = self.client.ecnet(text)
        print(resp)
        print(resp['item'])
        print('text has an error; corrected result: %s' % resp['item']['correct_query'])

    def emotion_qingxu(self):
        text = '今天本来高兴的'
        """ 如果有可选参数 """
        options = {}
        options["scene"] = "default"
        """ 带参数调用对话情绪识别接口 """
        resp = self.client.emotion(text, options)
        print(resp)
        print(resp['items'])
        print(type(resp['items']))
        print('reply: %s' % resp['items'][0]['replies'])

    def News(self):
        '''News summarization'''
        # this endpoint is not enabled for this account

        content = "麻省理工学院的研究团队为无人机在仓库中使用RFID技术进行库存查找等工作,创造了一种..."

        maxSummaryLen = 300
        """ 调用新闻摘要接口 """
        resp = self.client.newsSummary(content, maxSummaryLen)
        print(resp)
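Each method above hard-codes its sample input, so exercising the wrapper is just a sequence of calls (a sketch; News would fail while the summary endpoint is not enabled):

nlp = NLP()
nlp.lexical_analysis()
nlp.Dnn()
nlp.compare()
nlp.emotion()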
Example #7
    # Chinese word embedding lookup
    result = aipNlp.wordEmbedding(title[1])
    print(result)

    # pass two words to compute their similarity
    result = aipNlp.wordSimEmbedding('漂亮', '美丽')
    print(result)

    # sentiment analysis
    result = aipNlp.sentimentClassify('Python具有丰富和强大的库')
    # sentiment holds the polarity: 0 negative, 1 neutral, 2 positive
    print(result)

    # pass a phrase to the Chinese DNN language model for structure analysis
    result = aipNlp.dnnlm('python是程序设计语言')
    print(result)

    # pass two short texts to compute their similarity
    result = aipNlp.simnet('python是程序设计语言', 'c是程序设计语言')
    # score: the similarity of the two texts
    print(result)

    # pass a review to extract opinion and sentiment attributes
    result = aipNlp.commentTag('面包很好吃')
    print(result)

    # dependency parsing
    result = aipNlp.depParser('python是最好的语言')
    print(result)
Example #8
    def CheckText(self, text):
        Client = AipNlp(self.ID, self.KEY, self.SECRET_KEY)
        Result = Client.dnnlm(text)
        return Result
Example #9
SECRET_KEY = ''  # ... the second credential

client = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # sign in to Baidu Cloud

poemfile = open('/data/zzcf.txt', encoding='UTF-8').read()  # poems from the word-frequency poem-writing step
p1 = r"[\u4e00-\u9fa5]{5,7}[\u3002|\uff0c]"
pattern1 = re.compile(p1)
result1 = pattern1.findall(poemfile)

list_ppl = []

i = 0
j = 0
print('second ' + str(j))
for row in result1:  # score each line of verse; note: Baidu caps requests at 5 per second
    try:
        i += 1
        temp = client.dnnlm(row)
        list_ppl.append(temp['ppl'])
        if i == 5:
            j += 1
            print('second ' + str(j))
            i = 0
            time.sleep(1)
    except Exception:
        break

avg_ppl = np.mean(list_ppl)

print("该诗词数据的可读性平均得分为{:.2f}。(该分值越小表示可读性越好)".format(avg_ppl))
Example #10
class EMIP:
    '''
	Process explicit multi-intent.
	'''
    def __init__(self, workspace_id):
        manager = Manager()
        self.baidu_nlp = None
        self.watson_nlp = None
        self.conjunction = [
            '和', '還有', '然後', '或者', '或', '及', '跟', '與', '以及', '並且', '並',
            '而且', '再來', '因此', '因為', '所以', '由於', '不但', '不僅', '以便'
        ]
        self.conj = list(self.conjunction)  # the conjunction words used for splitting
        self.score_saver = manager.dict()
        self.intent_saver = manager.dict()
        self.success_saver = manager.dict()
        self.failed_saver = manager.dict()
        self.lock = Lock()
        self.verb = []
        self.entities = []
        self.workspace_id = workspace_id

    def baidu_api(self, APP_ID, API_KEY, SECRET_KEY):
        '''
		Load Baidu NLP API.
		'''

        self.baidu_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

    def watson_api(self, usr_name, passwd):
        '''
		Load Watson NLP API.
		'''
        self.watson_nlp = ConversationV1(username=usr_name,
                                         password=passwd,
                                         version='2017-04-21')

    def detect_entities(self, text):
        """
		Detects entities in the text by Google NLP API.
		"""

        client = language.LanguageServiceClient()

        if isinstance(text, six.binary_type):
            text = text.decode('utf-8')

        # Instantiates a plain text document.
        document = types.Document(content=text,
                                  type=enums.Document.Type.PLAIN_TEXT)

        # Detects entities in the document. You can also analyze HTML with:
        #   document.type == enums.Document.Type.HTML
        entities = client.analyze_entities(document).entities

        # entity types from enums.Entity.Type
        entity_type = ('UNKNOWN', 'PERSON', 'LOCATION', 'ORGANIZATION',
                       'EVENT', 'WORK_OF_ART', 'CONSUMER_GOOD', 'OTHER')

        entity = list(set(i for i in entities))

        self.entities.extend(entity)

    def detectVerb(self, sen):
        """
		Detect verbs in text by Baidu NLP API.
		"""
        data = self.baidu_nlp.lexer(sen)['items']
        # keep (offset, word) pairs so contextGuess can filter verbs by position
        detect_verb = [(i['byte_offset'], i['item']) for i in data if i['pos'] == 'v']
        self.verb.extend(detect_verb)

    def detectIntent(self, sentence):
        '''
		Detect Intent in text by Watson NLP API.
		'''

        if os.getenv("conversation_workspace_id") is not None:
            self.workspace_id = os.getenv("conversation_workspace_id")

        response = self.watson_nlp.message(workspace_id=self.workspace_id,
                                           input={'text': sentence})

        if 'intents' in response:
            return response['entities'], response['intents'][0][
                'confidence'], response['intents'][0]['intent']
        else:
            return None, None, None

    def detectConj(self, sen):
        '''
		Detect conjunctions in the text and return the number of them.
		'''
        count = 0
        for i in self.conj:
            count += len([m.start() for m in re.finditer(i, sen)])
        return count

    def processConj(self, sen, local):
        '''
		Find the best separating point by statistic language model.
		'''
        resultConj = None

        # location conjunction processing.
        if local:
            minScore = float('inf')
            answer = []

        # global conjunction processing.
        else:
            minScore = self.baidu_nlp.dnnlm(sen)['ppl']
            answer = [sen]

        for conj in self.conj:
            if (conj not in sen):
                continue

            # find the location of conj in input sen.
            conj_pos = [m.start() for m in re.finditer(conj, sen)]
            for pos in conj_pos:

                # Base on the position of each conj, we separate the input sentence to leftSen and rightSen.
                leftSen = sen[:pos]
                rightSen = sen[pos + len(conj):]

                if (rightSen == ''):
                    continue

                # process leftSen...
                if (leftSen not in self.score_saver):
                    leftScore = self.baidu_nlp.dnnlm(leftSen)['ppl']
                    self.score_saver[leftSen] = leftScore
                else:
                    leftScore = self.score_saver[leftSen]

                # process rightSen...
                if (rightSen not in self.score_saver):
                    rightScore = self.baidu_nlp.dnnlm(rightSen)['ppl']
                    self.score_saver[rightSen] = rightScore
                else:
                    rightScore = self.score_saver[rightSen]

                # Treat the average score as the score of this conj.
                score = (leftScore + rightScore) / 2

                # Keep the conj with the smallest perplexity score.
                if (minScore > score):
                    minScore = score
                    resultConj = conj
                    answer = [leftSen, rightSen]

        return (answer, resultConj)

    def global_process(self, sen, conj=''):
        if (conj == ''):
            conj_count = self.detectConj(sen=sen)
        else:
            conj_count = 1

        if conj_count:
            # Find the best separating point.
            ans, conj = self.processConj(sen, local=False)

            # The input sentence has the smallest perplexity.
            # This means there is no need to cut the input sentence.
            # So input it to local_process.
            if conj is None:
                self.local_process(sen=sen, global_sen=sen)

            # Separate the input sentence to two part and input each of them to process separately.
            elif ans:
                for i in ans:
                    p = Process(target=self.global_process, args=(i, conj))
                    p.start()
                    p.join()

        # don't have any conj.
        else:
            self.local_process(sen=sen, global_sen=sen, conj_count=0)

    def local_process(self,
                      sen,
                      global_sen,
                      global_intent='',
                      conj='',
                      conj_count=None):
        if (conj_count is None):
            conj_count = self.detectConj(sen)

        if conj_count:
            # Find the best separating point.
            ans, conj = self.processConj(sen, local=True)

            # Separate the input sentence to two part and input each of them to process separately.
            if ans:
                for i in ans:
                    p = Process(target=self.local_process,
                                args=(i, global_sen, global_intent))
                    p.start()
                    p.join()

            # There is no need to cut the input sentence.
            # Detect the intent and entities.
            else:
                ent, score, intent = self.detectIntent(sen)
                self.lock.acquire()
                self.intent_saver[sen] = intent
                self.success_saver[sen] = [sen, intent, global_sen]
                self.lock.release()

        # Don't have any conj.
        else:

            # Detect the intent and entities.
            ent, score, intent = self.detectIntent(sen)

            # Recognized
            if intent:
                self.lock.acquire()
                self.intent_saver[sen] = intent
                self.success_saver[sen] = [sen, intent, global_sen]
                self.lock.release()

            # Unrecognized
            else:
                self.lock.acquire()
                self.failed_saver[sen] = [sen, global_sen]
                self.intent_saver[global_sen] = global_intent
                self.success_saver[global_sen] = [
                    global_sen, global_intent, global_sen
                ]
                self.lock.release()

    def contextProcess(self, global_sen):
        '''
		Guess the intent by their context.
		'''
        if (type(self.intent_saver) != list):
            self.intent_saver = list(self.intent_saver.values())

        self.success_saver = list(self.success_saver.values())
        self.failed_saver = list(self.failed_saver.values())

        # There are no unrecognized records.
        if not self.failed_saver:
            return None
        else:
            # Detect verbs in global_sen.
            self.detectVerb(global_sen)

            # Detect entities in global_sen.
            self.detect_entities(global_sen)

            # process unrecognized records.
            for failed_sen in self.failed_saver:
                judge = True

                # Guess the intent of failed_sen by their context.
                guessIntent = self.contextGuess(failed_sen[0], failed_sen[1],
                                                global_sen)

                # Recognized
                if guessIntent:
                    for success_sen in self.success_saver:

                        # Already recognized
                        if (success_sen[2] == failed_sen[1]
                                and guessIntent == failed_sen[1]):
                            judge = False
                            break

                    # Recognized
                    if judge:
                        self.success_saver.append(guessIntent)
                    else:
                        continue

    def contextGuess(self, sen, candidate, global_sen):
        '''
		Add contexts and detect the intent again.
		'''
        guessIntent = None

        # collect the entities in this unrecognized record.
        entitiesOri = [i for i in self.entities if i in sen]

        if (entitiesOri == []):

            # collect the entities in the relevant sub_sentence.
            entitiesTar = [i for i in self.entities if i in candidate]

            # Add the context (entities) and attempt to detect the intent.
            bestScore = -1
            for i in entitiesTar:

                # Ignore entities that are already in this unrecognized record.
                if (i in sen):
                    continue

                # Add the context.
                tmpS = sen + i

                ent, score, intent = self.detectIntent(tmpS)

                # Unrecognized
                if (score is None):
                    continue

                # Keep the intent with the best confidence score.
                if (bestScore < score):
                    bestScore = score
                    guessIntent = intent

            # Recognized
            if guessIntent:
                return guessIntent

        posCan = global_sen.find(candidate)
        posEndCan = posCan + len(candidate) - 1
        v = [
            i[1] for i in self.verb if i[1] in candidate
            and int(i[0]) >= posCan and int(i[0]) <= posEndCan
        ]

        # Add the context (verbs) and attempt to detect the intent.
        bestScore = -1
        for i in v:
            # Ignore verbs that are already in this unrecognized record.
            if (i in sen):
                continue

            # Add the context.
            tmpS = i + sen

            ent, score, intent = self.detectIntent(tmpS)

            # Unrecognized
            if (score is None):
                continue

            # Keep the intent with the best confidence score.
            if (bestScore < score):
                bestScore = score
                guessIntent = intent

        # Recognized
        if guessIntent:
            return guessIntent

    def getIntent(self):
        return self.intent_saver
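The heart of processConj is scoring every candidate split with DNN-LM perplexity and keeping the lowest average. The same idea as a standalone sketch (best_split is a hypothetical helper; baidu_nlp is a configured AipNlp client):

def best_split(baidu_nlp, sen, conjunctions):
    """Return (left, right, conj) for the split with the lowest mean perplexity."""
    best = (None, None, None)
    best_score = baidu_nlp.dnnlm(sen)['ppl']  # baseline: no split at all
    for conj in conjunctions:
        pos = sen.find(conj)
        if pos <= 0 or pos + len(conj) >= len(sen):
            continue  # skip splits that would leave an empty side
        left, right = sen[:pos], sen[pos + len(conj):]
        score = (baidu_nlp.dnnlm(left)['ppl'] + baidu_nlp.dnnlm(right)['ppl']) / 2
        if score < best_score:
            best_score, best = score, (left, right, conj)
    return best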
Example #11
SECRET_KEY = ''

client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

# text = "百度是一家高科技公司"
# client.lexer(text)  # segmentation, POS tagging, named-entity recognition

# text = "张飞"
# client.depParser(text)  # word-level dependency relations (subject-verb, verb-object, attribute-head, ...) arranged as a tree over the whole sentence

# word = "张飞"
# client.wordEmbedding(word)  # Chinese word vector


text = "闻君昌矣望朝斜,街上迁春流不胜。小蜃驱天俱铸触,东溪州雁拂清明。"
print(client.dnnlm(text)['ppl'])   # segmentation with each token's in-sentence probability; judges whether the sentence reads naturally



# word1 = "北京";word2 = "上海"
# client.wordSimEmbedding(word1, word2)  # similarity of the two words

'''
text1 = "强大";text2 = "富强"
client.simnet(text1, text2)     # similarity of the two short texts
'''

# text = "三星电脑电池不给力"
# options = {}
# options["type"] = 13
# client.commentTag(text, options)  # judges the sentiment polarity (positive, negative, neutral) of subjective text, with a confidence score
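For contrast, comparing the generated verse above with an everyday sentence shows how perplexity flags unnatural text (a sketch reusing the same client):

for s in ['今天天气不错', text]:  # natural sentence vs. generated verse
    print(client.dnnlm(s)['ppl'])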
Example #12
class BaiduAi:
    """百度ai接口
    https://ai.baidu.com/docs#/NLP-Python-SDK/f524c757
    """

    # empCount = 0

    def __init__(self):
        print('start')
        # print(config.get('site', 'name'))
        self.app_id = config.get('baidu', 'app_id')
        self.api_key = config.get('baidu', 'app_key')
        self.secret_key = config.get('baidu', 'secret_key')
        self.client = AipNlp(self.app_id, self.api_key, self.secret_key)
        # """ 你的 APPID AK SK """
    def lexer(self, text):
        """ call lexical analysis """
        return self.client.lexer(text)


    def depParser(self, text):
        """ dependency parsing """
        return self.client.depParser(text)


    def dnn(self, text):
        """ call the DNN language model """
        return self.client.dnnlm(text)
    def wordSimEmbedding(self, text1, text2):
        """ word similarity """
        return self.client.wordSimEmbedding(text1, text2)

    def simnet(self, text1, text2):
        """ short-text similarity """
        return self.client.simnet(text1, text2)

    def commentTag(self, content):
        """ comment opinion extraction """
        return self.client.commentTag(content)

    def topic(self, title, content):
        """ call article classification """
        try:
            return self.client.topic(title, content)
        except Exception:
            # fall back to an empty tag result on failure
            return {'log_id': 8348398184393122510, 'item': {'lv2_tag_list': [], 'lv1_tag_list': []}}
            
    def keyword(self, title, content):
        """ article tagging
        Quickly understands media articles: given an article with a title, returns several
        content tags with confidence scores, for personalized recommendation, clustering
        similar articles, and text content analysis.
        """
        return self.client.keyword(title, content)
    def sentimentClassify(self, content):
        """ sentiment analysis
        Judges the sentiment polarity (positive, negative, neutral) of subjective text
        and returns a confidence score.
        """
        return self.client.sentimentClassify(content)

    def ecnet(self, content):
        """ text correction """
        return self.client.ecnet(content)

    def newsSummary(self, title, content):
        """ generate a news summary
        (endpoint not yet enabled; the title argument is currently unused)
        """
        return self.client.newsSummary(content, 200)
Example #13
SECRET_KEY = 'r0mHeKH7TWVpPa0weKMVMpQ2whosIPGM '

aipNlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
result = aipNlp.lexer('黄健是一个帅锅')  # lexical analysis: Chinese segmentation plus POS tagging
for key in result:
    print(key, result[key])

result = aipNlp.wordEmbedding("黄健")  #用于词汇数学计算,词向量
print(result)

result = aipNlp.wordSimEmbedding('早饭', '早点')  # word similarity
print(result)
result = aipNlp.wordSimEmbedding('帅', '英俊')  # word similarity
print(result)
result = aipNlp.wordSimEmbedding('强大', '厉害')  # word similarity
print(result)

result = aipNlp.sentimentClassify('这家公司差的很')  # sentiment analysis
print(result)

result = aipNlp.dnnlm('百度是个搜索公司')  # DNN language model / structure analysis
print(result)

result = aipNlp.simnet('清华学霸', '清华学渣')  # short-text similarity
print(result)

result = aipNlp.commentTag('面包很好吃,吃的我拉肚子了')  # comment opinion extraction; judges sentiment attributes
print(result)

result = aipNlp.depParser('百度是一家伟大的公司')
print(result)
Example #14
# the language-model service provided by the Baidu API

from aip import AipNlp
""" 你的 APPID AK SK """
APP_ID = '11731139'
API_KEY = '7sUyLlu1wYdBy9rozkIZVN1u'
SECRET_KEY = '6tM1yqYphPN4XAshGFDiyqRD1lrFQarf'

client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

text = "云解析服务中查询内网Zone的名称服务器的功能介绍是什么?"
""" 
调用DNN语言模型 
log_id	uint64	请求唯一标识码
word	string	句子的切词结果
prob	float	该词在句子中的概率值,取值范围[0,1]
ppl	float	描述句子通顺的值:数值越低,句子越通顺
"""

result = client.dnnlm(text)

print(result)
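Given the fields documented above, a short sketch that walks the reply (assuming the usual layout, with tokens under an 'items' list):

for item in result['items']:
    print(item['word'], item['prob'])  # token and its in-sentence probability
print('ppl:', result['ppl'])           # sentence-level fluency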

title = "云容器引擎的产品优势"

content = "CCE基于业界主流的Docker和Kubernetes开源技术,并进行了大量的商用增强,在系统可靠性、高性能、开源社区的兼容性等多个方面具有独特的优势。"
""" 调用文章标签 """
result1 = client.keyword(title, content)
print(result1)
Example #15
API_KEY = ''
SECRET_KEY = ''

client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
text = "百度是一家高科技公司"
client.lexer(text)  # segmentation, POS tagging, named-entity recognition

text = "张飞"
client.depParser(text)  # word-level dependency relations (subject-verb, verb-object, attribute-head, ...) arranged as a tree over the whole sentence

word = "张飞"
client.wordEmbedding(word)  # Chinese word vector

text = "床前明月光"
client.dnnlm(text)  # segmentation with each token's in-sentence probability; judges whether the sentence reads naturally

word1 = "北京"
word2 = "上海"
client.wordSimEmbedding(word1, word2)  # similarity of the two words

text1 = "强大"
text2 = "富强"
client.simnet(text1, text2)  # similarity of the two short texts

text = "三星电脑电池不给力"
options = {}
options["type"] = 13
client.commentTag(text, options)  # judges the sentiment polarity (positive, negative, neutral) of subjective text, with a confidence score

text = "苹果是一家伟大的公司"