def process_wordle2vector(self, root):
        resultDict = {}
        resultDict['result'] = []
        client = AipNlp(self.APP_ID, self.API_KEY, self.SECRET_KEY)

        for i in range(self.OBSERVE_NUM):
            resultDict['result'].append({})
            resultDict['result'][i]['name'] = self.KEY_WORDS[i + 2]

        path = root + '/wordleFinalResult(all&adjust).json'
        #UTF8_2_GBK(path, path)
        with open(path, 'r') as f:
            jsonObject = json.load(f)

        for nameKey in jsonObject.keys():
            print('processing ' + nameKey)
            if self.KEY_WORDS.index(nameKey) - 2 < self.OBSERVE_NUM:
                words = jsonObject[nameKey]
                ergodic_num = min(self.HIGH_FREQUENCY_THRESHOLD, len(words))
                vector = []

                num = 0
                index = 0
                while num < ergodic_num and index < len(
                        words):  # guard: stop if the list runs out before enough usable words are found
                    word = words[index][0]
                    analyresult = client.wordEmbedding(word)
                    error_flag = 0
                    while 'error_msg' in analyresult:
                        print('error: ' + analyresult['error_msg'])
                        if analyresult['error_msg'] == 'word error':
                            error_flag = 1
                            index += 1  # word not in the embedding vocabulary; try the next word
                            break
                        time.sleep(self.SLEEP_TIME)
                        analyresult = client.wordEmbedding(word)
                    if error_flag == 0:
                        vector.extend(analyresult['vec'])
                        num += 1

                resultDict['result'][self.KEY_WORDS.index(nameKey) -
                                     2]['value'] = vector

        save_path = root + '/' + 'wordleVectorFinalResult(all).json'
        with open(save_path, 'w') as f:
            json.dump(resultDict, f)
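For reference, a minimal standalone sketch of the wordEmbedding call the method above loops over, assuming the aip SDK is installed and the placeholder credentials are replaced with real ones:

from aip import AipNlp

client = AipNlp('your_app_id', 'your_api_key', 'your_secret_key')  # placeholders
res = client.wordEmbedding('百度')
if 'error_msg' in res:
    print('error: ' + res['error_msg'])
else:
    print(len(res['vec']))  # the embedding vector consumed as analyresult['vec'] above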
Example #2
def baidu_execl(web_content):
    APP_ID = '14812878'
    API_KEY = 'HxGQXVmxbwHU6YtE3uKAqWCN'
    SECRET_KEY = 'ujqAHGoIMXCXkzmrLED2KRw83qAsnYCC'
    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    book = xlwt.Workbook(encoding='utf-8',
                         style_compression=0)  # create the Excel workbook
    sheet = book.add_sheet('Media Monitor', cell_overwrite_ok=True
                           )  # create a sheet named "Media Monitor"

    sheet.write(0, 0, '舆情标题')
    sheet.write(0, 1, '原文地址')
    sheet.write(0, 2, '来源媒体')
    sheet.write(0, 3, '舆情通的分类')
    sheet.write(0, 4, '情绪分类')
    sheet.write(0, 5, '情绪得分(百分制,满分完全正面)')
    sheet.write(0, 6, '预测的置信度(百分制,满分完全自信)')

    for elm in web_content:
        text = elm['title'] + '。' + elm['abstract']
        baidu_result = client.sentimentClassify(text)
        # print(baidu_result)
        sheet.write(elm['stamp'] + 1, 0, elm['title'])
        sheet.write(elm['stamp'] + 1, 1, elm['url'])
        sheet.write(elm['stamp'] + 1, 2, elm['source'])

        if elm['attribute'] == -1:
            sheet.write(elm['stamp'] + 1, 3, '负面')
        elif elm['attribute'] == 0:
            sheet.write(elm['stamp'] + 1, 3, '中立')
        elif elm['attribute'] == 1:
            sheet.write(elm['stamp'] + 1, 3, '正面')

        if baidu_result['items'][0]['sentiment'] == 0:
            sheet.write(elm['stamp'] + 1, 4, '负面')
        elif baidu_result['items'][0]['sentiment'] == 1:
            sheet.write(elm['stamp'] + 1, 4, '中立')
        elif baidu_result['items'][0]['sentiment'] == 2:
            sheet.write(elm['stamp'] + 1, 4, '正面')
        sheet.write(elm['stamp'] + 1, 5,
                    baidu_result['items'][0]['positive_prob'] * 100)
        sheet.write(elm['stamp'] + 1, 6,
                    baidu_result['items'][0]['confidence'] * 100)
        print("%d Done!\n" % (elm['stamp'] + 1))
        time.sleep(0.25)
    book.save('data/testexcel.xls')
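The sentimentClassify response fields relied on above, as a guarded sketch; the 'items' presence check mirrors Example #16 further down:

baidu_result = client.sentimentClassify(text)
if 'items' in baidu_result:
    item = baidu_result['items'][0]
    # sentiment: 0 = negative, 1 = neutral, 2 = positive
    print(item['sentiment'], item['positive_prob'], item['confidence'])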
Example #3
class wordSimTool:
    client = AipNlp(Bai_appid, Bai_apikey, Bai_Skey)

    def getwordsim_api(self, wordA, wordB):
        """
        测量词汇的相似度,词林、百度NLP
        :param wordA: 测量词1
        :param wordB: 测量词2
        :return: 文档dict。
                 包含相似度值(float) (0,1]
        """
        result = self.client.wordSimEmbedding(wordA, wordB)
        return result

    def getsentenceSim(self, s1, s2):
        es1 = s1.encode('utf-8')
        es2 = s2.encode('utf-8')
        return self.client.simnet(es1, es2)

    def __init__(self):
        self.source_path = 'C:\\Users\\baiwt\\Desktop\\'
        self.write_path = self.source_path + 'Similarity.csv'
        self.key20_list = [
            '无害化', '烟气', '污染', '减量化', '滤液', '废弃物', '飞灰', '清洁', '基础设施', '有害物质',
            '环境', '效益', '噪声', '处理工艺', '监测', '风险', '臭气', '补贴', '管理', '法律法规'
        ]
        self.top500_list_path = self.source_path + 'top500.txt'
        with open(self.top500_list_path, 'r', encoding='UTF-8') as f:
            self.top500_list = [line.strip() for line in f]

        APP_ID = '10975759'
        API_KEY = 'CNLZcnqL3E6NLeFNGzUE06fY'
        SECRET_KEY = 'b8Z7nQPmASHHhPLjRG6dXKQG7W97DxNk'
        self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
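A usage sketch for the class above; note that the constructor reads top500.txt from a hard-coded desktop path, so it only succeeds if that file exists:

tool = wordSimTool()
print(tool.getwordsim_api('环境', '污染'))  # result dict from wordSimEmbedding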
Example #5
def baidu_correct(text: str) -> str:
    """百度纠错api"""
    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    client.setConnectionTimeoutInMillis(10000)
    client.setSocketTimeoutInMillis(10000)
    try:
        s = client.ecnet(text)
    except Exception as e:
        print(repr(e))
        return text
    else:
        import time, logging
        time.sleep(0.1)
        logging.info(s)
        if 'error_code' in s.keys():
            print(s)
            return text
        # the corrected sentence is in 'correct_query'; 'vec_fragment' is a list of
        # fragment-level edits and would not match the declared str return type
        return s.get('item', {}).get('correct_query', text)
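A usage sketch, assuming APP_ID, API_KEY and SECRET_KEY are defined at module level as the function expects:

print(baidu_correct('我门今天去公园'))  # the corrected sentence, or the input unchanged on failure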
Example #6
def judges(config, text):
    """
    通过三个NLP平台分析文本并投票决定情绪,分别调用百度AI,BosonNLP和腾讯文智的API
    :param config: dict
        {'baidu': [APP_ID, API_KEY, SECRET_KEY],
         'boson': [API_TOKEN],
         'tencent': [SECRET_ID, SECRET_KEY]}
    :param text: string
    :return: string
    """
    default_timeout = 10
    w = Wenzhi(config['tencent'][0], config['tencent'][1], 'gz', 'POST')
    cli = AipNlp(config['baidu'][0], config['baidu'][1], config['baidu'][2])
    bo = BosonNLP(config['boson'][0])
    pool = ThreadPoolExecutor(max_workers=3)
    candidate1 = pool.submit(w.text_sentiment, {'content': text})
    candidate2 = pool.submit(cli.sentimentClassify, text)
    candidate3 = pool.submit(bo.sentiment, text)
    result = [
        candidate1.result(default_timeout),
        candidate2.result(default_timeout),
        candidate3.result(default_timeout)
    ]
    post_votes = [
        result[0]['positive'], result[1]['items'][0]['positive_prob'],
        result[2][0][0]
    ]
    nega_votes = [
        result[0]['negative'], result[1]['items'][0]['negative_prob'],
        result[2][0][1]
    ]
    # return [post_votes, nega_votes]
    pv = [i for i in post_votes if i > 0.5]
    nv = [x for x in nega_votes if x > 0.5]
    if len(pv) / len(post_votes) > 1 / 3:
        return 'P'
    if len(nv) / len(nega_votes) > 1 / 3:
        return 'N'
    return 'N'  # default to negative when neither side clears the threshold
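A usage sketch with the config layout documented in the docstring; all credentials are placeholders:

config = {
    'baidu': ['your_app_id', 'your_api_key', 'your_secret_key'],
    'boson': ['your_api_token'],
    'tencent': ['your_secret_id', 'your_secret_key'],
}
print(judges(config, '这家店的服务很不错'))  # prints 'P' or 'N'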
Example #7
def init_segment():
    # Apply for Baidu Cloud word-segmentation access as described in section 4.4 and fill in the credentials below
    APP_ID = "11033091"
    API_KEY = "tSkph8iBxX4fGmRvLf5WSVhN"
    SECRET_KEY = "FxEzuzLcXiG4j506zTefnTlPREuQgGWr"

    import re
    from aip import AipNlp
    # keep only words with the following POS tags: https://cloud.baidu.com/doc/NLP/NLP-FAQ.html#NLP-FAQ
    retains = set(["n", "nr", "ns", "s", "nt", "an", "t", "nw", "vn"])

    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

    def segment(text):
        '''
        Segment the job-description text, extract the informative tokens, and post-process them
        '''
        try:
            result = []
            # call segmentation + POS tagging; strip whitespace with a regex first, since special characters break the service
            items = client.lexer(re.sub(r'\s', '', text))["items"]

            cur = ""
            for item in items:
                # merge consecutive tokens whose POS is in retains
                if item["pos"] in retains:
                    cur += item["item"]
                    continue

                if cur:
                    result.append(cur)
                    cur = ""
                # keep named entities and other proper nouns (pos "nz")
                if item["ne"] or item["pos"] == "nz":
                    result.append(item["item"])
            if cur:
                result.append(cur)

            return result
        except Exception as e:
            print("failed to call the Baidu NLP service:", e)
            return []

    return segment
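A usage sketch of the returned closure:

segment = init_segment()
print(segment('历任公司副总经理、总工程师'))  # e.g. a list of merged noun phrases and named entities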
Example #8
def init_segment():
    # Apply for Baidu Cloud word-segmentation access as described in section 4.4 and fill in the credentials below
    APP_ID = "15993898"
    API_KEY = "ljcRFMa84OyxKSTXtz5YGYd6"
    SECRET_KEY = "GijfE3hqwA3qs76zHPIkVMjq9PdfEOma"

    import re
    from aip import AipNlp
    # keep only words with the following POS tags: https://cloud.baidu.com/doc/NLP/NLP-FAQ.html#NLP-FAQ
    retains = set(["n", "nr", "ns", "s", "nt", "an", "t", "nw", "vn"])

    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

    def segment(text):
        '''
        Segment the job-description text, extract the informative tokens, and post-process them
        '''
        try:
            result = []
            # call segmentation + POS tagging; strip whitespace with a regex first, since special characters break the service
            items = client.lexer(re.sub(r'\s', '', text))["items"]

            cur = ""
            for item in items:
                # merge consecutive tokens whose POS is in retains
                if item["pos"] in retains:
                    cur += item["item"]
                    continue

                if cur:
                    result.append(cur)
                    cur = ""
                # keep named entities and other proper nouns (pos "nz")
                if item["ne"] or item["pos"] == "nz":
                    result.append(item["item"])
            if cur:
                result.append(cur)

            return result
        except Exception as e:
            print("failed to call the Baidu NLP service:", e)
            return []

    return segment
Example #9
def get_word_nlp(word):
    """
    是否为消极的
    :param word:
    :return:
    """
    """ 你的 APPID AK SK """
    APP_ID = '**'
    API_KEY = '***'
    SECRET_KEY = '****'

    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    """ 调用情感倾向分析 """
    result = client.sentimentClassify(word)

    # polarity of the sentiment (0 = negative, 1 = neutral, 2 = positive)
    sentiment = result.get("items")[0].get("sentiment")

    return sentiment == 0
Example #10
    def Text_Categories(self, _title, _content):
        APP_ID = '11082811'
        API_KEY = '8UoMrydDQDuWxClQLgh3HDAt'
        SECRET_KEY = 'CDUQyIpBC8ssom0TRdKBkBpdn8Dho5VE'
        client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
        """ call article topic classification """
        title = _title
        content = _content
        result = client.topic(title, content)['item']
        categories = {'lv1': [], 'lv2': []}
        for i in result['lv1_tag_list']:
            if i['score'] > 0.8:
                categories['lv1'].append(i['tag'])
        for i in result['lv2_tag_list']:
            if i['score'] > 0.8:
                categories['lv2'].append(i['tag'])
        if categories['lv1'] == [] and categories['lv2'] == []:
            categories['lv1'] = False
            categories['lv2'] = False
        return categories  # assumed intent: the source snippet built this dict but ended without a return
Example #11
def get_baidu_nlp_client(api):
    random.seed(time.time() * 1000)
    if api == "voice_to_sentence":
        args = random.choice([
            {
                "APP_ID": '10846751',
                "API_KEY": 'fQVRC7FP989ZgzCBFt87Ul9p',
                "SECRET_KEY": 'QWUCl8wlTC07DFoCIVK5MIYtqY1QCnkG',
            },
        ])
        client = AipSpeech(args["APP_ID"], args["API_KEY"], args["SECRET_KEY"])
    elif api == "sentence_to_words":
        args = random.choice([
            {
                "APP_ID": '10880420',  # jinghui
                "API_KEY": 'flSG6dAlW15sNd7YGbFUWyMp',
                "SECRET_KEY": 'KUMAgRFZsGZNLG7e2c3aSUP2joh4cEuq',
            },
            {
                "APP_ID": '10854280',  # chu8129
                "API_KEY": 'iG4gHufA41LGTo18pokuPXPP',
                "SECRET_KEY": 'i5tEDTYV5IddsUZKG7poomKbbDXajuaQ',
            },
            {
                "APP_ID": '10864948',  # 20chu
                "API_KEY": 'yx9Zc8Rt1LV3eEEq4h6ZGi4M',
                "SECRET_KEY": 'n9cI3zkVtkBaURsPjg06iFjGt3MNbGp0',
            },
            {
                "APP_ID": '10867218',  # hong81293
                "API_KEY": '26am6H5pl4RmQmDL6kymlFXb',
                "SECRET_KEY": '7dKVvQGrSa70Cr2E9GWw53O8t8kgN3z1',
            },
            {
                "APP_ID": '10914857',  # yabiao
                "API_KEY": 'ChjaFZzXKLbWK2m3qU7IeIsz',
                "SECRET_KEY": '1c8GEoyt2vKMplM0Ykk6Y46GkIswKGhU',
            },
            {
                "APP_ID": '10874447',  # mingwei
                "API_KEY": 'qameOwB7cnK9ZwiUnlZ5izvZ',
                "SECRET_KEY": 'Kc2mBGyZeYAINGLAMVObPz6GbifBEjBV',
            },
        ])
        logging.debug("args:%s" % args)
        client = AipNlp(args["APP_ID"], args["API_KEY"], args["SECRET_KEY"])

    if "client" in locals():
        return client
Example #12
def visualize():
    req_data = request.get_json()

    print(req_data['text'])

    # call dependency parsing
    nlp = AipNlp(app.config['APP_ID'], app.config['API_KEY'], app.config['SECRET_KEY'])
    deparse_result = nlp.depParser(req_data['text'], req_data.get('options', {}))

    if 'error_code' in deparse_result:
        return jsonify(deparse_result)

    res_data = render_template(
        'dependency_parsing.dot.jinja2',
        text=deparse_result['text'], items=deparse_result['items'], postags=POSTAGS, deprels=DEPRELS
    )

    print(res_data)

    return jsonify(result=res_data)
Example #13
    def __init__(self):
        # baidu tokenizer
        APP_ID = '15716974'
        API_KEY = 'QZ4ee5tvyLrKCZ5FZib1eDFN'
        SECRET_KEY = 'Zhe7VieQlGeSvGoPbeHdfLLeDF78KOYO'
        self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

        #
        # self.text_file_path = "/home/gyzhang/speech_database/blizzard_release_2019_v1/text/text.txt"
        # self.ipa_dict = ld.get_lexicon_dict(lexicon_path='../data/dicts/ipa_m.dict')
        # self.english_dict = ld.get_lexicon_dict(lexicon_path='/home/gyzhang/speech_database/blizzard_release_2019_v1/text/english.dict')
        # self.chinese_syl_dict = ld.get_lexicon_dict(lexicon_path='/home/gyzhang/speech_database/blizzard_release_2019_v1/text/lexicon_char.txt')
        # self.chinese_syl_dict_new = '/home/gyzhang/speech_database/blizzard_release_2019_v1/text/lexicon_chinese_char.txt'
        # self.chinese_syl_dict_new = dict()
        # self.write_syl_phone_dict_flag=False
        # self.wav_dir = "/home/gyzhang/speech_database/blizzard_release_2019_v1/wav_16k"
        self.wav_scp = "../exp/blz/kaldi/wav.scp"
        self.utt2spk = "../exp/blz/kaldi/utt2spk"
        self.text = "../exp/blz/kaldi/text"
        self.write_kaldi = True
Example #14
def Emotion():
    csvFile = open("newjob.csv", 'w', newline='')  # create the output file
    writer = csv.writer(csvFile)
    writer.writerow(('评论', '正向指数', '负向指数', '情感分类'))  # write the header row

    # configure the Baidu API credentials
    APP_ID = '16802343'
    API_KEY = 'DWHSIPtzUxP9IRj5F93xIzGS'
    SECRET_KEY = 'I4vrC0x42looiNl1KCSL64HAv5uWt1A5'
    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    count = 0
    with open('yzsy.csv', 'r', encoding='utf-8') as f:
        rows = csv.reader(f)
        for r in rows:
            text = r[0].replace('\n', '')
            try:
                # send the text to Baidu for sentiment analysis
                content = client.sentimentClassify(text)

                text = content["text"]  # the analyzed text
                positive = str(round(content["items"][0]['positive_prob'] * 100, 3)) + "%"  # positive score
                negative = str(round(content["items"][0]['negative_prob'] * 100, 3)) + "%"  # negative score
                type_num = content["items"][0]['sentiment']  # sentiment label: 2 = positive, 1 = neutral, 0 = negative
                if type_num == 2:
                    sentiment_label = '正向'
                elif type_num == 1:
                    sentiment_label = '中性'
                else:
                    sentiment_label = '负向'
                writer.writerow((text, positive, negative, sentiment_label))
                count += 1
                print('已分析{}条数据'.format(count))
                time.sleep(2)
            except Exception:
                continue
        print('情感分析完毕!')
Example #15
def do(data, labels) -> float:
    """
        返回正确率
    """
    app_id, api_key, secret_key = _getConfig()
    client = AipNlp(app_id, api_key, secret_key)

    #data, labels = _getTestData()
    corr, count = 0, len(data)

    for i, sent in enumerate(data):
        try:
            res = client.ecnet(sent)  # <- dict
            r = 1 if res['item']['vec_fragment'] == [] else 0
            corr += (1 if labels[i] == r else 0)
        except Exception as e:
            print(e)
            print('_____________________')
            break

    return float(corr) / count
Example #16
def getSentimentScore(sen):
    APP_ID = '10987797'
    API_KEY = 'cgZ7P9szshd6UcTaQIzOvugS'
    SECRET_KEY = 'xUXD6DYFmYpDwMfXpTkVciWWdBYPX8ZE'
    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    client.setConnectionTimeoutInMillis(200)
    client.setSocketTimeoutInMillis(200)

    sentiment_category = {2: 1.0, 0: -1.0, 1: 0.0}

    s = client.sentimentClassify(sen)
    sentiment_score = 0.0

    if 'items' not in s.keys():
        sentiment_score = 0.0
    else:
        sentiment = sentiment_category[s['items'][0]['sentiment']]
        confidence = s['items'][0]['confidence']
        sentiment_score = sentiment * confidence
    # print("got ", sentiment_score)
    return sentiment_score
Example #17
def baidu_API(text):
    '''
    Baidu's NLP interface (lexical analysis, parsing, dependency relations, etc.); the API reference is at http://ai.baidu.com/docs#/NLP-Python-SDK/top
    :param text: the input sentence
    :return:
    '''
    from aip import AipNlp

    # Download the Python SDK package from the Baidu AI open platform.
    # After creating a Baidu account, look these up in the console: the ID under "user center", the key and secret under "security center".
    APP_ID = '6b82e45926334c46a8e6b31374d5b43d'  # your App ID
    API_KEY = 'ba3c712ad3f94ae49327ef2965813b65'  # your Api Key
    SECRET_KEY = '90f490d485b7418da7d60a5126b51abf'  # your Secret Key
    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

    """ 调用依存句法分析 """
    # Query模型:该模型的训练数据来源于用户在百度的日常搜索数据,适用于处理信息需求类的搜索或口语query。
    # Web模型:该模型的训练数据来源于全网网页数据,适用于处理网页文本等书面表达句子。
    options = {}
    options["mode"] = 0 #模型选择。默认值为0,可选值mode=0(对应web模型);mode=1(对应query模型)
    return client.depParser(text=text,options=options)
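A sketch of consuming the depParser result; the per-token fields shown (word, postag, head, deprel) follow Baidu's documented dependency-parsing schema and are an assumption here, not something the snippet above demonstrates:

res = baidu_API('百度是一家高科技公司')
for item in res.get('items', []):
    # head is the 1-based index of the parent token; deprel is the relation label (assumed schema)
    print(item['word'], item['postag'], item['head'], item['deprel'])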
Example #18
def main(arg):
    """ 你的 APPID AK SK """
    APP_ID = '23738220'
    API_KEY = 'E0MDRQpKgrkvNtVl27ZUBHZC'
    SECRET_KEY = 'yxAg1c23N9X0HdrWdGgpzPbWPfIDlr7D'

    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

    def get_file_content(filePath):
        with open(filePath, 'rb') as fp:
            return fp.read()

    text = get_file_content(arg[0]).decode('utf-8')

    """ 调用通用物体识别 """
    d = client.sentimentClassify(text)
    d1 = d["items"][0]
    s = '{"positive_prob":"'
    s += str(d1['positive_prob']) + '", '
    s += '"confidence":"'
    s += str(d1['confidence']) + '"}'
    print(s)
Example #19
    def analysisSingleArticle(self, content):
        if content is None:
            return -2
        else:
            content = content.encode('gbk', 'ignore').decode('gbk', 'ignore')  # assumed intent: drop characters GBK cannot represent (encoding utf-8 and decoding gbk would garble the text)
            try:
                emotion_mes = self.client.sentimentClassify(content)
            except ValueError as e:
                if not self.app_que.empty():
                    app_obj = self.app_que.get()
                    self.client = AipNlp(app_obj['APP_ID'], app_obj['API_KEY'],
                                         app_obj['SECRET_KEY'])
                    emotion_mes = self.client.sentimentClassify(content)
                else:
                    self.logger.debug('Emotion analysis failed because of %s' %
                                      e)
                    return None

            itemstr = 'items'
            emotion = -2
            if itemstr not in emotion_mes:
                if emotion_mes['error_msg'] == 'input text too long':
                    index = len(content) // 1011
                    emotion = 0
                    for i in range(0, index):
                        temp = content[i * 1011:(i + 1) * 1011]
                        try:
                            emotion_mes2 = self.client.sentimentClassify(temp)
                            emotion += emotion_mes2['items'][0]['sentiment']
                        except:
                            self.logger.debug('invalid argument')
                            emotion += 0
                    emotion /= index
                    return emotion - 1
            else:
                emotion = emotion_mes['items'][0]['sentiment']
                return emotion - 1

            return emotion
Example #20
    def sentiment_classify(self, texts, name):
        # use replace, not strip: str.strip('.txt') removes characters, not the suffix
        out_path = "../csv/情感分析" + name.replace('.txt', '') + ".csv"
        if os.path.exists(out_path):
            return
        client = AipNlp(appId=self.APP_ID, apiKey=self.API_KEY, secretKey=self.SECRET_KEY)
        with open(out_path, mode='w') as f:
            csv_write = csv.writer(f)
            csv_write.writerow(["分析文本", "积极情感", "消极情感", "sentiment", "confidence"])
            for item in texts:
                try:
                    result = client.sentimentClassify(text=item)
                    print("正在分析:" + item)
                    csv_write.writerow([result['text'], result['items'][0]["positive_prob"],
                                        result['items'][0]["negative_prob"], result['items'][0]["sentiment"],
                                        result['items'][0]["confidence"]])
                    print("正在写入:" + result['text'] + "\t" + str(result['items'][0]["positive_prob"]) + "\t" + str(
                        result['items'][0]["negative_prob"]) + "\t" + str(result['items'][0]["sentiment"]) + "\t" + str(
                        result['items'][0]["confidence"]))
                    time.sleep(random.randint(0, 1))
                    print("影评:" + name)
                except BaseException as e:
                    print("error:" + str(e))
                    continue
Example #21
def use_cal(text1):

    APP_ID = '18102862'
    API_KEY = 'igU7dumhhWws35yIMUE6wGRL'
    SECRET_KEY = 'hE9QieKEA3nYUrGIbKVbIdrmEZGsUGgS'

    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

    use_ls = ["看电影听音乐追剧看番", "办公出差商务表格", "玩打游戏电竞单机网游", "平面设计PS视频剪辑", "日常全能随便都行"]
    name_ls = [
        "use-media", "use-business", "use-gaming", "use-creator", "use-all"
    ]
    """ 调用短文本相似度 """
    score_ls = []
    score_dict = {}
    for i in range(len(use_ls)):
        time.sleep(0.5)
        result = client.simnet(text1, use_ls[i])["score"]
        score_ls.append(result)
        score_dict[name_ls[i]] = score_ls[i]

    return score_dict
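A usage sketch: scoring a free-text answer against the five usage profiles above (the returned dict is keyed by the name_ls labels):

scores = use_cal('平时主要打游戏,偶尔剪视频')
print(scores)  # e.g. {'use-media': ..., 'use-gaming': ..., 'use-all': ...}

Example #22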
    def open_spider(self, spider):
        self.conn = pymysql.connect(host=self.host,
                                    port=self.port,
                                    user=self.user,
                                    passwd=self.passwd,
                                    db='copm',
                                    charset='utf8')
        self.cursor = self.conn.cursor()

        sql = 'select keyword_main, activate from copm.config'
        self.cursor.execute(sql)
        result = self.cursor.fetchall()
        spider.keyword = set()
        for item in result:
            if item[1] != 0:
                spider.keyword.add(item[0])

        sql = 'select word, id from copm.filterword'
        self.cursor.execute(sql)
        result = self.cursor.fetchall()
        spider.filterword = set()
        for item in result:
            spider.filterword.add(item[0])
        sql = 'select name,code from copm.city'
        self.cursor.execute(sql)
        result = self.cursor.fetchall()
        city_names = []
        city_code = {}
        for item in result:
            city_names.append(item[0])
            city_code[item[0]] = item[1]
        self.city_pattern = '|'.join(city_names)
        self.city_code = city_code

        APP_ID = '11576120'
        API_KEY = 'WOSriMMnS9eSKGftEOtx5rb6'
        SECRET_KEY = '0Q6NaE89wbhRRjSVspjiDiiT6ZnGVllE'
        self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
Example #23
def article_tag():
    try:
        res = request.get_json()
        article_id = res.get('article_id')

        Logging.logger.info('request_args:{0}'.format(res))
        if not article_id:
            return jsonify(errno=-1, errmsg='参数错误,请传入要查询的文章的article_id')

        article = Article.query.get(article_id)
        if not article:
            return jsonify(errno=-1, errmsg='参数错误,该文章不存在')

        docs = mongo_store.articles.find({'title': article.title})
        doc = docs[0]
        title = doc.get('title')
        content_ls = doc.get('content')
        text = ''
        for content in content_ls:
            if content.get('text'):
                text += content.get('text')
        print(text)
        # text = text.encode('gbk')
        client = AipNlp(LibConfig.get_baidu_language_app_id(),
                        LibConfig.get_baidu_language_api_key(),
                        LibConfig.get_baidu_language_secret_key())
        result_tag = client.keyword(title, text)
        print(result_tag)
        result_topic = client.topic(title, text)
        print(result_topic)

        return jsonify(errno=0,
                       errmsg="OK",
                       result_topic=result_topic,
                       result_tag=result_tag)
    except Exception as e:
        Logging.logger.error('errmsg:{0}'.format(e))
        return jsonify(errno=-1, errmsg='网络异常')
Example #24
def cihui(t):
    APP_ID = '15874915'
    API_KEY = 'TeCFWb6YplxI3uzzizSInu2l'
    SECRET_KEY = 'asSiGgkYB2X0d5S5QbgoCUtkC737jxwW'

    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

    text = t.encode('gbk', 'ignore').decode('gbk')  # assumed intent: drop characters GBK cannot represent; a plain round-trip raises on non-GBK input
    """ call lexical analysis """
    result1 = client.lexer(text)
    keyword = ['原理', '步枪']
    for i in range(len(result1.get('items'))):
        print(
            result1.get('items')[i].get('item'),
            result1.get('items')[i].get('pos'))
    for i in range(len(result1.get('items'))):
        for j in keyword:
            if ((result1.get('items')[i].get('pos') == 'n'
                 or result1.get('items')[i].get('pos') == 'nz')
                    and result1.get('items')[i].get('item') == j):
                print(
                    "把子弹夹插入握把的弹匣内,因为弹夹左侧有个凹洞,和弹匣卡榫固定住后,拉动枪管衬套,衬套里是复进弹簧,当拉动枪管衬套后,到达弹夹上方,挂弹,松手后,受到复进弹簧的弹力,把子弹带动到了枪膛。这时子弹已经插入到位,即枪管的进弹口,此时子弹已在枪膛内。在拉动衬套的同时把击锤也张开,再把保险打开,当扣动扳机后,击锤打击撞针,撞针快速撞击子弹壳底火,子弹壳内的发射药燃烧,产生膨胀气体,将子弹头推出去。"
                )
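Example #25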
def looking_cal(text1):

    APP_ID = '18102862'
    API_KEY = 'igU7dumhhWws35yIMUE6wGRL'
    SECRET_KEY = 'hE9QieKEA3nYUrGIbKVbIdrmEZGsUGgS'

    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

    looking_ls = ["轻薄携带小巧精致", "商务办公大气公司出差低调沉稳", "炫酷帅RGB灯霸气威武"]
    name_ls = ["looking-elegent", "looking-business", "looking-cool"]

    """ call short-text similarity """
    score_ls = []
    score_dict = {}
    for i in range(len(looking_ls)):
        time.sleep(0.5)
        result = client.simnet(text1, looking_ls[i])["score"]
        score_ls.append(result)
        score_dict[name_ls[i]] = score_ls[i]

    return score_dict
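Example #26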
def analyze(number):
    """ 你的 APPID AK SK """
    APP_ID = '10254109'
    API_KEY = 'vnOAq33nhaWqcoTmjfxOOgKI'
    SECRET_KEY = 'praR1key1GSpmZu5P9mzeVHXwDKnjoLO  '

    pos = 0
    nav = 0
    i = 0
    avgpos = navavg = 0.0  # defaults so the plotting below cannot hit an undefined name if every request fails

    aipNlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    for line in open(number + ".txt", "r", encoding="gbk"):
        aline = line.replace("\r\n", "").strip()
        # if len(aline) != 0:
        try:
            result = aipNlp.sentimentClassify(aline)  # call the Baidu sentiment-analysis API
            positive = result['items'][0]['positive_prob']
            negative = result['items'][0]['negative_prob']

            i += 1
            if positive >= negative:
                pos += 1
            else:
                nav += 1
            avgpos = pos / i
            navavg = nav / i
            print(i, format(avgpos, ".4f"), format(navavg, ".4f"))
        except:
            pass
            # else:
            #     pass
    plt.bar([1], [avgpos], label=number + u"积极的", color="g")
    plt.bar([2], [navavg], label=number + u"消极的", color="b")

    plt.legend()  # draw the legend
    plt.savefig(number + ".jpg")
    plt.show()  # display the figure
Example #27
class MysqlPL(object):
    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    conn = None
    cursor = None

    def open_spider(self, spider):
        # create the news database and the "new" table (four columns) in advance
        self.conn = pymysql.Connect(
            host='127.0.0.1',
            port=3306,
            user="******",
            password='******',
            db='news',
        )

    def process_item(self, item, spider):
        title = item["title"]
        content = item['content']
        tag = self.client.keyword(title, content)  # keyword tags
        first_tag = tag.get('items')[0].get('tag')

        time.sleep(1)
        types = self.client.topic(title, content)  # topic category
        content_type = types.get('item').get('lv1_tag_list')[0].get('tag')
        sql = 'insert into new values ("%s","%s","%s","%s")' % (
            title, content, content_type, first_tag)
        self.cursor = self.conn.cursor()
        try:
            self.cursor.execute(sql)
            self.conn.commit()  # commit the insert; without it the row is discarded when the connection closes
        except Exception:
            self.conn.rollback()
        return item  # pass the item on to any later pipelines

    def close_spider(self, spider):
        self.cursor.close()
        self.conn.close()
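The string-formatted INSERT above is open to SQL injection and quoting errors; a parameterized sketch of the same write (pymysql uses %s placeholders):

sql = 'insert into new values (%s, %s, %s, %s)'
self.cursor.execute(sql, (title, content, content_type, first_tag))
self.conn.commit()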
Example #28
def Get_weibo_sim(weibo):
    """ 你的 APPID AK SK """
    APP_ID = '11286782'
    API_KEY = 'LGUPoiUYz8qvTISDa0gFySyl'
    SECRET_KEY = 'GVEOrDHC8Fie9IfNXL3v6Vxzwu2T7aGd'
    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    """ 如果有可选参数 """
    options = {}
    options["model"] = "CNN"
    """ 带参数调用短文本相似度 """
    sim = 0
    count = 0
    if len(weibo) > 1:
        for i in range(0, len(weibo)):
            for j in range(i + 1, len(weibo)):
                try:
                    # print(client.simnet(str(weibo[i]), str(weibo[j]), options))
                    sim = sim + client.simnet(str(weibo[i]), str(weibo[j]), options)['score']
                    count = count + 1
                except Exception:
                    pass
        return sim / (count + 1)  # the +1 guards against division by zero when every call fails
    else:
        return 0
Example #29
def get_morphology(text):
    client = AipNlp(BAIDU_KEY['WORD']['APP_ID'], BAIDU_KEY['WORD']['API_KEY'], BAIDU_KEY['WORD']['SECRET_KEY'])
    results = client.lexer(text)['items']
    word_list = {}
    for result in results:
        # count named entities, plus ordinary words that are not punctuation (w), auxiliaries (u) or numerals (m)
        if result['ne'] != '' or result['pos'] not in ('w', 'u', 'm'):
            word_list[result['item']] = word_list.get(result['item'], 0) + 1

    # keep words that occur more than three times
    word = [w for w, c in word_list.items() if c > 3]
    print(word)

    print(word_list)
keys = [
    # The opening of this snippet is truncated in the source; the first entry's
    # APP_ID and API_KEY are missing, so its remaining fragment is kept as a comment:
    # (..., ..., 'gk8pfmwGYWQ1meS4E92NtoGS91IuW8Xq'),
    ('10029045', 'qO5PCgQIXr6H7PGPuMlPxRM2',
     'Wb5GrF8sqAYx7mAWBTAMNElLLSM1bQw1'),
    '10207132 jpmBMNurABuj8Vrxe9GS765S 2tu3zKmAOqXNK4bpBOFGGAb7T9ETlfRm'.split(),
    '10207130 Ts0GItUZOGIVSu8HAA1278zn dd8GU9Acam3Trc2N1RTA2RhHIBu6Xfco'.split(),
    '10207129 XHnLcvsF9ZUxLbEVLACNr56c HGlAg5xXEdGIGP6lPzjUgSIfRHYxC8jl'.split(),
    '10207128 gPfTmFqxtG1RVNG16qrTQSjP sNn6BpGPzByoIFg0CnRtG8V6qKPpR3xd'.split(),
    '9688683 wndZFKVBmUTM5cfMb7C8UaOA T80yWu0WAkbOFoKRVQ9p8lZMzj6rLq7S'.split(),
]
clients = [
    AipNlp(APP_ID, API_KEY, SECRET_KEY) for APP_ID, API_KEY, SECRET_KEY in keys
]
api_url = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag'
t = 0
d = {'success': 0, 'error': 0}
clientID = 0
while True:
    t += 1
    clientID += 1
    clientID %= len(clients)  # rotate through the API keys round-robin
    for i, item in enumerate(
            tdb.meituan_comment.find({
                'baidu_result': None
            }).skip(t * 1000).limit(1000)):
        try:
            it = clients[clientID].commentTag(item['text'])