from aip import AipNlp


def feel_analyse(sentence):
    APP_ID = '10637556'
    API_KEY = 'rm0HA7EqfQ16HdOZMqwHkho5'
    SECRET_KEY = '3rM91Nj9Z3aLarTgMqvbexdwl0fN3vNd'
    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    # text = "这个东西很赞"
    print("Starting sentiment analysis:")
    # Keep only alphanumeric characters; the API rejects emoji and most symbols
    sentence = ''.join(e for e in sentence if e.isalnum())
    if len(sentence) < 1:
        return 1
    resp = client.sentimentClassify(sentence)
    print(resp)
    sentiment = resp['items'][0]['sentiment']
    return sentiment
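# Hypothetical usage sketch (not part of the original file): feel_analyse() returns the
# Baidu polarity code, 0 negative / 1 neutral / 2 positive, so a caller might map it to a
# label as the other snippets in this collection do. The sample sentence is an assumption.
if __name__ == '__main__':
    labels = {0: 'negative', 1: 'neutral', 2: 'positive'}
    print(labels.get(feel_analyse("这个东西很赞"), 'unknown'))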
'Alias': '', 'UserName': '******',
'HeadImgUrl': '/cgi-bin/mmwebwx-bin/webwxgeticon?seq=689492716&username=@b593cad398c88ef018e27cb8139dd7950f65c65a9008db08113e31cbdc06e716&skey=@crypt_7f691798_74dc8df3117a0cff045623c83c9b8660',
'RemarkPYInitial': '', 'ChatRoomId': 0, 'UniFriend': 0,
'MemberList': <ContactList: []>, 'DisplayName': '', 'RemarkName': '',
'KeyWord': '', 'Sex': 1}
"""
import os
import json

import itchat
from aip import AipNlp

itchat.auto_login(hotReload=True)
friends = itchat.get_friends(update=True)
client = AipNlp(appId='10984219',
                apiKey='nnfsI9ndgaq5f6G5G2REl51M',
                secretKey='YUeH8atHyMfNftTXq82LdfjitU7nxlhE')


def friend_data(base_path='/Volumes/zy/data/wx_friend/'):
    file_name = os.path.join(base_path, 'wx_friend.txt')
    with open(file_name, 'w', encoding='utf-8') as f:
        for friend in friends:
            json.dump(friend, f, ensure_ascii=False)


def friend_head_image(base_path='/Volumes/zy/data/wx_friend/images/'):
    for friend in friends:
        friend_name = friend['UserName']
class DemospiderPipeline(object):
    # Set up database, API client, stop words and the sentiment dictionary
    def __init__(self):
        # Call the parent constructor
        super().__init__()
        # Connect to the remote database
        self.database = pymysql.connect(host=settings.MYSQL_HOST,
                                        port=3306,
                                        db=settings.MYSQL_DBNAME,
                                        user=settings.MYSQL_USER,
                                        passwd=settings.MYSQL_PASSWD,
                                        charset='utf8',
                                        use_unicode=True)
        # Fetch the museum list
        cursor = self.database.cursor()
        cursor.execute("select id, name from museum")
        self.museums = cursor.fetchall()
        cursor.close()
        # Baidu text-analysis API client
        self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
        # Load stop words from file
        stopwords_csv = pd.read_csv(
            "F:/Codes/PySpider/demoSpider/jieba/stopwords.txt",
            index_col=False,
            quoting=3,  # quoting=3: never quote
            sep="\t",
            names=['stopword'],
            encoding='utf-8')
        # Build the stop-word list
        self.stopwords = []
        for i in range(0, stopwords_csv.shape[0]):
            self.stopwords.append(stopwords_csv.iloc[i, 0])
        # Build the sentiment-word dictionary
        self.word_dict = {}
        # Load sentiment words from file
        with open(
                "F:/Codes/PySpider/demoSpider/BosonNLP_sentiment_score/BosonNLP_sentiment_score.txt",
                mode='r',
                encoding='UTF-8') as f:
            lines = f.readlines()
            for line in lines:
                index = line.find(" ")
                word = line[0:index]
                score = line[index + 1:]
                if not score.isspace():
                    if score[0] == '-':
                        score = -float(score[1:])
                    else:
                        score = float(score)
                    self.word_dict[word] = score

    # Look up the weighted sentiment score for a (word, tf-idf weight) pair
    def getScore(self, word):
        tf_idf = word[1]
        word = word[0]
        return tf_idf * self.word_dict.get(word, 0)

    # Analyse and clean the scraped item
    def process_item(self, item, spider):
        # Skip items whose content is empty
        if item['content']:
            # Check whether the article mentions any of the museums
            museums = self.museums
            for museum_id, museum_name in museums:
                # The museum name appears in the text
                if museum_name in item['content']:
                    new_id = item['news_id']
                    cursor = self.database.cursor()
                    try:
                        cursor.execute(
                            "insert into museum_has_new (museum_id, new_id) values ({0}, {1})"
                            .format(museum_id, new_id))
                        self.database.commit()
                    except:
                        self.database.rollback()
                    cursor.close()
            # Segment the text into words
            word_list = jieba.lcut(item['content'])
            # Remove stop words, whitespace and digits after segmentation
            l1 = []
            for word in word_list:
                if not word.isdigit() and not word.isspace() and word not in self.stopwords:
                    l1.append(word)
            # Extract keywords weighted by TF-IDF
            tf_idf_keyword_list = analyse.extract_tags(item['content'],
                                                       topK=50,
                                                       withWeight=True,
                                                       allowPOS=())
            # Drop filtered words, keep their weights and accumulate the sentiment score
            l2 = []
            tf_idf_score = 0
            for word in tf_idf_keyword_list:
                if not word[0].isdigit() and not word[0].isspace() and word[0] not in self.stopwords:
                    l2.append(word)
                    tf_idf_score += self.getScore(word)
            # Derive a tag from the TF-IDF score
            if tf_idf_score < 0:
                tag1 = 0
            elif tf_idf_score < 1:
                tag1 = 1
            else:
                tag1 = 2
            # Extract keywords with the TextRank algorithm
            TextRank_keyword_list = analyse.textrank(item['content'],
                                                     topK=50,
                                                     withWeight=True)
            # Drop filtered words, keep their weights and accumulate the sentiment score
            l3 = []
            TextRank_score = 0
            for word in TextRank_keyword_list:
                if not word[0].isdigit() and not word[0].isspace() and word[0] not in self.stopwords:
                    l3.append(word)
                    TextRank_score += self.getScore(word)
            # Derive a tag from the TextRank score
            if TextRank_score < 0:
                tag2 = 0
            elif TextRank_score < 1:
                tag2 = 1
            else:
                tag2 = 2
            # Sentiment classification via the Baidu API
            sentence = item['content'].replace('\xa0', "").replace(
                '\xa9', '').replace('\xae', '').replace('\u2022', '').encode('gbk')
            sentence = sentence.decode('gbk')
            result = self.client.sentimentClassify(sentence)
            # On an API error fill in a placeholder so the pipeline keeps running
            if "error_code" in result.keys():
                tag3 = -1
            else:
                data = result['items']
                items = data[0]
                tag3 = items['sentiment']
            # Vote to get the final tag
            if tag3 != -1:
                tag = round((tag1 + tag2 + tag3) / 3)
            else:
                tag = round(round(tag1 + tag2) / 2)
            item['content_list'] = l1
            item['tag'] = tag
            # Update the news tag in the database
            sql = "UPDATE new set tag = {} WHERE id = '{}'".format(
                tag, item['news_id'])
            cursor = self.database.cursor()
            try:
                # Execute the SQL statement
                cursor.execute(sql)
                # Commit to the database
                self.database.commit()
            except:
                # Roll back on error
                self.database.rollback()
            cursor.close()
            return item
        else:
            raise DropItem("Missing content in %s" % item)
from aip import AipNlp
import json
import time

""" Your APPID / AK / SK """
APP_ID = '21663495'
API_KEY = 'SAoaTBzVOG76MnuFrmP3F1D7'
SECRET_KEY = 'xEq6KG8w73z4ZGp9jM5FprGXSNQDHwl0'
client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

text = "我叫马维畅"
text1 = "百度是一家高科技公司"

""" Call lexical analysis """
txt = client.lexer(text)
txt1 = client.lexerCustom(text1)
print(txt, txt1)
# print(json.dumps(txt, ensure_ascii=False, indent=4))
from aip import AipSpeech
import os
import uuid
from aip import AipNlp
import to_tuling

""" Your APPID / AK / SK """
APP_ID = '11710179'
API_KEY = 'Pmnoe7g14eryGgkDfMQAqakk'
SECRET_KEY = '2sTfYI0GRhdCKazWQR9L1AqfGwt7FNAc '
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
baidu_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)


def audio2text(file_name):
    # Convert wav/wma/mp3 etc. audio files to raw PCM
    cmd_str = "ffmpeg -y -i %s -acodec pcm_s16le -f s16le -ac 1 -ar 16000 %s.pcm" % (
        file_name, file_name)
    # Run the command through the OS to perform the conversion
    os.system(cmd_str)
    # Read the audio data
    file_content = ""
    with open(f"{file_name}.pcm", 'rb') as f:
        file_content = f.read()
    # Convert speech to text
    res = client.asr(file_content, 'pcm', 16000, {'dev_pid': 1536})
    # Return the recognised text
    return res.get('result')[0]


def text2audio(text):
from aip import AipNlp

# Note: this runs on Python 3.6
APP_ID = '11793791'
API_KEY = 'iaTErc4r5GXNT56tYnlVtVtk'
SECRET_KEY = '24P7ImcU7kEaOmoBxDy9giNe6evkYca4'

# Create the client instance. Note: AipNlp is spelled with the letter "l", not the digit 1
nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
res = nlp_client.simnet("你叫什么名字", "你的名字叫什么")  # Similarity test
print(res)
if res.get("score") > 0.7:
    print("我叫赵振伟")  # Similarity-based automatic reply
import subprocess
import os

from aip import AipSpeech, AipNlp  # Speech-to-text / text-to-speech and NLP clients

""" Your APPID / AK / SK for the Baidu Cloud API """
APP_ID = '16981704'
API_KEY = 'CeLs5zCuQwWXBhHbrnDGQhc3'
SECRET_KEY = 'HIOyvsDRcXKlP95NOY72CAUznUIC6OKZ'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)   # Client used to upload audio for recognition
nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # Text-similarity interface


# Read the audio data
def get_file_content(filePath):
    # Convert the incoming file to PCM
    cmd = f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm"
    # r = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)  # This call is asynchronous
    # stdout = r.stdout.read().decode('gbk')
    # print(stdout)
    # stderr = r.stderr.read().decode('gbk')
    # print(stderr)
    os.system(cmd)  # Convert the recording format
    try:
        with open(f"{filePath}.pcm", 'rb') as fp:
            return fp.read()  # Return the raw bytes
    finally:
        os.remove(filePath)
class Emotion(object):
    def __init__(self):
        APP_ID = '10362966'  # Your App ID
        API_KEY = 'nQWiWR6DzjXsfYjW1yyVy8TB'  # Your Api Key
        SECRET_KEY = 'WpjMdNWYv6TSg2psofaGt4LNW366tvnj'  # Your Secret Key
        self.db = Mysql_DB()
        self.aip = AipNlp(APP_ID, API_KEY, SECRET_KEY)
        self.trans = OpenCC('t2s')  # Traditional-to-simplified conversion

    def Get_Sentence(self):
        sql = "select id, Content from tencent_comment where over = 'N' limit " + str(100)
        try:
            Sentence_list = self.db.Query_MySQL(sql)  # Fetch one batch of rows from the database
            for i in Sentence_list:
                # Mark each fetched row as processed so it is not read again
                self.update_db(i[0])
            return Sentence_list
        except Exception as e:
            print('query_db failed ' + str(e))

    def update_db(self, i):
        changeY_sql = "update tencent_comment set over = 'Y' where id = " + str(i)
        try:
            self.db.Insert_MySQL(changeY_sql)
        except Exception as e:
            print('failed to set the over flag ' + str(e))

    def Get_Analyse(self):
        sentence_list = self.Get_Sentence()
        r = re.compile(ur"[\u0000-\u4dff,\u9fa6-\uffff]")  # Strip everything except Chinese characters
        for i in sentence_list:
            try:
                simple = self.trans.convert(i[1])
                # print i[1].strip().encode('utf-8', 'ignore')
                result = self.aip.sentimentClassify(simple.strip().encode('utf-8', 'ignore'))
                # print result
                '''print result['items'][0]['positive_prob']  # probability of the positive class
                print result['items'][0]['confidence']     # confidence of the classification
                print result['items'][0]['negative_prob']  # probability of the negative class
                print result['items'][0]['sentiment']      # polarity: 0 negative, 1 neutral, 2 positive'''
                s = str(result['items'][0]['sentiment'])
                p = str(result['items'][0]['positive_prob'])
                n = str(result['items'][0]['negative_prob'])
                c = str(result['items'][0]['confidence'])
                sql = "update tencent_comment set sentiment = %s, positive_prob = %s, negative_prob = %s, confidence = %s" % (
                    s, p, n, c) + " where id = " + str(i[0])
                self.db.Insert_MySQL(sql)
            except Exception as e:
                print('Baidu API failed on the raw text, retrying with cleaned text ' + str(e))
                try:
                    simple = self.trans.convert(i[1])
                    re_s = r.sub(',', simple)
                    result = self.aip.sentimentClassify(re_s.strip().encode('utf-8', 'ignore'))
                    s = str(result['items'][0]['sentiment'])
                    p = str(result['items'][0]['positive_prob'])
                    n = str(result['items'][0]['negative_prob'])
                    c = str(result['items'][0]['confidence'])
                    sql = "update tencent_comment set sentiment = %s, positive_prob = %s, negative_prob = %s, confidence = %s" % (
                        s, p, n, c) + " where id = " + str(i[0])
                    self.db.Insert_MySQL(sql)
                except Exception as e:
                    print('retry failed as well, giving up ' + str(e))
def __init__(self, APP_ID, API_KEY, SECRET_KEY):
    """
    Parameters:
    """
    self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
import time

import pandas as pd
from aip import AipNlp


class NLP(object):
    def __init__(self, APP_ID, API_KEY, SECRET_KEY):
        """
        Parameters:
        """
        self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

    def commentTag(self, News_Series, type_=4):
        """
        Parameters:
            type_: which category you want to match
            News_Series: series of news texts
        -----------------------------------------------
        Returns:
            a dataframe which has columns:
            log_id prop adj sentiment begin_pos end_pos abstract
        """
        options = {}
        options["type"] = type_

        def f(News_Series):
            for text in News_Series:
                res = self.client.commentTag(text, options)  # The API returns a JSON-like dict
                print(res)
                yield {
                    "log_id": res["log_id"],
                    "prop": res["items"][0]["prop"],
                    "adj": res["items"][0]["adj"],
                    "sentiment": res["items"][0]["sentiment"],
                    "begin_pos": res["items"][0]["begin_pos"],
                    "end_pos": res["items"][0]["end_pos"],
                    "abstract": res["items"][0]["abstract"]
                }

        result = f(News_Series)
        res_df = pd.DataFrame(list(result))
        return res_df

    def sentiment(self, text_series):
        """
        Parameters:
            text: string
        ------------------------------
        Returns:
            DataFrame which has columns:
            text --> str
            sentiment --> int; polarity, 0: negative, 1: neutral, 2: positive
            confidence --> float; confidence of the classification
            positive_prob --> float; probability of the positive class
            negative_prob --> float; probability of the negative class
        ------------------------------
        """
        df_sentiment = pd.DataFrame()
        results = []
        for text in text_series:
            results.append(self.client.sentimentClassify(text))
            # Throttle requests to avoid hitting the rate limit
            time.sleep(3)
        text = [result["text"] for result in results]
        sentiment = [result["items"][0]["sentiment"] for result in results]
        confidence = [result["items"][0]["confidence"] for result in results]
        positive_prob = [
            result["items"][0]["positive_prob"] for result in results
        ]
        negative_prob = [
            result["items"][0]["negative_prob"] for result in results
        ]
        df_sentiment["text"] = text
        df_sentiment["sentiment"] = sentiment
        df_sentiment["confidence"] = confidence
        df_sentiment["positive_prob"] = positive_prob
        df_sentiment["negative_prob"] = negative_prob
        return df_sentiment, text, sentiment

    def keyword(self, title, content):
        """
        Parameters:
        -------------------------------
        Returns:
        -------------------------------
        """
        result = self.client.keyword(title, content)
        return result

    def topic(self, title, content):
        """
        Parameters:
        --------------------------------
        Returns:
        --------------------------------
        """
        result = self.client.topic(title, content)
        return result
def __init__(self):
    super(BaiDuNlp, self).__init__()
    self.APP_ID = '10323015'
    self.API_KEY = 'zYbYSDZxIFvH4I53ye2jp8qf'
    self.SECRET_KEY = '3os02bOi9hxZC9775MbKVcYo4BP7GTSm'
    self.client = AipNlp(self.APP_ID, self.API_KEY, self.SECRET_KEY)
import sys

import requests
import matplotlib.pyplot as plt
from aip import AipNlp
from bs4 import BeautifulSoup as BS
from matplotlib.font_manager import FontProperties
from wordcloud import WordCloud


class douban():
    def __init__(self):
        APP_ID = '********'  # Baidu NLP APP_ID
        API_KEY = '************************'  # Baidu NLP API_KEY
        SECRET_KEY = '***********************************'  # SECRET_KEY
        self.client = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # Baidu API client
        self.lists = []  # Sentiment scores of the short reviews
        self.nums = 0  # Total number of short reviews
        self.font = FontProperties(
            fname='/usr/share/fonts/adobe-source-han-serif/SourceHanSerifCN-Heavy.otf'
        )  # Font used for the matplotlib histogram

    def get_content(self):
        print("Start crawling")
        for i in range(0, 200, 20):
            target = 'https://movie.douban.com/subject/26425062/comments?start=' + str(
                i) + '&limit=20&sort=new_score&status=P'
            req = requests.get(target)
            req.encoding = "UTF-8"
            html = BS(req.text)
            texts = html.find_all('span', class_='short')
            for each in texts:
                with open('yingping.txt', 'a', encoding='UTF-8') as f:
                    f.write(each.string.replace("\n", "") + '\n')
                self.nums += 1
        print("Crawling finished, generating the word cloud")

    def get_word(self):
        f = open(r'yingping.txt', 'r').read()
        wordcloud = WordCloud(
            collocations=False,
            width=2000,
            height=1860,
            margin=2,
            font_path=r'/usr/share/fonts/adobe-source-han-serif/SourceHanSerifCN-Heavy'
        ).generate(f)
        plt.imshow(wordcloud)
        plt.axis("off")
        plt.show()
        wordcloud.to_file('result.png')
        print("Word cloud generated")

    def qinggan(self):
        with open('yingping.txt', 'r', encoding='UTF-8') as f:
            line = f.readline()
            i = 1
            print("Analysing sentiment")
            while line:
                num = self.client.sentimentClassify(
                    line.encode("gbk", 'ignore').decode("gbk", "ignore")
                )["items"][0]["positive_prob"]
                self.lists.append(num)
                sys.stdout.write("analysed: %.2f%%" % float(100 * (i + 1) / self.nums) + '\r')
                sys.stdout.flush()
                line = f.readline()
                i += 1
            sys.stdout.write("done: %.2f%%" % float(100 * (i + 1) / self.nums) + '\n')
            print('Analysis finished')

    def get_hist(self):
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.hist(self.lists, bins=10)
        plt.title('武林怪兽前两百条短评情感', fontproperties=self.font)
        plt.xlabel('情感积极度', fontproperties=self.font)
        plt.ylabel('数量', fontproperties=self.font)
        plt.show()
from aip import AipNlp

APP_ID = '10437623'
API_KEY = 'Y1bwwQaaGZhcCW1aYl12cIeD'
SECRET_KEY = 'qMGZkOPiegiVVpyzh7AySBG2ubY0uY1V'

client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
result = client.lexer('我想要下午去看一场银翼杀手')
print(type(result))
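# A minimal follow-up sketch (assumption, not in the original script): the lexer response
# is a dict whose 'items' list holds one entry per token, each with fields such as 'item',
# 'pos' and 'ne', as the other snippets in this collection access them.
for token in result.get('items', []):
    print(token['item'], token['pos'], token['ne'])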
def GetSentenceUnderstoodView(request):
    # import json
    if request.method == 'POST':
        # ju = json.loads(request.body)
        body1 = json.loads(str(request.body, encoding="utf-8"))
        client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
        data = client.lexer(body1['content'])
        classified = {'verb': [], 'noun': [], 'time': []}
        listAll = []
        locationList = ['nz', 'n', 'nw', 'nt', 'nr', 'LOC', 'ORG']
        j = 0
        while j < len(data['items']):
            if data['items'][j]['pos'] == 'v':
                listAll.append(data['items'][j])
            if (data['items'][j]['pos'] in locationList) or (data['items'][j]['ne'] in locationList):
                listAll.append(data['items'][j])
            if data['items'][j]['pos'] == 'TIME':
                pass
            j = j + 1
        filter = ['想', '想要', '要', '还要']
        # criticalVerbs = ["去看", "看", "去", "回", "到", "唱", "参加", "开会", "上班", "上学", "上课", "买", "喝", "吃", "玩"]
        i = 0
        GeneralList = []
        while i < len(listAll):
            # if (i+1) != len(listAll):
            #     print(listAll[i]['item'])
            if i != len(listAll) - 1:
                if (listAll[i]['pos'] == 'v') and (listAll[i + 1]['pos'] != 'v') and (listAll[i]['item'] not in filter):
                    classified['verb'].append(listAll[i])
                if (listAll[i]['pos'] == 'v') and (listAll[i + 1]['pos'] == 'v') and (listAll[i]['item'] not in filter):
                    if (listAll[i]['item'] == '去'):
                        pass
                    else:
                        classified['verb'].append(listAll[i])
                        GeneralList.append(classified)
                        classified = {'verb': [], 'noun': [], 'time': []}
                        # print(classified)
            if i == len(listAll) - 1:
                if (listAll[i]['pos'] == 'v') and (listAll[i]['pos'] not in filter):
                    classified['verb'].append(listAll[i])
                    GeneralList.append(classified)
                    classified = {'verb': [], 'noun': [], 'time': []}
            if i != len(listAll) - 1:
                if (listAll[i]['pos'] in locationList) or (listAll[i]['ne'] in locationList):
                    classified['noun'].append(listAll[i])
                    if (listAll[i + 1]['pos'] not in locationList) and (listAll[i + 1]['ne'] not in locationList):
                        GeneralList.append(classified)
                        classified = {'verb': [], 'noun': [], 'time': []}
            if i == len(listAll) - 1:
                if (listAll[i]['pos'] in locationList) or (listAll[i]['ne'] in locationList):
                    classified['noun'].append(listAll[i])
                    GeneralList.append(classified)
                    classified = {'verb': [], 'noun': [], 'time': []}
            i = i + 1
        # print(GeneralList)

        def getlocation(str1):
            pass
            # if (str1 == '吃'):
            #     request()

        def OutputNaive(data, GeneralList):
            start, end = 0, 0
            a = 0
            naiveList = []
            while a < len(GeneralList):
                naiveList.append([[], [], []])
                print('Part', a + 1)
                start = end
                endList = []
                v = 0
                while v < len(GeneralList[a]['verb']):
                    naiveList[a][0].append(GeneralList[a]['verb'][v]['item'])
                    endList.append(GeneralList[a]['verb'][v]['byte_offset'])
                    v = v + 1
                n = 0
                if len(GeneralList[a]['noun']) == 0:
                    end = 100
                while n < len(GeneralList[a]['noun']):
                    naiveList[a][1].append(GeneralList[a]['noun'][n]['item'])
                    endList.append(GeneralList[a]['noun'][n]['byte_offset'])
                    n = n + 1
                c = 0
                end = max(endList)
                while c < len(data['items']):
                    if start <= data['items'][c]['byte_offset'] <= end:
                        if data['items'][c]['ne'] == 'TIME' or (
                                data['items'][c]['pos'] == "m" and '点' in data['items'][c]['basic_words']):
                            naiveList[a][2].append(
                                String2TimeObj(data['items'][c]['item']))
                    c += 1
                a = a + 1
            return naiveList

        outputList = OutputNaive(data, GeneralList)
        # print(outputList)

        def p2j(i):
            return json.dumps(i.isoformat())

        def extractAllowedTime(action):
            try:
                return [action[2][0]["startTime"], action[2][0]["endTime"]]
            except:
                return [datetime.datetime.now() for i in range(1)]

        def ParseAction(action):
            actionObj = {
                "type": "togo" if (action[0][0] == "去") or (action[0][0] == "到") else "todo",
                "description": action[0] + action[1],
                "allowedTime": [p2j(i) for i in extractAllowedTime(action)]
            }
            return actionObj

        def Naive2JSON(naiveList):
            node = {"goals": [ParseAction(action) for action in naiveList]}
            return node

        print(Naive2JSON(outputList))
        return HttpResponse(Naive2JSON(outputList))
# Import the NLP SDK
from aip import AipNlp
import aip

# Define constants
APP_ID = '9839224'
API_KEY = '38aM2cGHnGXgfjwPgNv3hgHN'
SECRET_KEY = 'ze0DckCR2GTpFcz8LX17L61Ec8NV9Bc7'


# Read a picture file
def get_file_content(filePath):
    with open(filePath, 'rb') as fp:
        return fp.read()


# Initialise the AipNlp object
aipNlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

# Call the word-segmentation interface
result = aipNlp.wordseg('你好百度')
print result

# Define optional parameters
option = {'lang_id': 1}
# Call the word-segmentation interface with options
result = aipNlp.wordseg('你好百度', option)
print result

result = aipNlp.wordpos('百度')
print result
# Redis
pool = redis.ConnectionPool(
    host=os.environ.get('OPENSHIFT_REDIS_HOST', 'localhost'),
    port=int(os.environ.get('OPENSHIFT_REDIS_PORT', '16379')),
    password=os.environ.get('REDIS_PASSWORD', None))
app.redis = redis.StrictRedis(connection_pool=pool)

# Baidu Brain API credentials
APP_ID = os.environ.get('BAIB_ID', None)
API_KEY = os.environ.get('BAIB_KEY', None)
SECRET_KEY = os.environ.get('BAIB_SECRET', None)

# Initialise the AipNlp object only when all credentials are configured
if APP_ID and API_KEY and SECRET_KEY:
    app.aipNlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
else:
    app.aipNlp = None

from views import *
from user import user_page, mima_page
from mindmappage import map_page
from oversea import uni_major_page, research_page
from reciteword import recite_word_page
from talkerchu import talkerchu_page
from course import tutorial_page, practice_page, answer_checker

app.register_blueprint(map_page)
app.register_blueprint(user_page)
app.register_blueprint(tutorial_page)
app.register_blueprint(practice_page)
# -*- coding: utf-8 -*-
from pymongo import MongoClient
from aip import AipNlp
import json
import time
import datetime

""" Your APPID / AK / SK """
APP_ID = '11508916'
API_KEY = 'FegRIyeMcFxmrbp0435XjPGW'
SECRET_KEY = 'm9hO7Nu9qgf3SvrAsfvZrv9ETZMlHkGO'
client_AipNlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)


def stopwordslist(filepath):
    stopwords = [
        line.strip()
        for line in open(filepath, 'r', encoding='utf-8').readlines()
    ]
    return stopwords


# Connect to MongoDB
client = MongoClient('localhost', 27017)
# Select the database (FiveG_news)
db = client.FiveG_news
# Select the collection, i.e. the table (xinChuang_topic)
collection = db.xinChuang_topic
# collection can now be used for operations on that collection
import pickle


def load_model(file_path):
    model = None
    with open(file_path, 'rb') as f:
        model = pickle.load(f)
    return model


from aip import AipNlp

app_id = 'XXXX'
api_key = 'XXXX'
secret_key = 'XXXX'
client = AipNlp(app_id, api_key, secret_key)

test_data = load_data('test_data_remove_spell.txt')
train_data = load_data('train_data_remove_spell.txt')


def correct(data, text_out_path, bin_out_path):
    responses = []
    texts = []
    i = 0
    for line in data:
        if i % 100 == 0:
            print(i)
        i += 1
        response = client.ecnet(line)
        responses.append(response)
class BaiduNlpApi:
    __APP_ID = '16222729'
    __API_KEY = 'dQ5XOnC1aV8KXWi5Yqj8MbrB'
    __SECRET_KEY = '4WvLdHbBGiIb9yT9lEZvS2hGoqLM6mPr'
    # A second set of credentials
    __APP_ID_1 = '16236180'
    __API_KEY_1 = 'IDfyuuxE381P8wHVGiZiUtkx'
    __SECRET_KEY_1 = 'T9z0AE6hEZeDrlvyrsafqHox9TnwCNxN'
    __APP_IDS = ['16222729', '16236180']
    __API_KEYS = ['dQ5XOnC1aV8KXWi5Yqj8MbrB', 'IDfyuuxE381P8wHVGiZiUtkx']
    __SECRET_KEYS = [
        '4WvLdHbBGiIb9yT9lEZvS2hGoqLM6mPr',
        'T9z0AE6hEZeDrlvyrsafqHox9TnwCNxN'
    ]

    def __init__(self, texts, id_num=0):
        self.client = AipNlp(self.__APP_IDS[id_num], self.__API_KEYS[id_num],
                             self.__SECRET_KEYS[id_num])
        self.texts = texts

    def set_texts(self, texts):
        self.texts = texts
        return self

    def senti_auto_split(self):
        def get_senti_api(text):
            for retry in range(10):
                try:
                    ret = self.client.sentimentClassify(text)
                    senti_items = ret["items"][0]
                    return senti_items
                except Exception as e:
                    if 'ret' in locals().keys() and 'error_code' in ret:
                        if ret['error_code'] in [18]:
                            time.sleep(1)
                            continue
                        print('err', ret['error_code'], e, text)
                        time.sleep(1)
                        continue
                    else:
                        print('err', e, text)
                        time.sleep(10)
                        continue
            else:
                return None

        senti_results = []
        for text in self.texts:
            text_len = len(text)
            if text_len < 1000:
                a_text_list = [text]
            else:
                s_ = 0
                a_text_list = []
                for offset in range(1000, text_len + 1, 1000):
                    a_text_list.append(text[s_:offset])
                    s_ = offset
                if s_ < text_len:
                    a_text_list.append(text[s_:text_len])
            total_pos = 0
            total_neg = 0
            for frag_cnt, frag in enumerate(a_text_list, start=1):
                ret = get_senti_api(frag)
                if ret:
                    total_pos += ret["positive_prob"]
                    total_neg += ret["negative_prob"]
                else:
                    total_pos = -1
                    total_neg = -1
                    break
            else:
                total_pos /= frag_cnt
                total_neg /= frag_cnt
            print(total_pos, total_neg, text)
            senti_results.append([total_pos, total_neg])
        return senti_results

    # Sentiment analysis interface
    def senti_by_sdk(self, probs_only=True):
        """
        +sentiment      number  polarity of the classification, 0: negative, 1: neutral, 2: positive
        +confidence     number  confidence of the classification
        +positive_prob  number  probability of the positive class
        +negative_prob  number  probability of the negative class
        :return:
        """
        senti_results = []
        for text in self.texts:
            senti_items = []
            n = 10
            while n > 0:
                try:
                    ret = self.client.sentimentClassify(text)
                    senti_items = ret["items"][0]
                    break
                except Exception as e:
                    print(e)
                    n -= 1
                    continue
            if senti_items:
                if not probs_only:
                    senti_results.append(senti_items)
                else:
                    senti_results.append([
                        senti_items["positive_prob"],
                        senti_items["negative_prob"]
                    ])
            else:
                if not probs_only:
                    senti_results.append({
                        'positive_prob': -1,
                        'confidence': 0,
                        'negative_prob': -1,
                        'sentiment': -1
                    })
                else:
                    senti_results.append([-1, -1])
        return senti_results

    # Classification: return only the final top-level category
    def category_by_sdk(self, texts_data):
        def get_senti_api(text):
            title, text = text
            for retry in range(10):
                try:
                    ret = self.client.topic(title, text)
                    print(ret, flush=True)
                    category_info = ret["item"]['lv1_tag_list'][0]
                    return category_info
                except Exception as e:
                    if 'ret' in locals().keys() and 'error_code' in ret:
                        if ret['error_code'] in [18]:
                            time.sleep(1)
                            continue
                        elif ret['error_code'] in [282131]:
                            # text = text[:50]
                            # The title must not exceed 80 bytes, i.e. 20 Chinese characters
                            title = title[:20]
                            time.sleep(1)
                            continue
                        print('err', ret["error_code"], e, text)
                        time.sleep(1)
                        continue
                    else:
                        print('err', e, text)
                        time.sleep(10)
                        continue
            else:
                return None

        cat_results = []
        for title, text in texts_data:
            text_len = len(text)
            if text_len < 1000:
                a_text_list = [[title, text]]
            else:
                s_ = 0
                a_text_list = []
                for offset in range(1000, text_len + 1, 1000):
                    a_text_list.append([title, text[s_:offset]])
                    s_ = offset
                if s_ < text_len:
                    a_text_list.append([title, text[s_:text_len]])
            category_total = {}
            for frag_cnt, frag in enumerate(a_text_list, start=1):
                ret = get_senti_api(frag)
                if ret:
                    if ret['tag'] in category_total:
                        category_total[ret['tag']] += ret['score']
                    else:
                        category_total[ret['tag']] = ret['score']
            if len(category_total) == 0:
                cat_results.append(-1)
            else:
                ret_cat = max(category_total, key=category_total.get)
                print(ret_cat, category_total[ret_cat], text)
                cat_results.append(ret_cat)
        return cat_results

    # Classification: return top-level and sub-level categories with scores
    def category_with_more_info(self, texts_data):
        def deal_with_ret(ret_data):
            """
            Purpose: convert the API response into the format stored in the database
            :param ret_data:
            :return:
            """
            aim_dict = dict()
            if ret_data is not None:
                category_info = ret_data["item"]
                aim_dict["category"] = category_info["lv1_tag_list"][0]["tag"]
                aim_dict["cate_score"] = category_info["lv1_tag_list"][0]["score"]
                aim_dict["sub_cates"] = category_info["lv2_tag_list"]
            else:
                aim_dict["category"] = -1
                aim_dict["cate_score"] = -1
                aim_dict["sub_cates"] = []
            return aim_dict

        def get_cate_api(text):
            """
            Purpose: classify a text through the API
            :param text: [title, content]
            :return:
            """
            title, content = text
            for retry in range(10):
                try:
                    ret = self.client.topic(title, content)
                    aim_dict = deal_with_ret(ret)
                    return aim_dict
                except Exception as e:
                    if 'ret' in locals().keys() and 'error_code' in ret:
                        # Request exceeded the QPS quota
                        if ret["error_code"] == 18:
                            time.sleep(1)
                            continue
                        # # Text over the length limit - handled by the caller
                        # elif ret["error_code"] == 282131:
                        #     # The body is split outside, so this error must come from the title
                        #     title = title[:20]
                        #     continue
                        print("err", ret["error_code"], e, text)
                        time.sleep(1)
                        continue
                    else:
                        print("err", e, text)
                        time.sleep(10)
                        continue
            else:
                return None

        def seg_text(title, content):
            """
            Purpose: split the text into segments within the length limits
            :param title:
            :param content:
            :return:
            """
            CONTENT_LIMIT = 2000
            TITLE_LIMIT = 20
            text_segments = []
            title = title[:TITLE_LIMIT]
            for c in range(0, len(content), CONTENT_LIMIT):
                text_segments.append([title, content[c:c + CONTENT_LIMIT]])
            return text_segments

        def merge_same_subcates(subcates_list):
            """
            Purpose: merge identical sub-categories (keep one key and sum the scores);
            this can happen when a long text is classified segment by segment
            :param subcates_list:
            :return:
            """
            if not len(subcates_list):
                subcates_list_ = subcates_list
            else:
                df_subcates = pd.DataFrame(subcates_list)
                df_subcates_gb = df_subcates.groupby(
                    "tag", as_index=False).agg({"score": "sum"})
                df_subcates_gb = df_subcates_gb.sort_values(
                    by="score", ascending=False).reset_index(drop=True)
                subcates_list_ = df_subcates_gb.to_dict(orient="records")
            return subcates_list_

        def deal_seg_results(cates_info_list):
            """
            Purpose: merge the per-segment classification results into one result for the original text
            :param cates_info_list:
            :return:
            """
            df_segs = pd.DataFrame(cates_info_list)
            df_segs_groupby = df_segs.groupby("category", as_index=False).agg({
                "cate_score": "sum",
                "sub_cates": lambda x: sum(x, [])
            })
            df_segs_groupby = df_segs_groupby.sort_values(
                by="cate_score", ascending=False)  # .reset_index(drop=True)
            # Deduplicate sub_cates
            df_segs_groupby["sub_cates"] = df_segs_groupby["sub_cates"].apply(
                lambda x: merge_same_subcates(eval(x))
                if isinstance(x, str) else merge_same_subcates(list(x))
                if isinstance(x, np.ndarray) else merge_same_subcates(x))
            final_cate_info = df_segs_groupby.iloc[0].to_dict()
            return final_cate_info

        final_cates = []
        for title, content in texts_data:
            texts = seg_text(title, content)
            seg_cates_results = []
            for text_ in texts:
                aim_cate_dict = get_cate_api(text_)
                seg_cates_results.append(aim_cate_dict)
            cate_dict = deal_seg_results(seg_cates_results)
            final_cates.append(cate_dict)
        return final_cates

    def newsSummary(self, max_summary_len=100):
        def get_newsSummary_api(text):
            for retry in range(10):
                try:
                    ret = self.client.newsSummary(text, max_summary_len)
                    summary = ret["summary"]
                    return summary
                except Exception as e:
                    if 'ret' in locals().keys() and 'error_code' in ret:
                        if ret['error_code'] in [18]:
                            time.sleep(1)
                            continue
                        print('err', ret["error_code"], e, text)
                        time.sleep(5)
                        continue
                    else:
                        print('err', e, text)
                        time.sleep(30)
                        continue
            else:
                return None

        summary_results = []
        for text in self.texts:
            if len(text) > 2000:
                # summary_results.append(None)
                # continue
                text = text[:2000]
            ret = get_newsSummary_api(
                text.encode("GBK", errors='ignore').decode('GBK'))
            summary_results.append(ret)
        return summary_results
def App():
    APP_ID = '11169559'
    API_KEY = '7fPVe5wjK6E3yBCLRB0wYgFZ'
    SECRET_KEY = 'uIcvCi5YyaYG6tg5pu7IvgObCC2vSD7b'
    return AipNlp(APP_ID, API_KEY, SECRET_KEY)
def __init__(self, texts, id_num=0):
    self.client = AipNlp(self.__APP_IDS[id_num], self.__API_KEYS[id_num],
                         self.__SECRET_KEYS[id_num])
    self.texts = texts
@contact: [email protected]
@file: word2vector.py
@time: 2018/12/9 23:34
@desc:
'''
from aip import AipNlp

""" Your APPID / AK / SK """
APP_ID = '11395257'
API_KEY = 'xVbGoPnQGyoffrv0YKBvHSS6'
SECRET_KEY = 'oZXwmwD2CTLc8bh9NKPzZM2LxrHySzOB'
client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

word1 = "张飞"
dict_zhangfei = {}
word2 = "关羽"
dict_liubei = {}

""" Call the word-embedding interface """
dict_zhangfei = client.wordEmbedding(word1)
print(dict_zhangfei)
dict_liubei = client.wordEmbedding(word2)
print(dict_liubei)

vector_zhangfei = dict_zhangfei['vec']
vector_liubei = dict_liubei['vec']
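# A small follow-up sketch (assumption, not in the original file): once both word vectors
# are available, their cosine similarity can be computed with plain arithmetic.
import math

dot = sum(a * b for a, b in zip(vector_zhangfei, vector_liubei))
norm = (math.sqrt(sum(a * a for a in vector_zhangfei)) *
        math.sqrt(sum(b * b for b in vector_liubei)))
print("cosine similarity:", dot / norm if norm else 0.0)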
from aip import AipSpeech
from aip import AipNlp
import os
import tuling
from uuid import uuid4

# Note: ffmpeg, currently the leading audio/video conversion tool, is used to convert the audio format
# Note: this runs on Python 3.6
APP_ID = '11793791'
API_KEY = 'iaTErc4r5GXNT56tYnlVtVtk'
SECRET_KEY = '24P7ImcU7kEaOmoBxDy9giNe6evkYca4'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)  # Instance that recognises speech as text
nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # Instance used for the text side

# # Read a file
# filePath = "wyn.m4a"
#


def get_file_content(filePath):
    # Convert the file format; this ffmpeg command is essential (Python 3.6)
    os.system(
        f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm"
    )
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()


def audio2text(file_name):
    # Recognise a local file
    liu = get_file_content(file_name)
def calDocScore(text1, text2):
    APP_ID = "APP_ID"
    API_KEY = "API_KEY"
    SECRET_KEY = "SECRET_KEY"
    client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    return client.simnet(text1, text2)['score']
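# Hypothetical usage sketch (not in the original file; real credentials would be needed in
# place of the placeholders above): the sentences and the 0.7 threshold echo the simnet
# example elsewhere in this collection, treating a high score as "similar".
if calDocScore("你叫什么名字", "你的名字叫什么") > 0.7:
    print("the two sentences are similar")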
def load_data(self):
    client = AipNlp(self.APP_ID, self.API_KEY, self.SECRET_KEY)
    return client
# -*- coding: utf-8 -*-
from flask import Flask, request, redirect, url_for
from mongoengine import *
import json
import re

Info = {'addr': '127.0.0.1', 'port': 27017, 'database': 'Sina'}

from aip import AipNlp

""" Your APPID / AK / SK """
APP_ID = '10799517'
API_KEY = 'xzMsCEd2ZkGwEEa8aiyRWGO8'
SECRET_KEY = 'QtCUG2sBHSvp4LegMO7XzsEwBOhhXGBe '
client = AipNlp(APP_ID, API_KEY, SECRET_KEY)


class UserInfo(Document):
    """ Personal profile """
    _id = StringField()  # User ID
    NickName = StringField()  # Nickname
    Gender = StringField()  # Gender
    Province = StringField()  # Province
    City = StringField()  # City
    Signature = StringField()  # Personal signature
    Birthday = DateTimeField()  # Birthday
    Num_Tweets = IntField()  # Number of posts
    Num_Follows = IntField()  # Number of accounts followed
    Num_Fans = IntField()  # Number of followers
    Sex_Orientation = StringField()  # Sexual orientation
    Marriage = StringField()  # Marital status
# encoding: utf-8
from pymongo import MongoClient
from aip import AipSpeech, AipNlp

# MongoDB configuration
MONGOCLIENT = MongoClient("127.0.0.1", 27017)
MONGO_DB = MONGOCLIENT["godzilla"]

from redis import Redis

# Redis cache configuration
REDIS_DB = Redis("127.0.0.1", 6379, db=7)

RESPONSE_DATA = {"CODE": 0, "MSG": "", "DATA": {}}

MUSIC_DIR = "Music"
COVER_DIR = "Cover"
QR_CODE_DIR = "Qrcode"
CHAT_DIR = "Chat"
PARTS_DIR = "Parts"

# Fixed credentials
APP_ID = '16523814'
API_KEY = 'etju95SSryWpnWqhvAXXKSB6'
SECRET_KEY = '8vjuvGKZCkgi4ripxDQhqi6QGL1p70yl'
AI_CLIENT = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
NLP_CLIENT = AipNlp(APP_ID, API_KEY, SECRET_KEY)

from xpinyin import Pinyin

PINYIN = Pinyin()
    data['userInfo']['userId'] = uid
    # Note: send JSON data, i.e. use json= rather than data=
    response = requests.post(tu_ling_api_url, json=data)
    result = response.json().get('results')[0]['values']['text']
    return result


"""Baidu speech synthesis and speech recognition"""
APP_ID = "14951495"
API_KEY = 'IUpYavuVCL2LDrxfofL3gGQN'
SECRET_KEY = 'qWq4ukxMF9cnfGs6H88vqN4qpnG5RgMB'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
client_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)  # Used for dialogue decisions, word meaning, etc.


def text_to_audio(text):
    """Convert text to audio via Baidu speech synthesis.
    The interface limits the text to 1024 bytes and produces an audio file.
    """
    audio_file_name = uuid.uuid4()  # Globally unique
    result = client.synthesis(
        text,
        'zh',
        1,
        {
            'spd': 4,  # Speaking speed
from aip import AipNlp
import json

APP_ID = '10629064'
API_KEY = 'wtHkAYvPEHVWlfyUDfp4Nzco'
SECRET_KEY = 'CN0FRSM2CjksFeBFOoBoV4GC4VSB5wRE'
client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

f = open('TCM.csv')
lines = f.readlines()
values = {}
for line in lines:
    raw = line.split(',')
    for r in raw:
        r = r.strip()
        values[r] = 1
values = list(values.keys())
values = sorted(values)
values = [v for v in values if len(v) >= 1]
# values = values[:20]
total = len(values) * (len(values) - 1) / 2

result = {}
f = open("baidu_api", 'r')
for line in f.readlines():
    r = json.loads(line)
    # print(r.keys()[0])
    result.update(r)
print("load old result:", len(result))
f.close()
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 6 14:12:33 2021

@author: Neal LONG
"""

from aip import AipNlp

""" Your APPID / AK / SK """
APP_ID = '17788161'
API_KEY = 'cQLWahrXbsindbL43shb2jsr'
SECRET_KEY = '0bNITwAVfIxyrVh6M9H8Mo149BtqbGcZ'

client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
text1 = "苹果是一家伟大的公司"
text2 = "这家公司高存高贷"
print(client.sentimentClassify(text1))
print(client.sentimentClassify(text2))
# encoding: utf-8
from aip import AipNlp
import json

APP_ID = '10437334'
API_KEY = 'wnbWILkse5iQBMlfc0S9xrFx'
SECRET_KEY = '7dwOvserOKXL2560DDusDsFg5VXpu9t9'
client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

data = client.lexer('6点去食堂吃早饭8点去教室上课12点去长泰看电影14点去公交车站坐公交车')
# with open("./data.json", 'r') as load_f:
#     data = json.load(load_f)
print(data)

classified = {'verb': [], 'noun': []}
listAll = []
locationList = ['nz', 'n', 'nw', 'nt', 'nr', 'LOC', 'ORG']
j = 0
while j < len(data['items']):
    if data['items'][j]['pos'] == 'v':
        listAll.append(data['items'][j])
    if (data['items'][j]['pos'] in locationList) or (data['items'][j]['ne'] in locationList):
        listAll.append(data['items'][j])
    if data['items'][j]['pos'] == 'TIME':
        pass
    j = j + 1

filter = ['想', '想要', '要', '还要']
# criticalVerbs = ["去看", "看", "去", "回", "到", "唱", "参加", "开会", "上班", "上学", "上课", "买", "喝", "吃", "玩"]
i = 0
GeneralList = []
while i < len(listAll):
from aip import AipNlp

APP_ID = '10637556'
API_KEY = 'rm0HA7EqfQ16HdOZMqwHkho5'
SECRET_KEY = '3rM91Nj9Z3aLarTgMqvbexdwl0fN3vNd'
client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

text = "我们这byd电动🚕报价20+,zf补10+"
# Keep only alphanumeric characters so the API accepts the text
text = ''.join(e for e in text if e.isalnum())
resp = client.sentimentClassify(text)
sentiment = resp['items'][0]['sentiment']
print(resp)
# pprint(resp)
print("Analysed text:", text)
print("Sentiment result:", end='')
if sentiment == 0:
    print(sentiment, "negative")
elif sentiment == 1:
    print(sentiment, "neutral")
else:
    print(sentiment, "positive")
# if len(one) > 1:
#     fee = feel_analyse(one[1])
#     if fee == 0:
#         one.append("负向")
#     elif fee == 1:
#         one.append("中性")
#     else:
    'code': 0,
    'msg': '',
    'data': {}
}

# Liantu QR-code service configuration
LT_URL = "http://qr.liantu.com/api.php?text=%s"

# Baidu AI configuration
from aip import AipSpeech, AipNlp

APP_ID = '15483561'
API_KEY = 'PGop0sbEIlUd7fjP6ZEI4G0k'
SECRET_KEY = 'BAsacHXVOgbh7c7Gncatx0XSObyicOdU'
NLP = AipNlp(APP_ID, API_KEY, SECRET_KEY)
SPEECH = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

VOICE = {
    'vol': 5,
    'spd': 4,
    'pit': 6,
    'per': 3
}

# Turing robot configuration
TULING_STR = args = {
    "reqType": 0,
    "perception": {
        "inputText": {
            "text": '%s'