def bad_words_filter(img_name):
    """Check the text stored for one image against Baidu's anti-spam service.

    The result file ``data/results/res_<stem>.txt`` is read first, where
    ``<stem>`` is the part of the image's base name before the first dot.
    Every line is split on commas and the fields from index 4 onward are
    collected into a single comma-terminated string.  That string is sent
    to ``antiSpam`` and the verdict is appended to the same result file.

    :param img_name: path of the image; only the component after the last
        backslash is used to locate the result file.
    """
    # NOTE(review): only the Windows separator is handled here; a path that
    # uses '/' keeps its directories in base_name -- confirm with callers.
    base_name = img_name.split('\\')[-1]
    result_path = 'data/results/' + 'res_{}.txt'.format(base_name.split('.')[0])

    with open(result_path, 'r') as f:
        # Slicing with [4:] yields nothing for lines with four or fewer
        # fields, so no explicit length check is needed.
        words = [
            word
            for line in f
            for word in line.strip().split(',')[4:]
        ]
    # The submitted text starts with a single space and every word is
    # followed by a comma.
    text = ' ' + ''.join(word + ',' for word in words)

    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration instead.
    client = AipImageCensor(appId='18097522',
                            apiKey='bgeRXmpblbOMocErTxvMCiOF',
                            secretKey='SyRmwoFGsKXqC3LvAAYwlDlf1BckBOwG')

    # Query the service once and reuse the response: a second request costs
    # another round trip and is not guaranteed to return the same verdict.
    verdict = client.antiSpam(text)['result']

    with open(result_path, 'a') as f:
        if verdict['spam'] == 0:
            f.write('无敏感信息')
        else:
            f.write('待检测的文本里面含有侮辱、色情、暴力和政治敏感词汇。\n')
            for item in verdict['reject']:
                if len(item['hit']) != 0:
                    f.write(str(item['hit']))
def test_article_filter():
    """Run text and image censoring for one article and return the text result.

    The JSON request body supplies ``article_id`` (required) and
    ``article_images`` (optional list of images).  The article is looked up
    through ``Article.query``, its content document is fetched from
    ``mongo_store.articles`` by title, and the ``text`` parts of the content
    are concatenated.  The concatenated text is checked with
    ``SensitiveFilter.content_check`` (retried once after dropping the cached
    ``access_token`` when the first reply has ``errcode`` 40001) and with
    Baidu ``antiSpam``; every image is passed to ``imageCensorUserDefined``.
    The Baidu responses are only logged.

    :return: a ``jsonify`` response; ``errno=0`` with the ``content_check``
        result in ``data`` on success, ``errno=-1`` with a message otherwise.
    """
    try:
        res = request.get_json()
        article_id = res.get('article_id')
        # A request without images is still valid: treat a missing or null
        # list as empty so the image loop below is simply skipped.
        article_images = res.get('article_images') or []
        Logging.logger.info('request_args:{0}'.format(res))

        if not article_id:
            return jsonify(errno=-1, errmsg='参数错误,请传入要查询的文章的article_id')

        article = Article.query.get(article_id)
        if not article:
            return jsonify(errno=-1, errmsg='参数错误,该文章不存在')

        docs = mongo_store.articles.find({'title': article.title})
        doc = docs[0]
        content = doc.get('content')

        obj = SensitiveFilter()
        str11 = ''.join(
            [item.get('text') for item in content if item.get('text')])
        text = {'content': str11}

        txt_data = obj.content_check(text)
        if txt_data.get('errcode') == 40001:
            # Drop the cached token and try once more.
            redis_store.delete('access_token')
            txt_data = obj.content_check(text)
        Logging.logger.info('res_data:{0}'.format(txt_data))

        # NOTE(review): credentials are hard-coded in source; consider
        # loading them from configuration instead.
        APP_ID = '15791531'
        API_KEY = 'kajyVlP73XtSGBgoXDIHH5Za'
        SECRET_KEY = 'u2TClEW6LaHIIpRNdFcL2HIexcgG1ovC'
        client = AipImageCensor(APP_ID, API_KEY, SECRET_KEY)

        txt_resp = client.antiSpam(str11)
        Logging.logger.info('txt_resp:{0}'.format(txt_resp))

        for img in article_images:
            img_resp = client.imageCensorUserDefined(img)
            Logging.logger.info('img_resp:{0}'.format(img_resp))

        return jsonify(errno=0, errmsg="OK", data=txt_data)
    except Exception as e:
        # Top-level boundary of the handler: log and report a generic error.
        Logging.logger.error('errmsg:{0}'.format(e))
        return jsonify(errno=-1, errmsg='文章详情查询失败')
class TextReview():
    """Thin wrapper around the Baidu ``antiSpam`` text check."""

    def __init__(self):
        """Create the API client from the module-level credentials."""
        self.client = AipImageCensor(APP_ID, API_KEY, SECRET_KEY)

    def recognize(self, text):
        """Classify ``text`` as clean (0) or flagged (1).

        :param text: the text to submit to ``antiSpam``.
        :return: ``0`` when the response carries an ``error_msg`` (the
            message is printed) or when both the ``reject`` and ``review``
            lists of the result are empty; ``1`` otherwise.
        """
        ans = self.client.antiSpam(text)
        if 'error_msg' in ans:
            # An API failure is reported on stdout and treated as "clean".
            print(ans['error_msg'])
            return 0
        verdict = ans['result']
        if verdict['reject'] == [] and verdict['review'] == []:
            return 0
        return 1
class BaiduAntiSpam(object):
    """Batch helper that runs texts through the Baidu ``antiSpam`` check."""

    def __init__(self, api):
        """Build the API client.

        :param api: sequence of positional arguments for ``AipImageCensor``,
            i.e. ``[app_id, api_key, secret_key]``.
        """
        self.api = api
        self.client = AipImageCensor(*self.api)

    @retry(tries=3, delay=2)
    def anti_spam(self, text):
        """Check one text and return the response tagged with its input.

        The ``log_id`` entry of the response is overwritten with ``text`` so
        that each stored result can be matched to the text that produced it.
        """
        response = self.client.antiSpam(text)
        response['log_id'] = text
        return response

    def get_result(self, file, corpus):
        """Append one result line per item of ``corpus`` to ``file``.

        :param file: path of the output file, opened in append mode.
        :param corpus: iterable of texts; iterated through ``tqdm_notebook``.
        """
        with open(file, 'a') as out:
            for sentence in tqdm_notebook(corpus):
                checked = self.anti_spam(sentence)
                out.write("%s\n" % checked)
from aip import AipImageCensor

# Your APP ID, API key (AK) and secret key (SK).
APP_ID = '11156578'
API_KEY = '3K73kH6H4aGoZbUrE1N0oTO5'
SECRET_KEY = 'YoL5g6BCnWG4mQvEo0TjyDPozlySdDRp'

client = AipImageCensor(APP_ID, API_KEY, SECRET_KEY)

# Submit one sample sentence to the anti-spam check and show the raw response.
sample_text = '民警提醒说,此类骗局中,通常骗子要求汇款的理由包括“发红包”、“买礼物”、“生病就医”、“凑路费”、“生意需要资金”、“见面需要彩礼”等各种理由。因此,微信交友遇到这样情形要小心防骗'
result = client.antiSpam(sample_text)
print(result)
def get_check_json(cls, content):
    """Return the raw ``antiSpam`` response for ``content``.

    A new ``AipImageCensor`` client is built on every call from the
    ``APP_ID``, ``API_KEY`` and ``SECRET_KEY`` attributes of ``cls``.

    :param content: the text passed to ``antiSpam`` as ``content``.
    """
    return AipImageCensor(
        cls.APP_ID,
        cls.API_KEY,
        cls.SECRET_KEY,
    ).antiSpam(content=content)
class ContentVerify:
    """Censor uploaded images, avatars and text through ``AipImageCensor``."""

    def __init__(self):
        """Create the API client from the current application's config."""
        self.client = AipImageCensor(
            current_app.config['APP_ID'],
            current_app.config['API_KEY'],
            current_app.config['SECRET_KEY']
        )

    @staticmethod
    def _is_gif(suffix):
        """Return True when ``suffix`` is the GIF extension, ignoring case."""
        return suffix.lower() == '.gif'

    def verify_uploaded_images(self, img):
        """Censor one image and report whether it is acceptable.

        :param img: either a ``str`` (passed to the API as is, its extension
            taken from the string itself) or an upload object exposing
            ``filename`` and ``read()``.
        :return: ``(msg, ok)`` where ``msg`` is the list produced by
            :meth:`extract_msg` and ``ok`` the verdict of :meth:`is_ok`.
        """
        if isinstance(img, str):
            _, suffix = os.path.splitext(img)
            payload = img
        else:
            _, suffix = os.path.splitext(img.filename)
            payload = img.read()

        # GIFs go to the dedicated GIF endpoint, everything else to the
        # user-defined image censor.
        if self._is_gif(suffix):
            result = self.client.antiPornGif(payload)
        else:
            result = self.client.imageCensorUserDefined(payload)

        msg = self.extract_msg(result, suffix)
        ok_or_not = self.is_ok(msg, suffix)
        return msg, ok_or_not

    def verify_uploaded_avatar(self, avatar):
        """Censor an avatar upload with ``faceAudit``.

        :param avatar: upload object exposing ``read()``.
        :return: ``('', True)`` when the anti-porn conclusion is anything but
            "色情"; ``(conclusion + ",上传失败!", False)`` when it is; and
            ``('审核失败,请重试!', False)`` when the response carries no usable
            anti-porn section or no conclusion.
        """
        result = self.client.faceAudit(avatar.read())
        try:
            msg = result['result'][0]['data']['antiporn']
        except (KeyError, IndexError, TypeError):
            # Error responses do not have the expected nesting; report them
            # as a failed review instead of raising.
            return '审核失败,请重试!', False

        if "conclusion" not in msg:
            return '审核失败,请重试!', False
        if msg['conclusion'] == "色情":
            return msg['conclusion'] + ",上传失败!", False
        return '', True

    def verify_text(self, text):
        """Check ``text`` with ``antiSpam``.

        :return: ``(message, spam_code, ok)``; ``ok`` is True only when the
            reported spam code is 0.  Code 1 yields the rejection message,
            any other code the pending-review message.
        """
        result = self.client.antiSpam(text)
        spam_code = result['result']['spam']
        if spam_code == 0:
            return '', spam_code, True
        elif spam_code == 1:
            return "涉及敏感词,审核未通过,提交失败!", spam_code, False
        else:
            return "待审核,您将在12小时内收到审核结果", spam_code, False

    def extract_msg(self, result, suffix):
        """Reduce an image-censor response to a list of messages.

        :param result: response dict from the censor call.
        :param suffix: file extension of the censored image.
        :return: ``[{"error_msg": ...}]`` for an error response; for GIFs a
            one-element list with the ``conclusion``; otherwise the ``msg``
            of every entry in ``data`` (``[""]`` when ``data`` is absent).
        """
        if "error_msg" in result:
            return [{"error_msg": result['error_msg']}]
        if self._is_gif(suffix):
            return [result['conclusion']]
        if 'data' not in result:
            return [""]
        return [d["msg"] for d in result['data']]

    def is_ok(self, msg, suffix):
        """Decide whether the messages from :meth:`extract_msg` are acceptable.

        :param msg: list returned by :meth:`extract_msg`; may be empty.
        :param suffix: file extension of the censored image.
        :return: False for an error entry or when a porn / political
            message is present, True otherwise.
        """
        # An empty list means the censor reported nothing; guard the index
        # so that case falls through to the membership checks below.
        if msg and isinstance(msg[0], dict):
            return False
        if self._is_gif(suffix):
            return "色情" not in msg
        return ("存在色情内容" not in msg) and ("存在政治敏感内容" not in msg)