class TelegramAction(BasicAction):
    """Action to send a Telegram message to a certain user or group."""
    name = "TelegramAction"

    def __init__(self, token, receiver, custom_payload=None, template=None):
        """
        :param token: Telegram bot token ("<numeric id>:<secret>")
        :param receiver: chat_id of the user or group to notify
        :param custom_payload: optional extra payload (currently unused)
        :param template: optional message template (currently unused, see TODO)
        :raises ValueError: if the token is None or malformed
        """
        super().__init__()
        self.logger = logging.getLogger(__name__)
        # Check for None first: re.match(None) raises TypeError, so the
        # original trailing "or token is None" clause was unreachable.
        if token is None or not re.match(r"[0-9]+:[a-zA-Z0-9\-_]+", token):
            raise ValueError("Bot token not correct or None!")
        self.token = token
        self.receiver = receiver
        self.custom_payload = custom_payload
        self.template = template
        self.request = Request()

    # TODO add possibility to send a template message and inject the paste data into the template
    def perform(self, paste, analyzer_name=None):
        """Send a message via a Telegram bot to a specified user, without checking for errors."""
        from urllib.parse import quote
        text = "New paste matched by analyzer '{0}' - Link: {1}".format(analyzer_name, paste.full_url)
        # URL-encode the message: it contains spaces and quotes, which are not
        # valid raw query-string characters.
        api_url = "https://api.telegram.org/bot{0}/sendMessage?chat_id={1}&text={2}".format(
            self.token, self.receiver, quote(text))
        self.request.get(api_url)
class Socket:
    """Thin client that posts probability data to the configured server."""

    def __init__(self):
        # The server base URI is read from the environment at construction time.
        self.request = Request(os.getenv('SERVER_URI'))

    def emit(self, data):
        """POST the given probability value to the '/prob-data' endpoint."""
        payload = {"prob": data}
        self.request.post('/prob-data', payload)
def callback():
    """OAuth callback: exchange the QQ 'code' for an access token and openid.

    Stores 'qq_access_token' and 'openid' in the session and redirects to the
    user-info endpoint.  Returns a JSON 400 error when the code is missing or
    the token exchange fails.
    """
    code = request.args.get('code')
    if not code:
        logger.error("缺少code参数")
        return jsonify(Response.error(400, "缺少code参数"))
    logger.info("【code】" + str(code))
    # Exchange the authorization code for an access_token.
    token_url = urls.get_token_url(code)
    resp = Request.get(token_url, to_dict=False)
    print(resp)  # NOTE(review): debug print left in - consider removing
    try:
        # The token endpoint answers with a query-string style body; extract
        # the access_token field from it.
        access_token = re.findall("access_token=(.*?)&expires_in", resp)[0]
        logger.info("【access_token】" + str(access_token))
    except IndexError:
        logger.error('获取access_token错误')
        return jsonify(Response.error(400, "获取access_token错误"))
    session['qq_access_token'] = access_token
    # Use the access_token to obtain the user's openid.
    openid_url = urls.get_openid_url(access_token)
    resp = Request.get(openid_url)
    print(resp)  # NOTE(review): debug print left in - consider removing
    openid = resp.get('openid')
    logger.info("【openid】" + str(openid))
    session['openid'] = openid
    return redirect(url_for('get_user_info'))
def __init__(self, paste_queue=None, exception_event=None):
    """
    :param paste_queue: queue onto which finished pastes are pushed;
                        a new Queue is created when omitted
    :param exception_event: event used to signal fatal errors to the host
    """
    super().__init__(exception_event)
    self.logger = logging.getLogger(__name__)
    self._last_scrape_time = 0          # unix time of the last metadata scrape
    self.paste_queue = paste_queue or Queue()
    self._tmp_paste_queue = Queue()     # pastes awaiting body download
    self._known_pastes = []             # keys of pastes already seen
    self._known_pastes_limit = 1000     # cap on the known-pastes history
    self.request = Request()
def __init__(self, token, receiver, custom_payload=None, template=None):
    """
    :param token: Telegram bot token ("<numeric id>:<secret>")
    :param receiver: chat_id of the user or group to notify
    :param custom_payload: optional extra payload (currently unused)
    :param template: optional message template (currently unused)
    :raises ValueError: if the token is None or malformed
    """
    super().__init__()
    self.logger = logging.getLogger(__name__)
    # Test for None before the regex: re.match(None) raises TypeError, so the
    # original "... or token is None" clause could never be reached.
    if token is None or not re.match(r"[0-9]+:[a-zA-Z0-9\-_]+", token):
        raise ValueError("Bot token not correct or None!")
    self.token = token
    self.receiver = receiver
    self.custom_payload = custom_payload
    self.template = template
    self.request = Request()
def build_video_download_request(cls, url):
    """Build a GET request for downloading the video at *url*.

    :param url: direct video URL
    :return: Request carrying the shared headers
    """
    return Request(
        url=url,
        method=HttpMethod.GET,
        headers=cls.HEADERS,
    )
def build_oid_api_request(cls, mid, pn=1, ps=100, tid=0, order='pubdate', keyword=None):
    """Build the request that lists all of an uploader's video av-ids.

    :param mid: uploader (up) id
    :param pn: page number
    :param ps: page size
    :param tid: category id; 0 means all
    :param order: sort order
    :param keyword: optional search keyword
    :return: Request for the space/arc/search endpoint
    """
    endpoint = cls.BASE_API_URL + '/x/space/arc/search'
    query = dict(mid=mid, pn=pn, ps=ps, tid=tid, order=order, keyword=keyword)
    return Request(url=endpoint, method=HttpMethod.GET, params=query, headers=cls.HEADERS)
def run_server(host='127.0.0.1', port=8001, max_in=1):
    """Run a minimal blocking HTTP server.

    :param host: interface to bind to
    :param port: TCP port to listen on
    :param max_in: listen() backlog (maximum queued connections)
    """
    with socket.socket() as s:
        # Release the port immediately once the server socket closes.
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((host, port))
        # listen() only needs to be called once before accepting connections
        # (the original re-invoked it on every loop iteration).
        s.listen(max_in)
        print('server start, welcome socket at port:', port, '\n')
        while True:
            connection, addr = s.accept()
            print('create socket with: ', *addr)
            packet = ''
            buffer_size = 1024
            while True:
                data = connection.recv(buffer_size).decode()
                packet += data
                # A short read is taken to mean the request is complete.
                if len(data) < buffer_size:
                    break
            if len(packet.split()) < 2:
                # Guard against empty requests (e.g. browser preconnects);
                # close the socket so the fd is not leaked.
                connection.close()
                continue
            r = Request(packet)  # parse the raw request text
            print(str(datetime.now())[:19], *addr, r.method, r.path)  # access log
            response = response_request(r)
            connection.sendall(response)
            connection.close()  # fix: the original never closed client sockets
def build_archive_api_request(cls, aid):
    """Build the request for a video's stat info (collections included).

    :param aid: video av-id
    :return: Request for the archive/stat endpoint
    """
    endpoint = cls.BASE_API_URL + '/x/web-interface/archive/stat'
    return Request(url=endpoint, method=HttpMethod.GET,
                   params={'aid': aid}, headers=cls.HEADERS)
def build_aid_api_request(cls, aid):
    """Build the request that fetches video info by its aid.

    :param aid: video av-id
    :return: Request for the web-interface/view endpoint
    """
    endpoint = cls.BASE_API_URL + '/x/web-interface/view'
    return Request(url=endpoint, method=HttpMethod.GET,
                   params={'aid': aid}, headers=cls.HEADERS)
def get_user_info():
    """Fetch the user's basic profile with the access_token/openid from the session.

    :return: JSON success response containing the profile, or a JSON 400
             error when the session holds no QQ access token.
    """
    if 'qq_access_token' in session:
        openid = session.get('openid')
        access_token = session.get('qq_access_token')
        logger.info("【openid】" + str(openid))
        logger.info("【access_token】" + str(access_token))
        user_info_url = urls.get_user_info_url(access_token, openid)
        resp = Request.get(user_info_url)
        return jsonify(Response.success(resp))
    return jsonify(Response.error(400, "获取用户信息失败"))
def build_dm_api_request(cls, oid):
    """Build the request that fetches the danmaku (bullet comments) for *oid*.

    :param oid: comment target id
    :return: Request for the dm/list.so endpoint
    """
    endpoint = cls.BASE_API_URL + '/x/v1/dm/list.so'
    query = {'oid': oid}
    return Request(url=endpoint, method=HttpMethod.GET, params=query, headers=cls.HEADERS)
def runOnce(sock: socket.socket) -> None:
    """Accept one connection, serve one '/product' request, close the socket.

    Multiplies the float values of the query parameters and returns the
    result as JSON; responds 404 for other paths and 400 for missing or
    non-numeric parameters.
    """
    (connection, (host, port)) = sock.accept()
    # Read byte-by-byte until the blank line terminating the header arrives.
    header = bytearray()
    while not header.endswith(b"\r\n" * 2):
        chunk = connection.recv(1)
        header.extend(chunk)
    request = Request.fromBytes(header)  # parse request
    if "Content-Length" in request.headers:
        # Keep reading until the whole body has been transferred.
        while len(request.body) < int(request.headers["Content-Length"]):
            chunk = connection.recv(4096)
            request.body.extend(chunk)
    # Without a Content-Length header the request is assumed to have no body.
    headers = {"Content-Type": "application/json"}
    if request.pathname != "/product":
        response = Response(404, body=b"404 Not Found")
    elif request.params == {}:
        response = Response(400, body=b"400 Bad Request")
    else:
        try:
            operands = list(map(float, request.params.values()))  # may raise ValueError
            result = reduce(lambda x, y: x * y, operands)
            body = {
                "operation": "product",
                "operands": operands,
                "result": result
            }
            response = Response(200, headers=headers,
                                body=bytes(json.dumps(body, indent=4), "utf8"))
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            response = Response(400, body=b"400 Bad Request")
    connection.sendall(bytes(response))
    connection.close()
    print("{} {} {}".format(request.method, request.url, response.statusCode))
def runForever(port):
    """Serve static .html/.htm files forever on *port*; exit on Ctrl-C.

    Responds 403 for existing non-HTML paths and 404 for missing ones.
    """
    sock = socket.socket(
        family=socket.AF_INET, type=socket.SOCK_STREAM
    )  # according to <https://docs.python.org/3/library/socket.html#socket.AF_INET>
    sock.bind(("", port))
    sock.listen(5)
    while True:
        try:
            # fix: the original unpacked the peer address into `port`,
            # shadowing the function parameter after the first connection.
            connection, peer = sock.accept()
        except KeyboardInterrupt:
            # Narrowed from a bare `except:` - only Ctrl-C should trigger
            # the graceful-shutdown path.
            print("Keyboard interrupt. Exitting.")
            sock.close()
            break
        # Read one byte at a time until the header terminator arrives.
        header = bytearray()
        while not header.endswith(b"\r\n" * 2):
            chunk = connection.recv(1)
            header.extend(chunk)
        request = Request.fromBytes(header)  # parse request
        # SECURITY(review): pathname is attacker-controlled; "../" sequences
        # can escape the served directory - consider normalizing the path.
        path = "." + request.pathname
        headers = {
            "Content-Type": "text/html",
        }
        if os.path.exists(path):
            if path.endswith((".html", ".htm")):
                response = Response(200, headers=headers)
                with open(path, "rb") as f:
                    response.body.extend(f.read())
            else:
                response = Response(403, body=b"<h1>403 Forbidden</h1>", headers=headers)
        else:
            response = Response(404, body=b"<h1>404 Not Found</h1>", headers=headers)
        connection.sendall(bytes(response))
        print("{} {} {}".format(request.method, request.pathname, response.statusCode))
        connection.close()
def build_sign_cid_api_request(cls, cid, qn=116):
    """Build a signed playurl request for a video download (legacy signing).

    :param cid: video cid
    :param qn: requested quality (default 116)
    :return: signed Request for the v2 playurl endpoint
    """
    # The appkey and signing secret are obfuscated: shifting every character
    # of the reversed `entropy` string by +2 yields "<appkey>:<sec>".
    entropy = 'rbMCKn@KuamXWlPMoJGsKcbiJKUfkPF_8dABscJntvqhRSETg'
    appkey, sec = ''.join([chr(ord(i) + 2) for i in entropy[::-1]]).split(':')
    # The request is authenticated with sign = md5(<query><sec>).
    params = 'appkey=%s&cid=%s&otype=json&qn=%s&quality=%s&type=' % (
        appkey, cid, qn, qn)
    chksum = hashlib.md5(bytes(params + sec, 'utf8')).hexdigest()
    url = 'https://interface.bilibili.com/v2/playurl?%s&sign=%s' % (params, chksum)
    cookies = {'SESSDATA': CONFIG['SESSION_DATA']}
    # NOTE(review): `params` here is the already-signed query *string*, and the
    # same query is baked into `url` - passing it again via params= looks
    # redundant or unintended; confirm against Request's handling of str params.
    return Request(url=url, method=HttpMethod.GET, params=params,
                   headers=cls.HEADERS, cookies=cookies)
def build_cid_api_request(cls, avid, cid):
    """Build the request that fetches download info for a video part.

    :param avid: video av-id
    :param cid: part cid
    :return: Request for the player/playurl endpoint
    """
    endpoint = cls.BASE_API_URL + '/x/player/playurl'
    query = dict(avid=avid, cid=cid, qn=80, fnver=0, fnval=16)
    session_cookies = {'SESSDATA': CONFIG['SESSION_DATA']}
    return Request(url=endpoint, method=HttpMethod.GET, params=query,
                   headers=cls.HEADERS, cookies=session_cookies)
def get(url, headers=None):
    """Naive GET, does not handle any error or redirect"""
    # NOTE(review): the `headers` parameter is accepted but never used -
    # callers cannot actually inject extra headers; confirm and wire through.
    request = Request(url)
    # Your client must include a "Host: " header
    ip = socket.gethostbyname(request.hostname)
    port = request.port
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((ip, port))
    sock.send(bytes(request))
    # use Content-Length to decide when the response has been fully transferred
    header = bytearray()
    while not header.endswith(b"\r\n" * 2):
        chunk = sock.recv(1)
        header.extend(chunk)
    response = Response.fromBytes(header)  # construct response from header
    if "Content-Length" in response.headers:
        contentLength = int(response.headers["Content-Length"])  # get Content-Length field. If none, no body
        while len(response.body) < contentLength:  # keep reading body until reaching Content-Length
            chunk = sock.recv(4096)
            response.body.extend(chunk)
    else:
        # if there is no Content-Length field in the header, assume the server
        # closes the stream when it is finished sending
        while True:  # keep reading until the stream is closed
            chunk = sock.recv(4096)
            if chunk:
                response.body.extend(chunk)
            else:
                # an empty read means the server has closed the stream
                break
    sock.close()
    return response
def deserialize(message):
    """Rebuild a RequestMessage from its serialized (tag, payload) pair."""
    inner_request = Request.deserialize(message[1])
    return RequestMessage(message[0], inner_request)
def get_reps_json(self, com_json):
    """Fetch the replies (including replies-to-replies) of a comment.

    Endpoint reference:
    https://www.toutiao.com/2/comment/v2/reply_list/?aid=24&app_name=toutiao-web&id=...&offset=0&count=20&repost=0

    :param com_json: comment JSON dict; must contain an 'id' field
    :return: list of reply dicts (id, text, digg_count, user, optional
             reply_to_comment, ... as returned by the API)
    :raises ValueError: when the endpoint returns no data
    """
    reply_url = 'https://www.toutiao.com/api/pc/2/comment/v4/reply_list/?' \
                'aid=24&app_name=toutiao-web&id={0}&offset=0&count={1}&repost=0' \
        .format(com_json['id'], 20)
    headers = {
        "Host": "www.toutiao.com",
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    }
    result = Request.Request(reply_url, headers).more()['data']['data']
    if result is None:
        # The original used a bare `raise` with no active exception, which
        # surfaces as an opaque RuntimeError; raise something meaningful.
        # (The original try/except that only re-raised was a no-op wrapper.)
        raise ValueError("reply list endpoint returned no data: %s" % reply_url)
    logging.info("获取回复信息 %s 成功" % reply_url)
    return result
def get_coms_json(self, art_brief_json):
    """Fetch the comment list of an article (best effort).

    Endpoint reference:
    https://www.toutiao.com/article/v2/tab_comments/?aid=24&app_name=toutiao-web&group_id=...&item_id=...&offset=0&count=5

    :param art_brief_json: article brief dict with 'group_id' and 'item_id'
    :return: list of comment cell dicts, or None when the request fails
    """
    try:
        com_url = 'https://www.toutiao.com/api/pc/article/v4/tab_comments/?' \
                  'aid=1&app_name=toutiao-web&group_id={0}&item_id={1}&offset=0&count={2}' \
            .format(art_brief_json['group_id'], art_brief_json['item_id'], 20)
        headers = {
            "Host": "www.toutiao.com",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        result = Request.Request(com_url, headers).more()['data']
        logging.info("获取评论页数据 %s 成功" % com_url)
        return result
    except Exception:
        # Narrowed from a bare `except:`; this method is deliberately
        # best-effort and reports failure by returning None.
        logging.exception("获取评论页数据 失败")
        return None
def runForever(port):
    """select()-based single-threaded HTTP server.

    Multiplexes many client sockets: headers are read one byte at a time,
    bodies in 4 KiB chunks, and completed requests are answered with
    staticFile(pathname).  Exits (closing all sockets) when select() is
    interrupted.
    """
    sock = socket.socket(
        family=socket.AF_INET, type=socket.SOCK_STREAM
    )  # according to <https://docs.python.org/3/library/socket.html#socket.AF_INET>
    sock.bind(("", port))
    sock.listen(5)
    # Map of socket -> per-connection state dict.  The listening socket is a
    # key too, so select() also wakes us up for new connections.
    readers = {
        sock: None,
    }
    while True:
        try:
            readables, *_ = select.select(readers, [], [])
        except:
            # NOTE(review): bare except - apparently intended for Ctrl-C, but
            # it also swallows any select() error.
            print("Keyboard interrupt. Exitting.")
            for v in readers.keys():  # clean up
                v.close()
            break
        for readable in readables:
            if readable is sock:  # new connection coming in
                (connection, (ip, port)) = sock.accept()
                readers[connection] = {
                    "state": "header",
                    "header": bytearray()
                }
            else:  # other clients
                if readers[readable][
                        "state"] == "header":  # in the state of reading header
                    chunk = readable.recv(1)
                    readers[readable]["header"].extend(chunk)
                    if readers[readable]["header"].endswith(
                            b"\r\n" * 2):  # request header fully transferred
                        try:
                            request = Request.fromBytes(
                                readers[readable]
                                ["header"])  # parse request header
                        except:  # fail to parse header
                            traceback.print_exc()
                            response = Response(
                                403,
                                body=b"HTTP request is invalid: <pre>" +
                                readers[readable]["header"] + b"</pre>")
                            readable.sendall(bytes(response))
                            readable.close()
                            # NOTE(review): `request` is unbound here when
                            # fromBytes raised (or stale from an earlier
                            # client) - this print can NameError or mislog.
                            print("{} {} {}".format(request.method,
                                                    request.pathname,
                                                    response.statusCode))
                            readers.pop(readable)
                            continue
                        if (
                                "Content-Length" in request.headers
                                and request.headers["Content-Length"] == 0
                        ) or "Content-Length" not in request.headers:  # if Content-Length: 0 or Content-Length not available, serve immediately
                            # NOTE(review): header values are typically str,
                            # so `== 0` may never be true - confirm whether
                            # fromBytes converts Content-Length to int.
                            response = staticFile(
                                request.pathname)  # generate response
                            readable.sendall(bytes(response))  # serve response
                            readable.close()
                            print("{} {} {}".format(request.method,
                                                    request.pathname,
                                                    response.statusCode))
                            del readers[readable]
                        else:  # need to read the whole request body
                            readers[readable]["state"] = "body"
                            readers[readable]["request"] = request
                            readers[readable].pop("header")
                        continue
                    else:  # request header not fully transferred
                        continue  # keep reading in the next iteration
                else:  # in the state of reading body
                    chunk = readable.recv(4096)
                    request = readers[readable]["request"]
                    request.body.extend(chunk)
                    if len(request.body) >= int(
                            request.headers["Content-Length"]
                    ):  # there is a Content-Length, guaranteed, because we have served all requests that do not have one already
                        response = staticFile(request.pathname)
                        readable.sendall(bytes(response))
                        readable.close()
                        print("{} {} {}".format(request.method,
                                                request.pathname,
                                                response.statusCode))
                        readers.pop(readable)
                    else:
                        continue
def __init__(self):
    """Create the HTTP client pointed at the server named by $SERVER_URI."""
    server_uri = os.getenv('SERVER_URI')
    self.request = Request(server_uri)
def get_arts_brief_json_by_category(self, category):
    """Fetch the article-brief feed of a category (best effort).

    Endpoint reference:
    http://m.toutiao.com/list/?tag=__all__&ac=wap&count=20&format=json_raw&as=A17538D54D106FF&cp=585DF0A65F0F1E1&min_behot_time=1482491618

    :param category: channel tag such as 'news_society', 'news_tech',
                     'news_sports', 'news_finance', 'news_story', ...
    :return: list of article-brief dicts (title, abstract, image_list,
             group_id, item_id, ...), or None when the request fails
    """
    try:
        url = 'http://m.toutiao.com/list/?tag={0}&ac=wap&count=20&format=json_raw&as=A17538D54D106FF&cp=585DF0A65F0F1E1&min_behot_time=1482491618'.format(
            category)
        headers = {
            "Host": "m.toutiao.com",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        # NOTE(review): several cookie names/values end in a literal "\t" -
        # this looks like a copy-paste artifact; confirm the intended names.
        cookies = {
            "CNZZDATA1259612802\t": "1495391056-1572663800-https%253A%252F%252Fwww.toutiao.com%252F%7C1573891961\t",
            "UM_distinctid\t": "16e2a5bff8b3ef-01fd3a213050ad-1d3a6a5b-1aeaa0-16e2a5bff8cf52\t",
            "WEATHER_CITY": "%E5%8C%97%E4%BA%AC\t",
            "__tasessionId\t": "c1wuholqh1575530017796\t",
            "_ga": "GA1.2.268549673.1572668703",
            "csrftoken": "4e76bad8185f77ea8b647e50e3bb0e26",
            "s_v_web_id": "7fabbf42df76a1ccd37bfea2d2c5db76",
            "tt_webid": "6754560229981750791"
        }
        result = Request.Request(url, headers, cookies).more()['data']
        logging.info("获取新闻缩率信息 %s 成功" % url)
        return result
    except Exception:
        # Narrowed from a bare `except:`; failures are reported as None.
        logging.exception("获取新闻缩率信息 失败")
        return None
def get_art_json(self, art_brief_json):
    """Fetch the full content of one article (best effort).

    Endpoint reference: http://m.toutiao.com/i<item_id>/info/

    :param art_brief_json: article brief dict with an 'item_id' field
    :return: article detail dict (title, content HTML, publish_time,
             media_user, comment_count, ...), or None when the request fails
    """
    try:
        art_url = 'http://m.toutiao.com/i{0}/info/'.format(
            art_brief_json['item_id'])
        headers = {
            "Host": "m.toutiao.com",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        result = Request.Request(art_url, headers).more()['data']
        logging.info("获取新闻内容数据 %s 成功" % art_url)
        return result
    except Exception:
        # Narrowed from a bare `except:`; failures are reported as None.
        logging.exception("获取新闻内容数据 失败")
        return None
def get_arts_brief_json_by_category(self, category):
    """Fetch article briefs (no article bodies) for a category (best effort).

    Endpoint reference:
    http://m.toutiao.com/list/?tag=__all__&ac=wap&count=20&format=json_raw&as=A17538D54D106FF&cp=585DF0A65F0F1E1&min_behot_time=1482491618

    :param category: channel tag such as 'news_society', 'news_tech',
                     'news_sports', 'news_finance', 'news_story', ...
    :return: list of article-brief dicts (title, abstract, image_list,
             group_id, item_id, ...), or None when the request fails
    """
    try:
        url = 'http://m.toutiao.com/list/?tag={0}&ac=wap&count=20&format=json_raw&as=A17538D54D106FF&cp=585DF0A65F0F1E1&min_behot_time=1482491618'.format(category)
        headers = {
            "Host": "m.toutiao.com",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        cookies = {
            "tt_webid": "6754560229981750791",
            "WEATHER_CITY": "%E5%8C%97%E4%BA%AC",
            "csrftoken": "4e76bad8185f77ea8b647e50e3bb0e26",
            "_ga": "GA1.2.268549673.1572668703",
            "__utmz": "24953151.1578051975.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
            "__utma": "24953151.268549673.1572668703.1578051975.1578221568.2",
            "SLARDAR_WEB_ID": "60060dd2-e5b2-470c-b7f4-09b8d877a031",
            "ttcid": "3f7ad0f028c54aa584367115a3fa7cb028",
            "__ac_nonce": "05ecba43c0082bbdfb3e",
            "__ac_signature": "O2auwAAgEBACmHx7dlnWxztnr9AAGWz3dGBVQL.r0H1se1MOmPcu.0Mxo.Y9Zem1qLCx5rV13rttFjnqiBP7d4KfTBbgN8Az4ip.Po5Ht9XBlX2CZW8ZIUZV9blZ9smX461",
            "s_v_web_id": "verify_kamdklmd_rcCyRT2X_ZLBe_4OQI_Brjy_Q7HFKjeCzPbt",
            "__tasessionId": "gx98j8z1x1590404157654",
            "tt_scid": "nLlc9z1mWQgXRhJFHC7i29KXmnHM9sZynN1Ue86iBmH5DPSaDONSaQYQz7mqHMcMf924"
        }
        result = Request.Request(url, headers, cookies).more()['data']
        logging.info("获取新闻缩率信息 %s 成功" % url)
        return result
    except Exception:
        # Narrowed from a bare `except:`; failures are silently reported as
        # None (failure logging was deliberately disabled in the original).
        return None
def add_requests(self, requests):
    """Register a batch of (url, period) pairs as periodic requests."""
    for target_url, interval in requests:
        self.add_request(Request(target_url), period=interval)
def perform(self, paste, analyzer_name=None):
    """Trigger the webhook"""
    # TODO - More post options ([custom] body, template, choose between GET/POST etc.)
    webhook_request = Request()
    webhook_request.post(url=self.url)
class PastebinScraper(BasicScraper):
    """Scrapes the most recent pastes from pastebin's scraping API and pushes
    them (with downloaded bodies) onto a paste queue."""
    name = "PastebinScraper"
    api_base_url = "https://scrape.pastebin.com"

    def __init__(self, paste_queue=None, exception_event=None):
        """
        :param paste_queue: queue onto which finished pastes are pushed;
                            a new Queue is created when omitted
        :param exception_event: event used to signal fatal errors
        """
        super().__init__(exception_event)
        self.logger = logging.getLogger(__name__)
        self._last_scrape_time = 0          # unix time of the last metadata fetch
        self.paste_queue = paste_queue or Queue()
        self._tmp_paste_queue = Queue()     # pastes awaiting body download
        self._known_pastes = []             # keys of pastes already seen
        self._known_pastes_limit = 1000     # cap on the known-pastes history
        self.request = Request()

    def _check_error(self, body):
        """Checks if an error occurred and raises an exception if it did"""
        if body is None:
            raise EmptyBodyException()
        if "DOES NOT HAVE ACCESS" in body:
            # Pastebin rejects unregistered IPs - fatal for the whole run.
            self._exception_event.set()
            raise IPNotRegisteredError()

    def _get_recent(self, limit=100):
        """Downloads a list of the most recent pastes - the amount is limited
        by the <limit> parameter.

        :return: list of Paste objects, or None on any error
        """
        endpoint = "api_scraping.php"
        api_url = "{0}/{1}?limit={2}".format(self.api_base_url, endpoint, limit)
        try:
            response_data = self.request.get(api_url)
            self._check_error(response_data)
            pastes_dict = json.loads(response_data)
            # Create Paste objects from the returned metadata
            pastes = [Paste(key=paste.get("key"),
                            title=paste.get("title"),
                            user=paste.get("user"),
                            size=paste.get("size"),
                            date=paste.get("date"),
                            expire=paste.get("expire"),
                            syntax=paste.get("syntax"),
                            scrape_url=paste.get("scrape_url"),
                            full_url=paste.get("full_url"))
                      for paste in pastes_dict]
            return pastes
        except Exception as e:
            self.logger.error(e)
            return None

    def _get_paste_content(self, key):
        """Downloads the content of a certain paste; returns "" on error."""
        endpoint = "api_scrape_item.php"
        api_url = "{0}/{1}?i={2}".format(self.api_base_url, endpoint, key)
        content = ""
        self.logger.debug("Downloading paste {0}".format(key))
        try:
            response_data = self.request.get(api_url)
            self._check_error(response_data)
            content = response_data
        except Exception as e:
            self.logger.error(e)
        return content

    def _body_downloader(self):
        """Downloads the body of pastes from pastebin, which have been put
        into the queue - rate-limited to roughly one download per second."""
        while self.running:
            try:
                self.logger.debug("Queue size: {}".format(self._tmp_paste_queue.qsize()))
                if self._stop_event.is_set() or self._exception_event.is_set():
                    self.running = False
                    break
                paste = self._tmp_paste_queue.get(True, 1)
                # if paste is not known, download the body and put it on the
                # queue and into the list
                last_body_download_time = round(time.time(), 2)
                body = self._get_paste_content(paste.key)
                paste.set_body(body)
                self.paste_queue.put(paste)
                # Sleep for the remainder of one second to respect the rate limit
                current_time = round(time.time(), 2)
                diff = round(current_time - last_body_download_time, 2)
                if diff >= 1:
                    continue
                sleep_diff = round(1 - diff, 3)
                self.logger.debug("Sleep time is: {0}".format(sleep_diff))
                time.sleep(sleep_diff)
            except Empty:
                continue

    def start(self, paste_queue):
        """Start the scraping process and download the paste metadata"""
        self.paste_queue = paste_queue
        self.running = True
        start_thread(self._body_downloader, "BodyDownloader", self._exception_event)
        while self.running:
            self._last_scrape_time = int(time.time())
            pastes = self._get_recent(limit=100)
            counter = 0
            if pastes is not None:
                for paste in pastes:
                    # check if paste is in list of known pastes
                    if paste.key in self._known_pastes:
                        # Do nothing, if it's already known
                        continue
                    self.logger.debug("Paste is unknown - adding it to list {}".format(paste.key))
                    self._tmp_paste_queue.put(paste)
                    self._known_pastes.append(paste.key)
                    counter += 1
                    if self._stop_event.is_set() or self._exception_event.is_set():
                        self.running = False
                        break
                self.logger.debug("{0} new pastes fetched!".format(counter))
            # Trim the known-pastes history so it cannot grow without bound.
            # Fixed: compare against the configured limit instead of the
            # hard-coded 1000 the original used.
            if len(self._known_pastes) > self._known_pastes_limit:
                self.logger.debug("known_pastes > 1000 - cleaning up!")
                start_index = len(self._known_pastes) - self._known_pastes_limit
                self._known_pastes = self._known_pastes[start_index:]
            if self._stop_event.is_set() or self._exception_event.is_set():
                self.logger.debug('stopping {0}'.format(self.name))
                self.running = False
                break
            # If the last scrape happened less than 60 seconds ago, wait
            # until the full 60 seconds have passed.
            current_time = int(time.time())
            diff = current_time - self._last_scrape_time
            if diff < 60:
                sleep_time = 60 - diff
                time.sleep(sleep_time)
from uuid import uuid1
from azure.storage.table.tableservice import TableService
from _bareasgi import text_reader, text_response, json_response
import config
from config import azure_storage
from util import Request, URL
from phrase_metric import similarity

# Azure table storage client used for persistence.
table_service = TableService(account_name=azure_storage['account'],
                             account_key=azure_storage['key'])

# Pre-configured request template for the Answer Search API.
answer_search = Request(
    'GET',
    config.answer_search.url,
    params={'mkt': 'en-us'},
    headers={'Ocp-Apim-Subscription-Key': config.answer_search.key})

# Pre-configured request template for the QnA Maker endpoint.
qna_maker = Request(
    'POST',
    config.qna_maker.url,
    headers={'Authorization': f'EndpointKey {config.qna_maker.key}'})


def uuid():
    """Return a new UUID1 as a 32-character hex string."""
    return uuid1().hex


# NOTE(review): definition continues past the end of this chunk.
def get_answer(answer):
    global _answer