コード例 #1
0
class TelegramAction(BasicAction):
    """Action to send a Telegram message to a certain user or group.

    The message is delivered with a GET request to the Telegram Bot API
    ``sendMessage`` endpoint.
    """
    name = "TelegramAction"

    def __init__(self, token, receiver, custom_payload=None, template=None):
        """Validate the bot token and remember the delivery settings.

        :param token: bot token; must start with ``<digits>:<letters/digits/-/_>``
        :param receiver: value used as ``chat_id`` when sending
        :param custom_payload: stored on the instance; not used by ``perform`` yet
        :param template: stored on the instance; not used by ``perform`` yet
        :raises ValueError: if ``token`` is None or does not match the expected format
        """
        super().__init__()
        self.logger = logging.getLogger(__name__)

        # Test for None first: re.match() would raise TypeError on None
        # before the intended ValueError could be reported.
        if token is None or not re.match(r"[0-9]+:[a-zA-Z0-9\-_]+", token):
            raise ValueError("Bot token not correct or None!")

        self.token = token
        self.receiver = receiver
        self.custom_payload = custom_payload
        self.template = template
        self.request = Request()
        # TODO add possibility to send a template message and inject the paste data into the template

    def perform(self, paste, analyzer_name=None):
        """Send a message via a Telegram bot to a specified user, without checking for errors.

        :param paste: object whose ``full_url`` attribute is included in the message
        :param analyzer_name: name of the analyzer that matched, shown in the message
        """
        # Local import keeps this block self-contained.
        from urllib.parse import quote

        text = "New paste matched by analyzer '{0}' - Link: {1}".format(analyzer_name, paste.full_url)
        # Percent-encode the text so spaces, quotes, '&' or '#' inside it cannot
        # truncate the message or be read as extra query parameters.
        api_url = "https://api.telegram.org/bot{0}/sendMessage?chat_id={1}&text={2}".format(
            self.token, self.receiver, quote(text, safe=""))
        self.request.get(api_url)
コード例 #2
0
ファイル: main.py プロジェクト: edo92/Prob-Control
class Socket:
    """Small wrapper that posts data through a ``Request`` client."""

    def __init__(self):
        """Create the client from the ``SERVER_URI`` environment variable."""
        self.request = Request(os.getenv('SERVER_URI'))

    def emit(self, data):
        """POST ``data`` under the ``prob`` key to the ``/prob-data`` path."""
        payload = {"prob": data}
        self.request.post('/prob-data', payload)
コード例 #3
0
ファイル: views.py プロジェクト: zhaofei1988/demo
def callback():
    """Handle the login callback.

    Reads ``code`` from the query string, exchanges it for an access token,
    looks up the matching openid, stores both in the session (keys
    ``qq_access_token`` and ``openid``) and redirects to ``get_user_info``.

    :return: a redirect on success, or a JSON 400 error response when the
        ``code`` parameter is missing or no access token can be extracted
    """
    code = request.args.get('code')
    if not code:
        logger.error("缺少code参数")
        return jsonify(Response.error(400, "缺少code参数"))
    logger.info("【code】" + str(code))

    # Use the code to request an access_token; the reply is kept as raw text.
    token_resp = Request.get(urls.get_token_url(code), to_dict=False)
    print(token_resp)
    token_match = re.search("access_token=(.*?)&expires_in", token_resp)
    if token_match is None:
        logger.error('获取access_token错误')
        return jsonify(Response.error(400, "获取access_token错误"))
    access_token = token_match.group(1)
    logger.info("【access_token】" + str(access_token))
    session['qq_access_token'] = access_token

    # Use the access_token to obtain the openid.
    openid_resp = Request.get(urls.get_openid_url(access_token))
    print(openid_resp)
    openid = openid_resp.get('openid')
    logger.info("【openid】" + str(openid))
    session['openid'] = openid

    return redirect(url_for('get_user_info'))
コード例 #4
0
    def __init__(self, paste_queue=None, exception_event=None):
        """Initialise the queues, known-paste bookkeeping and request helper.

        :param paste_queue: queue to use for pastes; a new ``Queue`` is created when falsy
        :param exception_event: passed unchanged to the parent initialiser
        """
        super().__init__(exception_event)
        self.logger = logging.getLogger(__name__)

        # Bookkeeping state.
        self._known_pastes_limit = 1000
        self._known_pastes = []
        self._last_scrape_time = 0

        # Caller-supplied queue (or a fresh one) plus a temporary queue.
        self._tmp_paste_queue = Queue()
        self.paste_queue = paste_queue if paste_queue else Queue()

        self.request = Request()
コード例 #5
0
    def __init__(self, token, receiver, custom_payload=None, template=None):
        """Validate the bot token and remember the delivery settings.

        :param token: bot token; must start with ``<digits>:<letters/digits/-/_>``
        :param receiver: stored on the instance as ``receiver``
        :param custom_payload: stored on the instance as ``custom_payload``
        :param template: stored on the instance as ``template``
        :raises ValueError: if ``token`` is None or does not match the expected format
        """
        super().__init__()
        self.logger = logging.getLogger(__name__)

        # Test for None first: re.match() would raise TypeError on None
        # before the intended ValueError could be reported.
        if token is None or not re.match(r"[0-9]+:[a-zA-Z0-9\-_]+", token):
            raise ValueError("Bot token not correct or None!")

        self.token = token
        self.receiver = receiver
        self.custom_payload = custom_payload
        self.template = template
        self.request = Request()
コード例 #6
0
 def build_video_download_request(cls, url):
     """
     Build the GET request used to download a video.

     :param url: address the request is made to
     :return: a ``Request`` for ``url`` carrying the class-level ``HEADERS``
     """
     request_kwargs = {
         'url': url,
         'method': HttpMethod.GET,
         'headers': cls.HEADERS,
     }
     return Request(**request_kwargs)
コード例 #7
0
 def build_oid_api_request(cls, mid, pn=1, ps=100, tid=0, order='pubdate', keyword=None):
     """
     Build the request that lists all video av numbers of an uploader.

     :param mid: uploader id
     :param pn: sent as the ``pn`` query parameter (presumably the page number)
     :param ps: number of items per page
     :param tid: 0 means all
     :param order: sent as the ``order`` query parameter
     :param keyword: sent as the ``keyword`` query parameter
     :return: a GET ``Request`` for the ``/x/space/arc/search`` endpoint
     """
     query = dict(mid=mid, pn=pn, ps=ps, tid=tid, order=order, keyword=keyword)
     return Request(
         url=cls.BASE_API_URL + '/x/space/arc/search',
         method=HttpMethod.GET,
         params=query,
         headers=cls.HEADERS,
     )
コード例 #8
0
ファイル: runserver.py プロジェクト: TotallyFine/note-repo
def run_server(host='127.0.0.1', port=8001, max_in=1):
    """Run a blocking server loop that answers one request per connection.

    Each accepted connection is read until a short (or empty) chunk arrives,
    parsed into a ``Request``, answered with ``response_request`` and then
    closed.

    :param host: address to bind to
    :param port: port to bind to
    :param max_in: backlog passed to ``listen()``
    """
    buffer_size = 1024
    # Create the listening socket; `with` closes it when the loop exits.
    with socket.socket() as s:
        # Let the port be reused right after the server shuts down.
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Bind host and port.
        s.bind((host, port))

        print('server start, welcome socket at port:', port, '\n')
        # The backlog only needs to be set once, not before every accept().
        s.listen(max_in)
        while True:
            connection, addr = s.accept()
            # `with` closes the client socket on every path, including `continue`.
            with connection:
                print('create socket with: ', *addr)
                chunks = []
                while True:
                    data = connection.recv(buffer_size)
                    chunks.append(data)
                    # A short (or empty) read is treated as the end of the request.
                    if len(data) < buffer_size:
                        break
                # Decode once after reassembly so a multi-byte character split
                # across two chunks cannot raise; undecodable bytes are replaced.
                packet = b''.join(chunks).decode(errors='replace')

                if len(packet.split()) < 2:  # guard against empty requests sent by browsers
                    continue

                r = Request(packet)  # build the request object from the raw text
                print(str(datetime.now())[:19], *addr, r.method, r.path)  # log line
                response = response_request(r)
                connection.sendall(response)
コード例 #9
0
 def build_archive_api_request(cls, aid):
     """
     Build the request for a video's statistics (including collections).

     :param aid: sent as the ``aid`` query parameter
     :return: a GET ``Request`` for the ``/x/web-interface/archive/stat`` endpoint
     """
     return Request(
         url=cls.BASE_API_URL + '/x/web-interface/archive/stat',
         method=HttpMethod.GET,
         params=dict(aid=aid),
         headers=cls.HEADERS,
     )
コード例 #10
0
 def build_aid_api_request(cls, aid):
     """
     Build the request that fetches video information by aid.

     :param aid: sent as the ``aid`` query parameter
     :return: a GET ``Request`` for the ``/x/web-interface/view`` endpoint
     """
     return Request(
         url=cls.BASE_API_URL + '/x/web-interface/view',
         method=HttpMethod.GET,
         params=dict(aid=aid),
         headers=cls.HEADERS,
     )
コード例 #11
0
ファイル: views.py プロジェクト: zhaofei1988/demo
def get_user_info():
    """
    Fetch the user's basic information using the access_token and openid
    stored in the session.

    :return: a JSON success response wrapping the fetched data, or a JSON 400
        error response when the session has no ``qq_access_token``
    """
    if 'qq_access_token' not in session:
        return jsonify(Response.error(400, "获取用户信息失败"))

    openid = session.get('openid')
    access_token = session.get('qq_access_token')
    logger.info("【openid】" + str(openid))
    logger.info("【access_token】" + str(access_token))

    user_info = Request.get(urls.get_user_info_url(access_token, openid))
    return jsonify(Response.success(user_info))
コード例 #12
0
 def build_dm_api_request(cls, oid):
     """
     Build the request that fetches danmaku (bullet comment) data.

     :param oid: sent as the ``oid`` query parameter
     :return: a GET ``Request`` for the ``/x/v1/dm/list.so`` endpoint
     """
     return Request(
         url=cls.BASE_API_URL + '/x/v1/dm/list.so',
         method=HttpMethod.GET,
         params=dict(oid=oid),
         headers=cls.HEADERS,
     )
コード例 #13
0
def runOnce(sock: socket.socket) -> None:
    """Accept one connection on ``sock`` and answer a single request.

    The request header is read byte by byte up to the blank line, then the
    body is read when a ``Content-Length`` header is present. Responses:

    * 404 when the pathname is not ``/product``
    * 400 when there are no query parameters or they cannot be multiplied
    * 200 with a JSON body holding the product of all parameter values

    If the peer closes the connection before the header is complete, the
    connection is closed and nothing is answered.

    :param sock: listening socket to accept the connection from
    """
    connection, _peer = sock.accept()
    try:
        header = bytearray()
        while not header.endswith(b"\r\n" * 2):
            chunk = connection.recv(1)
            if not chunk:
                # recv() returns b"" once the peer has closed; without this
                # check the loop would spin forever on a half-sent header.
                return
            header.extend(chunk)

        request = Request.fromBytes(header)  # parse request
        # If the browser does not include a Content-Length header, the request
        # is treated as having no body.
        if "Content-Length" in request.headers:
            contentLength = int(request.headers["Content-Length"])
            while len(request.body) < contentLength:
                chunk = connection.recv(4096)
                if not chunk:  # peer closed early; keep what was received
                    break
                request.body.extend(chunk)

        headers = {"Content-Type": "application/json"}
        if request.pathname != "/product":
            response = Response(404, body=b"404 Not Found")
        elif request.params == {}:
            response = Response(400, body=b"400 Bad Request")
        else:
            try:
                # float() fails on non-numeric parameter values
                operands = list(map(float, request.params.values()))
                result = reduce(lambda x, y: x * y, operands)
                body = {
                    "operation": "product",
                    "operands": operands,
                    "result": result
                }
                response = Response(200,
                                    headers=headers,
                                    body=bytes(json.dumps(body, indent=4), "utf8"))
            except Exception:
                # Any failure while computing the answer is reported as a bad
                # request; KeyboardInterrupt/SystemExit are no longer swallowed.
                response = Response(400, body=b"400 Bad Request")

        connection.sendall(bytes(response))
    finally:
        # Always release the client socket, even when parsing or sending fails.
        connection.close()
    print("{} {} {}".format(request.method, request.url, response.statusCode))
コード例 #14
0
def runForever(port):
    """Serve ``.html``/``.htm`` files from the current directory, one request at a time.

    Responses:

    * 200 with the file content for an existing ``.html``/``.htm`` path
    * 403 for any other existing path, and for paths that resolve outside
      the current directory
    * 404 when the path does not exist

    The loop ends when ``accept()`` is interrupted (e.g. Ctrl-C) or fails.

    :param port: TCP port to listen on (all interfaces)
    """
    # according to <https://docs.python.org/3/library/socket.html#socket.AF_INET>
    with socket.socket(family=socket.AF_INET, type=socket.SOCK_STREAM) as sock:
        sock.bind(("", port))
        sock.listen(5)
        root = os.path.realpath(".")
        # join(root, "") appends a separator unless root already ends with one
        rootPrefix = os.path.join(root, "")

        while True:
            try:
                connection, _peer = sock.accept()
            except (KeyboardInterrupt, OSError):
                print("Keyboard interrupt. Exitting.")
                break

            # `with` closes the client socket on every path below.
            with connection:
                header = bytearray()
                while not header.endswith(b"\r\n" * 2):  # read the header only
                    chunk = connection.recv(1)
                    if not chunk:
                        # recv() returns b"" once the peer has closed; stop
                        # instead of looping forever on a half-sent header.
                        break
                    header.extend(chunk)
                if not header.endswith(b"\r\n" * 2):
                    continue  # incomplete request: drop the connection

                request = Request.fromBytes(header)  # parse request

                path = "." + request.pathname
                headers = {
                    "Content-Type": "text/html",
                }
                # The pathname comes from the client: refuse anything that
                # resolves outside the served directory (e.g. "/../secret.html").
                resolved = os.path.realpath(path)
                insideRoot = resolved == root or resolved.startswith(rootPrefix)

                if not insideRoot:
                    response = Response(403,
                                        body=b"<h1>403 Forbidden</h1>",
                                        headers=headers)
                elif os.path.exists(path):
                    if path.endswith((".html", ".htm")):
                        response = Response(200, headers=headers)
                        with open(path, "rb") as f:
                            response.body.extend(f.read())
                    else:
                        response = Response(403,
                                            body=b"<h1>403 Forbidden</h1>",
                                            headers=headers)
                else:
                    response = Response(404,
                                        body=b"<h1>404 Not Found</h1>",
                                        headers=headers)

                connection.sendall(bytes(response))
                print("{} {} {}".format(request.method, request.pathname,
                                        response.statusCode))
コード例 #15
0
 def build_sign_cid_api_request(cls, cid, qn=116):
     """
     Build the request for video download information (legacy signing scheme).

     :param cid: sent as the ``cid`` query parameter
     :param qn: sent as both the ``qn`` and ``quality`` query parameters
     :return: a GET ``Request`` for the signed ``playurl`` URL, with the
         ``SESSDATA`` cookie taken from ``CONFIG['SESSION_DATA']``
     """
     entropy = 'rbMCKn@KuamXWlPMoJGsKcbiJKUfkPF_8dABscJntvqhRSETg'
     # Reverse the string, shift every character code up by 2, then split the
     # result at ':' into the app key and the signing secret.
     decoded = ''.join(chr(ord(ch) + 2) for ch in reversed(entropy))
     appkey, sec = decoded.split(':')

     params = 'appkey=%s&cid=%s&otype=json&qn=%s&quality=%s&type=' % (appkey, cid, qn, qn)
     # The signature is the MD5 hex digest of the query string plus the secret.
     chksum = hashlib.md5((params + sec).encode('utf8')).hexdigest()
     url = 'https://interface.bilibili.com/v2/playurl?%s&sign=%s' % (params, chksum)

     cookies = {'SESSDATA': CONFIG['SESSION_DATA']}
     return Request(
         url=url,
         method=HttpMethod.GET,
         params=params,
         headers=cls.HEADERS,
         cookies=cookies,
     )
コード例 #16
0
 def build_cid_api_request(cls, avid, cid):
     """
     Build the request for video download information.

     :param avid: sent as the ``avid`` query parameter
     :param cid: sent as the ``cid`` query parameter
     :return: a GET ``Request`` for the ``/x/player/playurl`` endpoint, with
         the ``SESSDATA`` cookie taken from ``CONFIG['SESSION_DATA']``
     """
     url = cls.BASE_API_URL + '/x/player/playurl'
     query = dict(avid=avid, cid=cid, qn=80, fnver=0, fnval=16)
     cookies = dict(SESSDATA=CONFIG['SESSION_DATA'])
     return Request(
         url=url,
         method=HttpMethod.GET,
         params=query,
         headers=cls.HEADERS,
         cookies=cookies,
     )
コード例 #17
0
def get(url, headers=None):
    """Naive GET, does not handle any error or redirect.

    Sends the serialized ``Request`` for ``url`` over a fresh TCP connection,
    reads the response header byte by byte, then reads the body either up to
    ``Content-Length`` or, when that header is absent, until the server
    closes the stream.

    :param url: address to fetch
    :param headers: accepted but not used by this implementation
    :return: the parsed ``Response`` with its body filled in
    :raises ConnectionError: if the server closes the connection before the
        response header is complete
    """
    request = Request(url)  # Your client must include a "Host: " header
    ip = socket.gethostbyname(request.hostname)
    port = request.port

    # `with` closes the socket even when connecting, sending or parsing fails.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((ip, port))
        # sendall() keeps sending until the whole request is out; send() may
        # transmit only part of it.
        sock.sendall(bytes(request))

        header = bytearray()
        while not header.endswith(b"\r\n" * 2):
            chunk = sock.recv(1)
            if not chunk:
                # recv() returns b"" once the stream is closed; without this
                # check the loop would never terminate.
                raise ConnectionError(
                    "connection closed before the response header was complete")
            header.extend(chunk)

        response = Response.fromBytes(header)  # construct response from header
        # use Content-Length to decide when the response has been fully transferred
        if "Content-Length" in response.headers:
            contentLength = int(response.headers["Content-Length"])
            # keep reading body until reaching Content-Length
            while len(response.body) < contentLength:
                chunk = sock.recv(4096)
                if not chunk:  # stream closed early; return what was received
                    break
                response.body.extend(chunk)
        else:
            # no Content-Length: assume the server closes the stream when done
            while True:
                chunk = sock.recv(4096)
                if not chunk:  # stream has been closed
                    break
                response.body.extend(chunk)

    return response
コード例 #18
0
ファイル: message.py プロジェクト: Yeolar/tornado-robot
 def deserialize(message):
     """Build a ``RequestMessage`` from items 0 and 1 of ``message``.

     Item 0 is passed through unchanged; item 1 is first converted with
     ``Request.deserialize``.
     """
     first, raw_request = message[0], message[1]
     return RequestMessage(first, Request.deserialize(raw_request))
コード例 #19
0
ファイル: ReplyProcess.py プロジェクト: wakerh1/NewsRecommend
    def get_reps_json(self, com_json):
        """Fetch the replies (including replies to replies) of one comment.

        Changelog:
            20-04-17: code review OK.
            20-05-07: raise an exception when the fetched content is None.

        API reference:
            https://www.toutiao.com/2/comment/v2/reply_list/?aid=24&app_name=toutiao-web&id=6733175468666748931&offset=0&count=20&repost=0

        :param com_json: comment record; its ``id`` entry selects the comment
        :return: the ``data.data`` value of the API response: a list with one
            dict per reply. Abridged example of one item (a reply to another
            reply additionally carries a ``reply_to_comment`` object holding
            the ``id``, ``text`` and user fields of the reply it answers):

            [
                {
                    "id": 6777317683702185995,
                    "id_str": "6777317683702185995",
                    "create_time": 1577967243,
                    "text": "...",
                    "content": "...",
                    "content_rich_span": "{"links":[]}",
                    "digg_count": 6,
                    "forward_count": 0,
                    "user_digg": false,
                    "is_owner": false,
                    "has_author_digg": 0,
                    "thumb_image_list": [],
                    "large_image_list": [],
                    "user": {
                        "user_id": 4540648983,
                        "name": "...",
                        "screen_name": "...",
                        "avatar_url": "http://...",
                        "description": "",
                        "user_verified": false,
                        "is_following": false,
                        "is_followed": false,
                        "is_blocking": false,
                        "is_blocked": false,
                        "is_pgc_author": false,
                        "user_relation": 0
                    },
                    "group": null,
                    "repost_params": null
                }
            ]
        :raises RuntimeError: if the response holds no reply data (``None``)
        """
        reply_url = ('https://www.toutiao.com/api/pc/2/comment/v4/reply_list/?'
                     'aid=24&app_name=toutiao-web&id={0}&offset=0&count={1}&repost=0'
                     ).format(com_json['id'], 20)
        headers = {
            "Host": "www.toutiao.com",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        # Request/parsing errors propagate to the caller unchanged.
        result = Request.Request(reply_url, headers).more()['data']['data']
        if result is None:
            # Raise explicitly: a bare `raise` with no active exception only
            # produces RuntimeError by accident, and would re-raise the caller's
            # exception when this method runs inside an `except` block.
            raise RuntimeError("reply list request returned no data: %s" % reply_url)
        logging.info("获取回复信息 %s 成功", reply_url)
        return result
コード例 #20
0
    def get_coms_json(self, art_brief_json):
        """Fetch the comment list of one article.

        Changelog:
            20-04-17: code review OK.
            20-04-28: number of comments fetched per request raised from 10 to 20.
            20-05-02: log output changed; the ``aid`` URL parameter was lowered
                because its effect is unclear.

        API reference:
            https://www.toutiao.com/article/v2/tab_comments/?aid=24&app_name=toutiao-web&group_id=6732655510039822860&item_id=6732655510039822860&offset=0&count=5

        :param art_brief_json: article record; its ``group_id`` and ``item_id``
            entries select the article
        :return: the ``data`` value of the API response, or ``None`` when the
            request or parsing fails (the failure is logged). Abridged example
            of one list item:

            [
                {
                    "comment": {
                        "id": 6732812439848665099,
                        "id_str": "6732812439848665099",
                        "text": "...",
                        "content_rich_span": "{"links":[]}",
                        "reply_count": 71,
                        "reply_list": [],
                        "digg_count": 1372,
                        "bury_count": 0,
                        "forward_count": 0,
                        "create_time": 1567605056,
                        "score": 1.8130338214242776,
                        "user_id": 1684053931865459,
                        "user_name": "...",
                        "user_profile_image_url": "http://...",
                        "user_verified": false,
                        "aid": 35,
                        "large_image_list": [],
                        "thumb_image_list": [],
                        "media_info": {"name": "", "avatar_url": ""},
                        "platform": "feifei",
                        "has_multi_media": false,
                        "show_tags": 0
                    },
                    "ad": null,
                    "embedded_data": null,
                    "id": 6732812439848665099,
                    "cell_type": 1
                }
            ]
        """
        try:
            com_url = ('https://www.toutiao.com/api/pc/article/v4/tab_comments/?'
                       'aid=1&app_name=toutiao-web&group_id={0}&item_id={1}&offset=0&count={2}'
                       ).format(art_brief_json['group_id'], art_brief_json['item_id'], 20)
            headers = {
                "Host":
                "www.toutiao.com",
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
            }
            result = Request.Request(com_url, headers).more()['data']
            logging.info("获取评论页数据 %s 成功", com_url)
            return result
        except Exception:
            # Best effort: log the failure and signal it with None. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
            logging.exception("获取评论页数据 失败")
            return None
コード例 #21
0
def _respondAndClose(readers, connection, response, method="-", pathname="-"):
    """Send ``response`` on ``connection``, then close and forget the connection.

    :param readers: dict of tracked sockets; ``connection`` is removed from it.
    :param connection: client socket to answer.
    :param response: object convertible with ``bytes()`` that exposes ``statusCode``.
    :param method: request method for the access line ("-" when unknown).
    :param pathname: request path for the access line ("-" when unknown).
    """
    try:
        connection.sendall(bytes(response))
    except OSError:
        # The peer may already be gone; one broken client must not stop the server.
        traceback.print_exc()
    finally:
        connection.close()
        readers.pop(connection, None)
    print("{} {} {}".format(method, pathname, response.statusCode))


def _rejectInvalid(readers, connection, header):
    """Answer a request whose header could not be used, echoing the raw header bytes."""
    # NOTE(review): 400 would be the conventional status for a malformed request;
    # 403 is kept because it is what this server has always sent.
    response = Response(
        403, body=b"HTTP request is invalid: <pre>" + header + b"</pre>")
    _respondAndClose(readers, connection, response)


def _dropClient(readers, connection):
    """Close ``connection`` and stop tracking it (used when the peer disconnected)."""
    connection.close()
    readers.pop(connection, None)


def runForever(port):
    """Serve static files over HTTP on ``port`` until interrupted.

    A single-threaded ``select`` loop tracks every client socket in a dict whose
    value is a small state record: ``"header"`` while the request header is still
    being received, ``"body"`` while the announced request body is being received.
    Requests without a body (no ``Content-Length`` or a length of 0) are answered
    as soon as the header is complete; otherwise the answer is sent once
    ``Content-Length`` bytes of body have arrived. The response is always
    ``staticFile(request.pathname)``.

    All sockets, including the listening one, are closed when the loop ends.

    :param port: TCP port to listen on (all interfaces).
    """
    sock = socket.socket(
        family=socket.AF_INET, type=socket.SOCK_STREAM
    )  # according to <https://docs.python.org/3/library/socket.html#socket.AF_INET>
    sock.bind(("", port))
    sock.listen(5)
    readers = {
        sock: None,
    }

    try:
        while True:
            readables, _, _ = select.select(list(readers), [], [])

            for readable in readables:
                if readable is sock:  # new connection coming in
                    # Do not unpack the peer address into `port`: that would
                    # overwrite this function's parameter.
                    connection, _peer = sock.accept()
                    readers[connection] = {
                        "state": "header",
                        "header": bytearray()
                    }
                    continue

                state = readers[readable]
                readingHeader = state["state"] == "header"
                try:
                    # The header is read one byte at a time so that nothing
                    # belonging to the body is consumed by accident.
                    chunk = readable.recv(1 if readingHeader else 4096)
                except OSError:
                    chunk = b""

                if not chunk:
                    # Empty read means the peer closed the connection. Without
                    # this the socket would stay readable and the loop would spin.
                    _dropClient(readers, readable)
                    continue

                if readingHeader:
                    header = state["header"]
                    header.extend(chunk)
                    if not header.endswith(b"\r\n" * 2):
                        continue  # header not fully transferred yet

                    try:
                        request = Request.fromBytes(header)  # parse request header
                    except Exception:  # fail to parse header
                        traceback.print_exc()
                        # No request object exists here, so the access line uses
                        # placeholders instead of request.method / request.pathname.
                        _rejectInvalid(readers, readable, header)
                        continue

                    try:
                        # Header values may be text, so convert before comparing.
                        if "Content-Length" in request.headers:
                            contentLength = int(request.headers["Content-Length"])
                        else:
                            contentLength = 0
                    except (TypeError, ValueError):
                        _rejectInvalid(readers, readable, header)
                        continue

                    if contentLength <= 0:  # nothing more to read, serve immediately
                        _respondAndClose(readers, readable,
                                         staticFile(request.pathname),
                                         request.method, request.pathname)
                    else:  # need to read the whole request body
                        state["state"] = "body"
                        state["request"] = request
                        state["contentLength"] = contentLength
                        state.pop("header")
                else:  # in the state of reading body
                    request = state["request"]
                    request.body.extend(chunk)
                    if len(request.body) >= state["contentLength"]:
                        _respondAndClose(readers, readable,
                                         staticFile(request.pathname),
                                         request.method, request.pathname)
    except KeyboardInterrupt:
        print("Keyboard interrupt. Exitting.")
    finally:
        for tracked in list(readers):  # clean up
            tracked.close()
コード例 #22
0
ファイル: main.py プロジェクト: edo92/Prob-Control
 def __init__(self):
     """Build the ``Request`` helper from the ``SERVER_URI`` environment variable."""
     self.request = Request(os.getenv('SERVER_URI'))
コード例 #23
0
    def get_arts_brief_json_by_category(self, category):
        """Fetch the brief (listing) records of the articles in one category.

        Reference endpoint:
            http://m.toutiao.com/list/?tag=__all__&ac=wap&count=20&format=json_raw&as=A17538D54D106FF&cp=585DF0A65F0F1E1&min_behot_time=1482491618

        :param category: value for the ``tag`` query parameter. Known tags:
            society:        'news_society',
            entertainment:  'news_entertainment',
            technology:     'news_tech',
            military:       'news_military',
            sports:         'news_sports',
            cars:           'news_car',
            finance:        'news_finance',
            international:  'news_world',
            fashion:        'news_fashion',
            travel:         'news_travel',
            discovery:      'news_discovery',
            parenting:      'news_baby',
            wellness:       'news_regimen',
            stories:        'news_story',
            essays:         'news_essay',
            games:          'news_game',
            history:        'news_history',
            food:           'news_food',
        :return: the ``'data'`` entry of the response, or ``None`` when anything
            fails (the failure is logged with its traceback).
            Example (sample values are verbatim API output):
            [
                Object{...},
                Object{...},
                Object{...},
                {
                    "media_name":"呦呦科学馆",
                    "ban_comment":0,
                    "abstract":"大家对屎壳郎都会有所耳闻,屎壳郎的原名是蜣螂(qiāng láng),一种痴迷于推粪球的昆虫,我们一直以来都没有什么正面评价,有时候在评价一个人不好的时候,会把他说成是屎壳郎。虽然屎壳郎在我们这儿,没有什么正面的评价,但是有些地方把它当成是神哦!不是负面的神,而是太阳神!",
                    "image_list":[
                        {
                            "url":"http://p6-tt.byteimg.com/img/pgc-image/a9b944c7417847e687b5f97aeb2798ad~tplv-tt-cs0:640:360.jpg",
                            "width":640,
                            "height":360
                        },
                        {
                            "url":"http://p6-tt.byteimg.com/img/pgc-image/2ca2e18621f942daa6b7e602205b4492~tplv-tt-cs0:525:576.jpg",
                            "width":525,
                            "height":295
                        },
                        {
                            "url":"http://p9-tt.byteimg.com/img/pgc-image/3605e63e394e45b494d2cc5914662a8c~tplv-tt-cs0:628:344.jpg",
                            "width":628,
                            "height":344
                        }
                    ],
                    "datetime":"2020-01-03 20:30",
                    "article_type":0,
                    "more_mode":true,
                    "tag":"news_story",
                    "has_m3u8_video":0,
                    "display_dt":1577527020,
                    "has_mp4_video":0,
                    "aggr_type":1,
                    "cell_type":0,
                    "article_sub_type":0,
                    "bury_count":0,
                    "title":"屎壳郎的故事",
                    "source_icon_style":1,
                    "tip":0,
                    "has_video":false,
                    "share_url":"http://toutiao.com/a6775056296904229390/?app=news_article&is_hit_share_recommend=0",
                    "source":"呦呦科学馆",
                    "comment_count":0,
                    "article_url":"http://toutiao.com/group/6775056296904229390/",
                    "publish_time":1577527020,
                    "group_flags":0,
                    "gallary_image_count":4,
                    "action_extra":"{"channel_id": 3189398979}",
                    "tag_id":"6775056296904229390",
                    "source_url":"/i6775056296904229390/",
                    "display_url":"http://toutiao.com/group/6775056296904229390/",
                    "is_stick":false,
                    "item_id":"6775056296904229390",
                    "repin_count":12,
                    "cell_flag":262155,
                    "source_open_url":"sslocal://profile?uid=566976876133454",
                    "level":0,
                    "digg_count":6,
                    "behot_time":1578054637,
                    "hot":0,
                    "cursor":1578054637999,
                    "url":"http://toutiao.com/group/6775056296904229390/",
                    "user_repin":0,
                    "has_image":true,
                    "video_style":0,
                    "media_info":{
                        "avatar_url":"http://p1.pstatp.com/large/ffe800001f90d3b65398",
                        "media_id":1629031487078411,
                        "name":"呦呦科学馆",
                        "user_verified":true
                    },
                    "group_id":"6775056296904229390"
                },
                Object{...}
            ]

        """
        try:
            url = 'http://m.toutiao.com/list/?tag={0}&ac=wap&count=20&format=json_raw&as=A17538D54D106FF&cp=585DF0A65F0F1E1&min_behot_time=1482491618'.format(
                category)
            headers = {
                "Host": "m.toutiao.com",
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
            }
            # Cookie names and values were pasted with trailing tab characters;
            # a tab is not a legal cookie-name character, so they are removed.
            cookies = {
                "CNZZDATA1259612802":
                "1495391056-1572663800-https%253A%252F%252Fwww.toutiao.com%252F%7C1573891961",
                "UM_distinctid":
                "16e2a5bff8b3ef-01fd3a213050ad-1d3a6a5b-1aeaa0-16e2a5bff8cf52",
                "WEATHER_CITY": "%E5%8C%97%E4%BA%AC",
                "__tasessionId": "c1wuholqh1575530017796",
                "_ga": "GA1.2.268549673.1572668703",
                "csrftoken": "4e76bad8185f77ea8b647e50e3bb0e26",
                "s_v_web_id": "7fabbf42df76a1ccd37bfea2d2c5db76",
                "tt_webid": "6754560229981750791"
            }
            result = Request.Request(url, headers, cookies).more()['data']
            # Log text: "fetched brief news info <url> successfully".
            logging.info("获取新闻缩率信息 %s 成功", url)
            return result
        except Exception:
            # Best effort: callers receive None on any failure. `Exception` rather
            # than a bare except, so Ctrl-C and SystemExit still propagate.
            # Log text: "failed to fetch brief news info".
            logging.exception("获取新闻缩率信息 失败")
            return None
コード例 #24
0
    def get_art_json(self, art_brief_json):
        """Fetch the full content of a single news article.

        Reference endpoint:
            http://m.toutiao.com/i6364969235889783298/info/

        :param art_brief_json: brief article record; only its ``'item_id'`` entry
            is read, to build the request URL.
        :return: the ``'data'`` entry of the response, or ``None`` when anything
            fails (the failure is logged with its traceback).
            Example (sample values are verbatim API output):
            {
                "detail_source":"正向娱乐energy",
                "media_user":{
                    "screen_name":"正向娱乐energy",
                    "no_display_pgc_icon":false,
                    "avatar_url":"http://p1.pstatp.com/thumb/ff0600002c7db3631cde",
                    "id":"52681187308",
                    "user_auth_info":{
                        "auth_type":"0",
                        "other_auth":{
                            "interest":"优质娱乐领域创作者"
                        },
                        "auth_info":"青云计划获奖者 优质娱乐领域创作者"
                    }
                },
                "publish_time":1574065021,
                "hotwords":[
                    {
                        "stress_type":0,
                        "hot_word":"录明星整容视频勒索"
                    },
                    {
                        "stress_type":1,
                        "hot_word":"宁静再演孝庄"
                    },
                    ...
                ],
                "labels":[

                ],
                "title":"李诞向左,池子向右,脱口秀背后的悲喜人生",
                "url":"http://toutiao.com/group/6760557790046978567/",
                "high_quality_flag":"0",
                "impression_count":"790339",
                "is_original":true,
                "is_pgc_article":true,
                "content":"<div class="pgc-img">... ... (the rest is the article body as HTML)",
                "source":"正向娱乐energy",
                "comment_count":326,
                "logo_show_strategy":"normal",
                "hupu_content_image_urls":[
                    ""
                ],
                "creator_uid":52408555030
            }
        """
        try:
            art_url = 'http://m.toutiao.com/i{0}/info/'.format(
                art_brief_json['item_id'])
            headers = {
                "Host": "m.toutiao.com",
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
            }
            result = Request.Request(art_url, headers).more()['data']
            # Log text: "fetched article content data <url> successfully".
            logging.info("获取新闻内容数据 %s 成功", art_url)
            return result
        except Exception:
            # Best effort: callers receive None on any failure (including a
            # missing 'item_id'). `Exception` rather than a bare except, so
            # Ctrl-C and SystemExit still propagate.
            # Log text: "failed to fetch article content data".
            logging.exception("获取新闻内容数据 失败")
            return None
コード例 #25
0
    def get_arts_brief_json_by_category(self, category):
        """Fetch brief article records for a category (article bodies not included).

        # 20-04-17: method checked, OK

        Reference endpoint:
            http://m.toutiao.com/list/?tag=__all__&ac=wap&count=20&format=json_raw&as=A17538D54D106FF&cp=585DF0A65F0F1E1&min_behot_time=1482491618

        :param category: value for the ``tag`` query parameter. Known tags:
            society:        'news_society',
            entertainment:  'news_entertainment',
            technology:     'news_tech',
            military:       'news_military',
            sports:         'news_sports',
            cars:           'news_car',
            finance:        'news_finance',
            international:  'news_world',
            fashion:        'news_fashion',
            travel:         'news_travel',
            discovery:      'news_discovery',
            parenting:      'news_baby',
            wellness:       'news_regimen',
            stories:        'news_story',
            essays:         'news_essay',
            games:          'news_game',
            history:        'news_history',
            food:           'news_food',
        :return: the ``'data'`` entry of the response, or ``None`` when anything
            fails (the failure is logged with its traceback).
            Example (sample values are verbatim API output):
            [
                Object{...},
                Object{...},
                Object{...},
                {
                    "media_name":"呦呦科学馆",
                    "ban_comment":0,
                    "abstract":"大家对屎壳郎都会有所耳闻,屎壳郎的原名是蜣螂(qiāng láng),一种痴迷于推粪球的昆虫,我们一直以来都没有什么正面评价,有时候在评价一个人不好的时候,会把他说成是屎壳郎。虽然屎壳郎在我们这儿,没有什么正面的评价,但是有些地方把它当成是神哦!不是负面的神,而是太阳神!",
                    "image_list":[
                        {
                            "url":"http://p6-tt.byteimg.com/img/pgc-image/a9b944c7417847e687b5f97aeb2798ad~tplv-tt-cs0:640:360.jpg",
                            "width":640,
                            "height":360
                        },
                        {
                            "url":"http://p6-tt.byteimg.com/img/pgc-image/2ca2e18621f942daa6b7e602205b4492~tplv-tt-cs0:525:576.jpg",
                            "width":525,
                            "height":295
                        },
                        {
                            "url":"http://p9-tt.byteimg.com/img/pgc-image/3605e63e394e45b494d2cc5914662a8c~tplv-tt-cs0:628:344.jpg",
                            "width":628,
                            "height":344
                        }
                    ],
                    "datetime":"2020-01-03 20:30",
                    "article_type":0,
                    "more_mode":true,
                    "tag":"news_story",
                    "has_m3u8_video":0,
                    "display_dt":1577527020,
                    "has_mp4_video":0,
                    "aggr_type":1,
                    "cell_type":0,
                    "article_sub_type":0,
                    "bury_count":0,
                    "title":"屎壳郎的故事",
                    "source_icon_style":1,
                    "tip":0,
                    "has_video":false,
                    "share_url":"http://toutiao.com/a6775056296904229390/?app=news_article&is_hit_share_recommend=0",
                    "source":"呦呦科学馆",
                    "comment_count":0,
                    "article_url":"http://toutiao.com/group/6775056296904229390/",
                    "publish_time":1577527020,
                    "group_flags":0,
                    "gallary_image_count":4,
                    "action_extra":"{"channel_id": 3189398979}",
                    "tag_id":"6775056296904229390",
                    "source_url":"/i6775056296904229390/",
                    "display_url":"http://toutiao.com/group/6775056296904229390/",
                    "is_stick":false,
                    "item_id":"6775056296904229390",
                    "repin_count":12,
                    "cell_flag":262155,
                    "source_open_url":"sslocal://profile?uid=566976876133454",
                    "level":0,
                    "digg_count":6,
                    "behot_time":1578054637,
                    "hot":0,
                    "cursor":1578054637999,
                    "url":"http://toutiao.com/group/6775056296904229390/",
                    "user_repin":0,
                    "has_image":true,
                    "video_style":0,
                    "media_info":{
                        "avatar_url":"http://p1.pstatp.com/large/ffe800001f90d3b65398",
                        "media_id":1629031487078411,
                        "name":"呦呦科学馆",
                        "user_verified":true
                    },
                    "group_id":"6775056296904229390"
                },
                Object{...}
            ]
        """
        try:
            url = 'http://m.toutiao.com/list/?tag={0}&ac=wap&count=20&format=json_raw&as=A17538D54D106FF&cp=585DF0A65F0F1E1&min_behot_time=1482491618'.format(category)
            headers = {
                "Host": "m.toutiao.com",
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
            }
            # Cookie values captured from a browser session.
            cookies = {
                "tt_webid": "6754560229981750791",
                "WEATHER_CITY": "%E5%8C%97%E4%BA%AC",
                "csrftoken": "4e76bad8185f77ea8b647e50e3bb0e26",
                "_ga": "GA1.2.268549673.1572668703",
                "__utmz": "24953151.1578051975.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
                "__utma": "24953151.268549673.1572668703.1578051975.1578221568.2",
                "SLARDAR_WEB_ID": "60060dd2-e5b2-470c-b7f4-09b8d877a031",
                "ttcid": "3f7ad0f028c54aa584367115a3fa7cb028",
                "__ac_nonce": "05ecba43c0082bbdfb3e",
                "__ac_signature": "O2auwAAgEBACmHx7dlnWxztnr9AAGWz3dGBVQL.r0H1se1MOmPcu.0Mxo.Y9Zem1qLCx5rV13rttFjnqiBP7d4KfTBbgN8Az4ip.Po5Ht9XBlX2CZW8ZIUZV9blZ9smX461",
                "s_v_web_id": "verify_kamdklmd_rcCyRT2X_ZLBe_4OQI_Brjy_Q7HFKjeCzPbt",
                "__tasessionId": "gx98j8z1x1590404157654",
                "tt_scid": "nLlc9z1mWQgXRhJFHC7i29KXmnHM9sZynN1Ue86iBmH5DPSaDONSaQYQz7mqHMcMf924"
            }

            result = Request.Request(url, headers, cookies).more()['data']
            # Log text: "fetched brief news info <url> successfully".
            logging.info("获取新闻缩率信息 %s 成功", url)
            return result
        except Exception:
            # Best effort: callers receive None on any failure. The failure is
            # logged (it used to be dropped silently), and `Exception` rather
            # than a bare except lets Ctrl-C and SystemExit propagate.
            # Log text: "failed to fetch brief news info".
            logging.exception("获取新闻缩率信息 失败")
            return None
コード例 #26
0
 def add_requests(self, requests):
     """Register each ``(url, period)`` pair of ``requests`` through ``add_request``."""
     for entry in requests:
         url, period = entry
         self.add_request(Request(url), period=period)
コード例 #27
0
 def perform(self, paste, analyzer_name=None):
     """Fire the webhook with a POST to ``self.url``; both arguments are unused."""
     # TODO - More post options ([custom] body, template, choose between GET/POST etc.)
     Request().post(url=self.url)
コード例 #28
0
class PastebinScraper(BasicScraper):
    """Scraper that polls the pastebin scraping API for recently created pastes.

    ``start`` fetches the listing of recent pastes at most once every 60 seconds
    and hands unknown pastes to a background thread, which downloads each body
    (roughly one download per second) and puts the completed paste on
    ``paste_queue``.
    """
    name = "PastebinScraper"
    api_base_url = "https://scrape.pastebin.com"

    def __init__(self, paste_queue=None, exception_event=None):
        """Set up queues, the known-paste window and the HTTP helper.

        :param paste_queue: queue receiving pastes whose body was downloaded;
            a new ``Queue`` is created when omitted.
        :param exception_event: passed on to ``BasicScraper.__init__``.
        """
        super().__init__(exception_event)
        self.logger = logging.getLogger(__name__)
        self._last_scrape_time = 0
        self.paste_queue = paste_queue or Queue()
        # Pastes waiting for their body to be downloaded.
        self._tmp_paste_queue = Queue()

        # Keys of pastes already queued, trimmed to the newest
        # `_known_pastes_limit` entries after each scrape.
        self._known_pastes = []
        self._known_pastes_limit = 1000

        self.request = Request()

    def _check_error(self, body):
        """Raise if the API response is missing or reports an access problem.

        :raises EmptyBodyException: ``body`` is ``None``.
        :raises IPNotRegisteredError: ``body`` contains "DOES NOT HAVE ACCESS";
            the exception event is set before raising.
        """
        if body is None:
            raise EmptyBodyException()

        if "DOES NOT HAVE ACCESS" in body:
            self._exception_event.set()
            raise IPNotRegisteredError()

    def _get_recent(self, limit=100):
        """Download the list of the most recent pastes.

        :param limit: value of the ``limit`` query parameter.
        :return: list of ``Paste`` objects, or ``None`` when the request,
            the error check or the JSON decoding failed (the error is logged).
        """
        endpoint = "api_scraping.php"
        api_url = "{0}/{1}?limit={2}".format(self.api_base_url, endpoint, limit)

        try:
            response_data = self.request.get(api_url)

            self._check_error(response_data)

            pastes_dict = json.loads(response_data)

            # Build one Paste object per entry of the response.
            return [
                Paste(key=paste.get("key"),
                      title=paste.get("title"),
                      user=paste.get("user"),
                      size=paste.get("size"),
                      date=paste.get("date"),
                      expire=paste.get("expire"),
                      syntax=paste.get("syntax"),
                      scrape_url=paste.get("scrape_url"),
                      full_url=paste.get("full_url"))
                for paste in pastes_dict
            ]
        except Exception as e:
            self.logger.error(e)
            return None

    def _get_paste_content(self, key):
        """Download the content of the paste identified by ``key``.

        :return: the response data, or ``""`` when the download or the error
            check failed (the error is logged).
        """
        endpoint = "api_scrape_item.php"
        api_url = "{0}/{1}?i={2}".format(self.api_base_url, endpoint, key)
        content = ""

        self.logger.debug("Downloading paste {0}".format(key))
        try:
            response_data = self.request.get(api_url)

            self._check_error(response_data)

            content = response_data
        except Exception as e:
            self.logger.error(e)

        return content

    def _body_downloader(self):
        """Download the bodies of queued pastes until stopped.

        Takes pastes from the internal queue, attaches the downloaded body and
        puts them on ``paste_queue``. Sleeps after each download so that one
        iteration takes at least about one second.
        """
        while self.running:
            try:
                self.logger.debug("Queue size: {}".format(self._tmp_paste_queue.qsize()))

                if self._stop_event.is_set() or self._exception_event.is_set():
                    self.running = False
                    break

                # Wait at most one second, so the stop check above runs regularly.
                paste = self._tmp_paste_queue.get(True, 1)

                last_body_download_time = round(time.time(), 2)
                body = self._get_paste_content(paste.key)

                paste.set_body(body)
                self.paste_queue.put(paste)

                current_time = round(time.time(), 2)
                diff = round(current_time - last_body_download_time, 2)

                if diff >= 1:
                    continue

                sleep_diff = round(1 - diff, 3)
                self.logger.debug("Sleep time is: {0}".format(sleep_diff))
                time.sleep(sleep_diff)
            except Empty:
                continue

    def start(self, paste_queue):
        """Start the scraping process and download the paste metadata.

        Starts the body downloader thread, then loops until the stop or
        exception event is set: fetch the recent pastes, queue the unknown ones,
        trim the known-paste window and wait out the rest of the 60 seconds.

        :param paste_queue: queue receiving the completed pastes; replaces the
            queue given to the constructor.
        """
        self.paste_queue = paste_queue
        self.running = True
        start_thread(self._body_downloader, "BodyDownloader", self._exception_event)

        while self.running:
            self._last_scrape_time = int(time.time())
            pastes = self._get_recent(limit=100)
            counter = 0

            if pastes is not None:
                for paste in pastes:
                    # Skip pastes that were already queued earlier.
                    if paste.key in self._known_pastes:
                        continue

                    self.logger.debug("Paste is unknown - adding ot to list {}".format(paste.key))
                    self._tmp_paste_queue.put(paste)
                    self._known_pastes.append(paste.key)
                    counter += 1

                    if self._stop_event.is_set() or self._exception_event.is_set():
                        self.running = False
                        break

                self.logger.debug("{0} new pastes fetched!".format(counter))

            # Keep only the newest keys to not run into memory/performance issues.
            # The threshold is the configured limit, not a second hard-coded 1000.
            if len(self._known_pastes) > self._known_pastes_limit:
                self.logger.debug("known_pastes > {0} - cleaning up!".format(self._known_pastes_limit))
                start_index = len(self._known_pastes) - self._known_pastes_limit
                self._known_pastes = self._known_pastes[start_index:]

            if self._stop_event.is_set() or self._exception_event.is_set():
                self.logger.debug('stopping {0}'.format(self.name))
                self.running = False
                break

            # Time spent on this scraping round.
            current_time = int(time.time())
            diff = current_time - self._last_scrape_time

            # if the last scraping happened less than 60 seconds ago,
            # wait until the 60 seconds passed
            if diff < 60:
                sleep_time = 60 - diff
                time.sleep(sleep_time)
コード例 #29
0
from uuid import uuid1

from azure.storage.table.tableservice import TableService

from _bareasgi import text_reader, text_response, json_response
import config
from config import azure_storage
from util import Request, URL
from phrase_metric import similarity

# Azure Table Storage client, built from the account name and key in
# config.azure_storage.
table_service = TableService(account_name=azure_storage['account'],
                             account_key=azure_storage['key'])

# GET request description for the answer-search endpoint: always passes
# mkt=en-us and the subscription key header taken from config.
answer_search = Request(
    'GET',
    config.answer_search.url,
    params={'mkt': 'en-us'},
    headers={'Ocp-Apim-Subscription-Key': config.answer_search.key})

# POST request description for the QnA Maker endpoint, authorised with the
# endpoint key from config.
qna_maker = Request(
    'POST',
    config.qna_maker.url,
    headers={'Authorization': f'EndpointKey {config.qna_maker.key}'})


def uuid():
    """Return a newly generated version-1 UUID as a 32-character hex string."""
    fresh = uuid1()
    return fresh.hex


def get_answer(answer):
    global _answer