Ejemplo n.º 1
0
def request(flow):
    """Answer stats/POST requests locally and serve video data from cache.

    Requests whose URL starts with one of the YouTube tracking/stats prefixes,
    and every POST request, get the dummy response produced by ``OK``.
    Requests whose URL contains ``googlevideo.com/videoplayback`` are answered
    with the headers and body returned by ``get_cached_data``. Any other
    request is left untouched.
    """
    url = flow.request.url
    stats_prefixes = (
        "https://www.youtube.com/ptracking",
        "https://www.youtube.com/api/stats/playback",
        "https://www.youtube.com/api/stats/watchtime",
    )
    if url.startswith(stats_prefixes) or flow.request.method == "POST":
        OK(flow)
        return
    if "googlevideo.com/videoplayback" not in url:
        return

    from mitmproxy import http

    params = dict(flow.request.query)
    file_id, file_range = params["id"], params["range"]
    cached_headers, payload = get_cached_data(flow.request)
    header_list = list(cached_headers)

    flow.error = None
    flow.response = http.HTTPResponse(
        b"HTTP/1.1", 200, b"OK", header_list, payload)

    # Backdate the response start by one hour before refreshing it.
    an_hour_ago = datetime.datetime.now() - datetime.timedelta(hours=1)
    flow.response.timestamp_start = time.mktime(an_hour_ago.timetuple())
    flow.response.refresh()
    log("SENT FILE %s IN CACHE - range %s" % (file_id, file_range))
Ejemplo n.º 2
0
def twebsocketflow(client_conn=True,
                   server_conn=True,
                   messages=True,
                   err=None,
                   handshake_flow=True):
    """Build a ``WebSocketFlow`` for tests.

    Every argument passed as ``True`` is replaced by a default object:
    ``tclient_conn()`` / ``tserver_conn()`` for the connections, a GET ``/ws``
    upgrade request with a 101 response for the handshake flow, three sample
    messages for ``messages`` and ``terr()`` for ``err``. Any other value is
    used as given.
    """
    if client_conn is True:
        client_conn = tclient_conn()
    if server_conn is True:
        server_conn = tserver_conn()

    if handshake_flow is True:
        request_headers = net_http.Headers(
            connection="upgrade",
            upgrade="websocket",
            sec_websocket_version="13",
            sec_websocket_key="1234",
        )
        upgrade_request = http.HTTPRequest(
            "relative", "GET", "http", "example.com", "80", "/ws", "HTTP/1.1",
            headers=request_headers,
            content=b'',
        )
        response_headers = net_http.Headers(
            connection='upgrade',
            upgrade='websocket',
            sec_websocket_accept=b'',
        )
        upgrade_response = http.HTTPResponse(
            "HTTP/1.1",
            101,
            reason=net_http.status_codes.RESPONSES.get(101),
            headers=response_headers,
            content=b'',
        )
        handshake_flow = http.HTTPFlow(client_conn, server_conn)
        handshake_flow.request = upgrade_request
        handshake_flow.response = upgrade_response

    ws_flow = websocket.WebSocketFlow(client_conn, server_conn,
                                      handshake_flow)
    # The handshake flow keeps a back-reference to the websocket flow.
    handshake_flow.metadata['websocket_flow'] = ws_flow

    if messages is True:
        sample_messages = (
            (websockets.OPCODE.BINARY, True, b"hello binary"),
            (websockets.OPCODE.TEXT, True, b"hello text"),
            (websockets.OPCODE.TEXT, False, b"it's me"),
        )
        messages = [
            websocket.WebSocketMessage(opcode, from_client, payload)
            for opcode, from_client, payload in sample_messages
        ]
    if err is True:
        err = terr()

    ws_flow.messages = messages
    ws_flow.error = err
    ws_flow.reply = controller.DummyReply()
    return ws_flow
Ejemplo n.º 3
0
def OK(flow, code=204):
    """Answer *flow* with an empty dummy response.

    The response has status ``code``, reason ``OK``, no headers and an empty
    body; any error recorded on the flow is cleared. 204 is the default
    because it fits most YT requests.
    """
    from mitmproxy import http

    flow.error = None
    dummy = http.HTTPResponse(b"HTTP/1.1", code, b"OK", {}, b"")
    flow.response = dummy
Ejemplo n.º 4
0
def request(flow):
    """Clean up the request method, drop tracking calls, serve cached video.

    Stats, tracker and POST requests are answered locally through ``OK``.
    Requests whose URL contains ``googlevideo.com/videoplayback`` are answered
    with the headers and body returned by ``get_cached_data``, or with a 404
    dummy response when that lookup raises. Any other request is left
    untouched.
    """
    # In some cases the YT client sends requests with a method of the form
    #   VAR=XX%3GET /xxx
    # Keep only what follows the last "=" of the unquoted method.
    flow.request.method = unquote(flow.request.method).split("=")[-1]

    url = flow.request.url

    # All requests made for stats purposes can be discarded and
    # a 204 sent back to the client.
    stats_prefixes = (
        "https://www.youtube.com/ptracking",
        "https://www.youtube.com/api/stats/playback",
        "https://www.youtube.com/api/stats/watchtime",
    )
    if url.startswith(stats_prefixes):
        OK(flow)
        return

    # Disable a few trackers, sniffers, etc.
    mozilla_services = (
        "push.services.mozilla.com",
        "tracking-protection.cdn.mozilla.net",
    )
    if any(service in url for service in mozilla_services):
        OK(flow, code=200)
        return

    # gen_204 pings are dropped, and so is anything that would post data back.
    if "gen_204" in url or flow.request.method == "POST":
        OK(flow)
        return

    if "googlevideo.com/videoplayback" not in url:
        return

    from mitmproxy import http

    params = dict(flow.request.query)
    file_id, file_range = params["id"], params["range"]
    try:
        cached_headers, payload = get_cached_data(flow.request)
    except Exception:
        OK(flow, code=404)
        return
    header_list = list(cached_headers)

    flow.error = None
    flow.response = http.HTTPResponse(
        b"HTTP/1.1", 200, b"OK", header_list, payload)

    # Backdate the response start by one hour before refreshing it.
    an_hour_ago = datetime.datetime.now() - datetime.timedelta(hours=1)
    flow.response.timestamp_start = time.mktime(an_hour_ago.timetuple())
    flow.response.refresh()
    log("SENT FILE %s IN CACHE - range %s" % (file_id, file_range))
def string_response(
        body: str,
        content_type: str = 'application/javascript') -> http.HTTPResponse:
    """Build a ``200`` response carrying *body*.

    Args:
        body: Response payload. Text is encoded as UTF-8; a value that is
            already bytes is used unchanged.
        content_type: Value passed as the ``Content_Type`` header argument.

    Returns:
        An ``http.HTTPResponse`` whose length header matches the number of
        bytes actually sent.
    """
    # Content-Length counts bytes, not characters, so encode first and
    # measure the encoded payload (they differ for non-ASCII text).
    payload = body.encode("utf-8") if isinstance(body, str) else body
    headers = net.http.Headers(Server="lol",
                               Connection="close",
                               Content_Length=str(len(payload)),
                               Content_Type=content_type)
    return http.HTTPResponse(
        b"HTTP/1.1", 200, net.http.status_codes.RESPONSES.get(200, "Unknown"),
        headers, payload)
Ejemplo n.º 6
0
    def read_response_headers(self):
        """Wait for the response headers and wrap them in an ``HTTPResponse``.

        Waits on ``self.response_arrived``, calls ``self.raise_zombie()``, and
        then builds an HTTP/2.0 response without content. The status code is
        taken from the ``:status`` pseudo-header (502 when it is absent), and
        that pseudo-header is removed from the copy of the headers handed to
        the response.
        """
        self.response_arrived.wait()
        self.raise_zombie()

        status = int(self.response_headers.get(':status', 502))

        # Work on a copy so self.response_headers keeps its ":status" entry.
        plain_headers = self.response_headers.copy()
        plain_headers.pop(":status", None)

        return http.HTTPResponse(
            http_version=b"HTTP/2.0",
            status_code=status,
            reason=b'',
            headers=plain_headers,
            content=None,
            timestamp_start=self.timestamp_start,
            timestamp_end=self.timestamp_end,
        )
Ejemplo n.º 7
0
def health_check_response(flow):
    """Answer *flow* with a ``200 OK`` health-check response.

    When the flow has no response yet, a new one is created with the body
    ``OK\\n``. Otherwise the existing response is rewritten in place: headers,
    status code and reason are replaced, and its ``stream`` is set to a
    callable that yields the same body.
    """
    body = b'OK\n'
    response_headers = nheaders.Headers()
    response_headers[HEADER_HEALTH_CHECK] = 'OK'
    # Derived from the body so header and payload cannot drift apart.
    response_headers[HEADER_CONTENT_LENGTH] = str(len(body))
    if flow.response is None:
        flow.response = http.HTTPResponse(http_version='HTTP/1.1',
                                          status_code=200,
                                          reason='OK',
                                          headers=response_headers,
                                          content=body)
    else:
        flow.response.headers = response_headers
        flow.response.status_code = 200
        flow.response.reason = 'OK'
        flow.response.stream = lambda chunks: [body]
Ejemplo n.º 8
0
def tarpit_response(flow, host):
    """Redirect *flow* to the tarpit with a ``301 Moved Permanently``.

    Args:
        flow: The flow to answer. A new response is created when it has none;
            otherwise the headers, status code and reason of the existing
            response are replaced.
        host: Host used in the ``Location`` URL; ``PUBLIC_IP`` is used when
            this is ``None``.
    """
    if host is None:
        host = PUBLIC_IP
    response_headers = nheaders.Headers()
    response_headers[HEADER_LOCATION] = 'http://' + host + ':' + str(
        TARPIT_PORT) + '/admin/index.php'
    if flow.response is None:
        flow.response = http.HTTPResponse(http_version='HTTP/1.1',
                                          status_code=301,
                                          reason='Moved Permanently',
                                          headers=response_headers,
                                          content=b'')
    else:
        flow.response.headers = response_headers
        flow.response.status_code = 301
        flow.response.reason = 'Moved Permanently'
Ejemplo n.º 9
0
    def handle_h2_event(self,
                        event: h2.events.Event) -> CommandGenerator[bool]:
        """Handle the HTTP/2 events that need special treatment here.

        * ``ResponseReceived``: validates the stream state and the headers,
          then emits ``ResponseHeaders`` and returns ``False``. Returns
          ``True`` after reporting a protocol error.
        * ``RequestReceived``: always reported as a protocol error; returns
          ``True``.
        * ``RemoteSettingsChanged``: clears ``provisional_max_concurrency``
          and then defers to the parent implementation.
        * Anything else is passed to the parent implementation, whose result
          is returned.
        """
        if isinstance(event, h2.events.ResponseReceived):
            stream_state = self.streams.get(event.stream_id, None)
            if stream_state is not StreamState.EXPECTING_HEADERS:
                yield from self.protocol_error(
                    "Received unexpected HTTP/2 response.")
                return True

            try:
                status_code, headers = parse_h2_response_headers(event.headers)
            except ValueError as e:
                yield from self.protocol_error(
                    f"Invalid HTTP/2 response headers: {e}")
                return True

            # Body and trailers are not known yet; only the start time is set.
            response = http.HTTPResponse(
                http_version=b"HTTP/2.0",
                status_code=status_code,
                reason=b"",
                headers=headers,
                content=None,
                trailers=None,
                timestamp_start=time.time(),
                timestamp_end=None,
            )
            self.streams[event.stream_id] = StreamState.HEADERS_RECEIVED
            end_stream = bool(event.stream_ended)
            yield ReceiveHttp(
                ResponseHeaders(event.stream_id, response, end_stream))
            return False

        if isinstance(event, h2.events.RequestReceived):
            yield from self.protocol_error(
                "HTTP/2 protocol error: received request from server")
            return True

        if isinstance(event, h2.events.RemoteSettingsChanged):
            # We have received at least one settings from now,
            # which means we can rely on the max concurrency in remote_settings
            self.provisional_max_concurrency = None

        return (yield from super().handle_h2_event(event))
Ejemplo n.º 10
0
def twebsocketflow(client_conn=True,
                   server_conn=True,
                   messages=True,
                   err=None,
                   handshake_flow=True):
    """Build a ``WebSocketFlow`` for tests.

    Every argument passed as ``True`` is replaced by a default object:
    ``tclient_conn()`` / ``tserver_conn()`` for the connections, a GET ``/ws``
    upgrade request with a 101 response for the handshake flow, three sample
    messages for ``messages`` and ``terr()`` for ``err``. Any other value is
    used as given. The websocket flow and the handshake flow reference each
    other by id through their ``metadata``.
    """
    if client_conn is True:
        client_conn = tclient_conn()
    if server_conn is True:
        server_conn = tserver_conn()

    if handshake_flow is True:
        request_headers = net_http.Headers(
            connection="upgrade",
            upgrade="websocket",
            sec_websocket_version="13",
            sec_websocket_key="1234",
        )
        upgrade_request = http.HTTPRequest(
            "example.com",
            80,
            b"GET",
            b"http",
            b"example.com",
            b"/ws",
            b"HTTP/1.1",
            headers=request_headers,
            content=b'',
            trailers=None,
            timestamp_start=946681200,
            timestamp_end=946681201,
        )
        response_headers = net_http.Headers(
            connection='upgrade',
            upgrade='websocket',
            sec_websocket_accept=b'',
        )
        upgrade_response = http.HTTPResponse(
            b"HTTP/1.1",
            101,
            reason=net_http.status_codes.RESPONSES.get(101),
            headers=response_headers,
            content=b'',
            trailers=None,
            timestamp_start=946681202,
            timestamp_end=946681203,
        )
        handshake_flow = http.HTTPFlow(client_conn, server_conn)
        handshake_flow.request = upgrade_request
        handshake_flow.response = upgrade_response

    ws_flow = websocket.WebSocketFlow(client_conn, server_conn,
                                      handshake_flow)
    # Cross-link the two flows by id.
    ws_flow.metadata['websocket_handshake'] = handshake_flow.id
    handshake_flow.metadata['websocket_flow'] = ws_flow.id
    handshake_flow.metadata['websocket'] = True

    if messages is True:
        sample_messages = (
            (Opcode.BINARY, True, b"hello binary"),
            (Opcode.TEXT, True, b"hello text"),
            (Opcode.TEXT, False, b"it's me"),
        )
        messages = [
            websocket.WebSocketMessage(opcode, from_client, payload)
            for opcode, from_client, payload in sample_messages
        ]
    if err is True:
        err = terr()

    ws_flow.messages = messages
    ws_flow.error = err
    ws_flow.reply = controller.DummyReply()
    return ws_flow
Ejemplo n.º 11
0
def OK(flow, code=204):
    """Answer *flow* with an empty dummy response of status ``code``.

    The response has reason ``OK``, no headers and an empty body; any error
    recorded on the flow is cleared.
    """
    from mitmproxy import http

    flow.error = None
    dummy = http.HTTPResponse(b"HTTP/1.1", code, b"OK", {}, b"")
    flow.response = dummy
Ejemplo n.º 12
0
def special_bubble_response(flow):
    """Serve a special bubble request by streaming the upstream answer.

    Health-check paths are answered by ``health_check_response``. Otherwise
    the request path is mapped with ``make_bubble_special_path`` and a GET or
    POST is issued through ``async_stream``; the upstream status, headers and
    body stream are then installed on ``flow.response``. For POST requests the
    client's headers are forwarded with an ``X-Bubble-Client-Header-`` prefix
    when ``include_request_headers`` says so, and the request body is passed
    along. Any other method is logged (when ``log_warning`` is set) and the
    flow is left unanswered.
    """
    name = 'special_bubble_response'
    path = flow.request.path
    if is_bubble_health_check(path):
        health_check_response(flow)
        return

    uri = make_bubble_special_path(path)
    if log_debug:
        bubble_log.debug('special_bubble_response: sending special bubble ' +
                         flow.request.method + ' to ' + uri)
    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json'
    }
    if flow.request.method == 'GET':
        loop = asyncio.new_event_loop()
        client = async_client(timeout=30)
        response = async_stream(client, name, uri, headers=headers, loop=loop)

    elif flow.request.method == 'POST':
        if include_request_headers(flow.request.path):
            if log_debug:
                bubble_log.debug(
                    'special_bubble_request: including client headers: ' +
                    repr(flow.request.headers))
            # Add client request headers. The loop variable must not be
            # called `name`: `name` is passed to async_stream below and would
            # otherwise end up holding the last client header name.
            for header_name, value in flow.request.headers.items():
                headers['X-Bubble-Client-Header-' + header_name] = value
            if log_debug:
                bubble_log.debug('special_bubble_request: NOW headers=' +
                                 repr(headers))

        data = None
        if flow.request.content:
            headers[HEADER_CONTENT_LENGTH] = str(len(flow.request.content))
            data = flow.request.content

        loop = asyncio.new_event_loop()
        client = async_client(timeout=30)
        response = async_stream(client,
                                name,
                                uri,
                                headers=headers,
                                method='POST',
                                data=data,
                                loop=loop)

    else:
        if log_warning:
            bubble_log.warning(
                'special_bubble_response: special bubble request: method ' +
                flow.request.method + ' not supported')
        return

    if flow.response is None:
        http_version = response.http_version
        response_headers = collect_response_headers(response)
        flow.response = http.HTTPResponse(http_version=http_version,
                                          status_code=response.status_code,
                                          reason=response.reason_phrase,
                                          headers=response_headers,
                                          content=None)
    if response is not None:
        flow.response.headers = collect_response_headers(response)
        flow.response.status_code = response.status_code
        flow.response.reason = status_reason(response.status_code)
        # The body is not read here; it is streamed from the upstream
        # response, with cleanup_async(...) supplied as the finalizer.
        flow.response.stream = AsyncStreamBody(owner=client,
                                               loop=loop,
                                               chunks=response.aiter_raw(),
                                               finalize=cleanup_async(
                                                   uri, loop, client,
                                                   response))
Ejemplo n.º 13
0
    def request(self, flow):
        """Answer a request from the local cache, pass it through, or block it.

        The request host is looked up in ``self.config['rules']``. On a rule
        match the response is taken from a ``CacheFile`` (downloading it first
        when ``download_missing`` is set) or an error status is prepared.
        Unhandled requests are checked against ``self.config['passthrough']``
        (by host, then by the ``'*'`` entry); a match returns without setting
        a response. Everything else follows ``default_policy_is_block``.
        Unless the method returned early, ``flow.response`` is replaced by the
        response built here.
        """
        ctx.log.debug('request():')
        block = False
        handled = False
        status_code = 0
        reason = ''
        content_type = ''
        data = ''
        cache = None
        # pretty_host(hostheader=True) takes the Host: header of the request into account,
        # which is useful in transparent mode where we usually only have the IP otherwise.
        host = flow.request.pretty_host

        orig_url = flow.request.url
        try:
            urlparts = urlparse(orig_url)
        except Exception as e:
            logging.critical('Unable to parse URL {}: {}'.format(orig_url, e))
            # Without parsed parts nothing below can work (it would fail with
            # a NameError on `urlparts`), so leave the flow untouched.
            return

        url = urlunsplit(
            [urlparts.scheme, host, urlparts.path, urlparts.query, ''])

        self.req_id += 1
        rid = '%06d' % (self.req_id)

        if host in self.config['rules'].keys():
            if re.search(self.config['rules'][host], self.get_path(url)):
                ctx.log.debug(rid + ' Rule match: %s %s' %
                              (host, self.config['rules'][host]))
                if 'referer' in flow.request.headers:
                    ctx.log.info(rid + ' Referred by: %s' %
                                 (flow.request.headers['referer']))
                cache = CacheFile(url, self.config['local_cache'])
                if cache.is_in_cache():
                    cache.load()
                    ctx.log.info(rid + ' Retrieved from cache: ' + url)
                    data = cache.data
                    ctx.log.debug(pformat(cache.headers))
                    content_type = cache.headers['Content-Type']
                    status_code = 200
                    reason = "OK2"
                    ctx.log.debug(rid + ' Cached data loaded. ')
                else:  # not cached
                    if self.config['download_missing']:
                        if cache.retrieve():
                            ctx.log.info(rid + ' Downloaded: ' + url)
                            data = cache.data
                            content_type = cache.headers['Content-Type']
                            status_code = 200
                            reason = "OK3"
                        elif cache.code == 404:
                            ctx.log.error(rid + ' ERROR: ' + cache.error_text)
                            data = ''
                            content_type = "text/html"
                            status_code = cache.code
                            reason = "Not found"
                        else:
                            ctx.log.error(rid + ' ERROR: ' + cache.error_text)
                            data = ''
                            content_type = "text/html"
                            status_code = 500
                            reason = "FAILED TO RETRIEVE"
                    else:  # not download_missing
                        data = ''
                        content_type = "text/html"
                        status_code = 404
                        reason = "NOPE"
                # A matching rule always counts as handled, whatever the
                # cache outcome was.
                handled = True
        if not handled:
            if host in self.config['passthrough'].keys():
                if re.search(self.config['passthrough'][host],
                             self.get_path(url)):
                    ctx.log.info(
                        '{} ****************************** PASSTHROUGH {} - {}: {}'
                        .format(rid, host, self.config['passthrough'][host],
                                url))
                    return
                else:
                    block = self.config['default_policy_is_block']
            elif '*' in self.config['passthrough'].keys():
                if re.search(self.config['passthrough']['*'],
                             self.get_path(url)):
                    ctx.log.info(
                        '{} ****************************** PASSTHROUGH {} - {}: {}'
                        .format(rid, '*', self.config['passthrough']['*'],
                                url))
                    return
                else:
                    block = self.config['default_policy_is_block']
            else:  # neither the host nor '*' has a passthrough entry
                block = self.config['default_policy_is_block']

        if self.config['default_policy_is_block']:
            ctx.log.info('Default policy: BLOCK')

        if block == True:
            ctx.log.info('URL blocked: ' + url)
            # Use flow.kill(resp) ??
            content_type = "text/html"
            reason = "1/1e100"
            if self.config['suggest_archiveorg']:
                data = '<html><b>1/1e100</b><br ><a href="https://web.archive.org/web/*/%s">https://web.archive.org/web/*/%s</a></html>' % (
                    url, url)
                status_code = 200
                ctx.log.debug('a.o!')
            else:
                data = '1/1e100'
                status_code = 403
                ctx.log.info(rid + ' BLOCKED: ' + url)

        # Text values are converted to bytes before the response is built.
        if isinstance(data, str):
            data = data.encode()
        if isinstance(reason, str):
            reason = reason.encode()
        if isinstance(content_type, str):
            content_type = content_type.encode()

        print('content_type={} block={} reason={}'.format(
            content_type, block, reason))

        if cache is not None:
            headers = cache.headers
        else:
            headers = Headers(Content_Type=content_type, Crampus='Engaged')

        resp = http.HTTPResponse(
            'HTTP/1.1'  # http://stackoverflow.com/questions/34677062/return-custom-response-with-mitmproxy
            ,
            status_code,
            reason,
            headers,
            data)
        ctx.log.debug('RESPONSE, new: ' + pformat(resp))
        flow.response = resp
        ctx.log.debug("request() done. ---------------------------------")