예제 #1
0
    def handle_exception(self, env, exc, print_trace):
        error_view = None

        if hasattr(self.wb_router, 'error_view'):
            error_view = self.wb_router.error_view

        if hasattr(exc, 'status'):
            if callable(exc.status):
                status = exc.status()
            else:
                status = exc.status
            # wsgi requires status
            #  - to have at least 4 characters and
            #  - to start with a number / integer
            if type(status) == int:
                status = '{} Exception {}'.format(status, type(exc).__name__)
            elif type(status) == str and status[0].isdigit():
                pass
            else:
                status = '500 Internal Server Error'
        else:
            status = '500 Internal Server Error'

        if hasattr(exc, 'url'):
            err_url = exc.url
        else:
            err_url = None

        if len(exc.args):
            err_msg = str(exc.args[0])

        if print_trace:
            import traceback
            err_details = traceback.format_exc()
            print(err_details)
        else:
            logging.info(err_msg)
            err_details = None

        if error_view:
            if err_url and isinstance(err_url, str):
                err_url = to_native_str(err_url, 'utf-8')
            if err_msg and isinstance(err_msg, str):
                err_msg = to_native_str(err_msg, 'utf-8')

            return error_view.render_response(exc_type=type(exc).__name__,
                                              err_msg=err_msg,
                                              err_details=err_details,
                                              status=status,
                                              env=env,
                                              err_url=err_url)
        else:
            msg = status + ' Error: '
            if err_msg:
                msg += err_msg

            #msg = msg.encode('utf-8', 'ignore')
            return WbResponse.text_response(msg, status=status)
예제 #2
0
파일: cdxobject.py 프로젝트: chdorner/pywb
    def __init__(self, cdxline=b''):
        OrderedDict.__init__(self)

        cdxline = cdxline.rstrip()
        self._from_json = False
        self._cached_json = None

        # Allows for filling the fields later or in a custom way
        if not cdxline:
            self.cdxline = cdxline
            return

        fields = cdxline.split(b' ' , 2)
        # Check for CDX JSON
        if fields[-1].startswith(b'{'):
            self[URLKEY] = to_native_str(fields[0], 'utf-8')
            self[TIMESTAMP] = to_native_str(fields[1], 'utf-8')
            json_fields = json_decode(to_native_str(fields[-1], 'utf-8'))
            for n, v in six.iteritems(json_fields):
                n = to_native_str(n, 'utf-8')
                n = self.CDX_ALT_FIELDS.get(n, n)

                if n == 'url':
                    try:
                        v.encode('ascii')
                    except UnicodeEncodeError:
                        v = quote(v.encode('utf-8'), safe=':/')

                if n != 'filename':
                    v = to_native_str(v, 'utf-8')

                self[n] = v

            self.cdxline = cdxline
            self._from_json = True
            return

        more_fields = fields.pop().split(b' ')
        fields.extend(more_fields)

        cdxformat = None
        for i in self.CDX_FORMATS:
            if len(i) == len(fields):
                cdxformat = i

        if not cdxformat:
            msg = 'unknown {0}-field cdx format'.format(len(fields))
            raise CDXException(msg)

        for header, field in zip(cdxformat, fields):
            self[header] = field.decode('utf-8')

        self.cdxline = cdxline
예제 #3
0
파일: cdxobject.py 프로젝트: yarwelp/pywb
    def __init__(self, cdxline=b''):
        OrderedDict.__init__(self)

        cdxline = cdxline.rstrip()
        self._from_json = False
        self._cached_json = None

        # Allows for filling the fields later or in a custom way
        if not cdxline:
            self.cdxline = cdxline
            return

        fields = cdxline.split(b' ', 2)
        # Check for CDX JSON
        if fields[-1].startswith(b'{'):
            self[URLKEY] = to_native_str(fields[0], 'utf-8')
            self[TIMESTAMP] = to_native_str(fields[1], 'utf-8')
            json_fields = json_decode(to_native_str(fields[-1], 'utf-8'))
            for n, v in six.iteritems(json_fields):
                n = to_native_str(n, 'utf-8')
                n = self.CDX_ALT_FIELDS.get(n, n)

                if n == 'url':
                    try:
                        v.encode('ascii')
                    except UnicodeEncodeError:
                        v = quote(v.encode('utf-8'), safe=':/')

                if n != 'filename':
                    v = to_native_str(v, 'utf-8')

                self[n] = v

            self.cdxline = cdxline
            self._from_json = True
            return

        more_fields = fields.pop().split(b' ')
        fields.extend(more_fields)

        cdxformat = None
        for i in self.CDX_FORMATS:
            if len(i) == len(fields):
                cdxformat = i

        if not cdxformat:
            msg = 'unknown {0}-field cdx format'.format(len(fields))
            raise CDXException(msg)

        for header, field in zip(cdxformat, fields):
            self[header] = field.decode('utf-8')

        self.cdxline = cdxline
예제 #4
0
    def create_renew_sesh_id(self, sesh_id, force=False):
        #if sesh_id in self.cache and not force:
        if sesh_id and ((sesh_id + ':c') in self.cache) and not force:
            return sesh_id

        sesh_id = base64.b32encode(os.urandom(5)).lower()
        return to_native_str(sesh_id)
예제 #5
0
    def create_renew_sesh_id(self, sesh_id, force=False):
        # if sesh_id in self.cache and not force:
        if sesh_id and ((sesh_id + ":c") in self.cache) and not force:
            return sesh_id

        sesh_id = base64.b32encode(os.urandom(5)).lower()
        return to_native_str(sesh_id)
예제 #6
0
파일: wburl.py 프로젝트: chdorner/pywb
    def to_uri(url):
        """ Converts a url to an ascii %-encoded form
        where:
        - scheme is ascii,
        - host is punycode,
        - and remainder is %-encoded
        Not using urlsplit to also decode partially encoded
        scheme urls
        """
        parts = WbUrl.FIRST_PATH.split(url, 1)

        sep = url[len(parts[0])] if len(parts) > 1 else None

        scheme_dom = unquote_plus(parts[0])

        if six.PY2 and isinstance(scheme_dom, six.binary_type):
            if scheme_dom == parts[0]:
                return url

            scheme_dom = scheme_dom.decode('utf-8', 'ignore')

        scheme_dom = scheme_dom.rsplit('/', 1)
        domain = scheme_dom[-1]

        try:
            domain = to_native_str(domain.encode('idna'), 'utf-8')
        except UnicodeError:
            # the url is invalid and this is probably not a domain
            pass

        if len(scheme_dom) > 1:
            url = to_native_str(scheme_dom[0], 'utf-8') + '/' + domain
        else:
            url = domain

        if len(parts) > 1:
            url += sep

            rest = parts[1]
            try:
                rest.encode('ascii')
            except UnicodeEncodeError:
                rest = quote(to_native_str(rest, 'utf-8'))

            url += rest

        return url
예제 #7
0
    def handle_exception(self, env, exc, print_trace):
        error_view = None

        if hasattr(self.wb_router, 'error_view'):
            error_view = self.wb_router.error_view

        if hasattr(exc, 'status'):
            status = exc.status()
        else:
            status = '500 Internal Server Error'

        if hasattr(exc, 'url'):
            err_url = exc.url
        else:
            err_url = None

        if len(exc.args):
            err_msg = exc.args[0]

        if print_trace:
            import traceback
            err_details = traceback.format_exc()
            print(err_details)
        else:
            logging.info(err_msg)
            err_details = None

        if error_view:
            if err_url and isinstance(err_url, str):
                err_url = to_native_str(err_url, 'utf-8')
            if err_msg and isinstance(err_msg, str):
                err_msg = to_native_str(err_msg, 'utf-8')

            return error_view.render_response(exc_type=type(exc).__name__,
                                              err_msg=err_msg,
                                              err_details=err_details,
                                              status=status,
                                              env=env,
                                              err_url=err_url)
        else:
            msg = status + ' Error: '
            if err_msg:
                msg += err_msg

            #msg = msg.encode('utf-8', 'ignore')
            return WbResponse.text_response(msg,
                                           status=status)
예제 #8
0
    def to_uri(url):
        """ Converts a url to an ascii %-encoded form
        where:
        - scheme is ascii,
        - host is punycode,
        - and remainder is %-encoded
        Not using urlsplit to also decode partially encoded
        scheme urls
        """
        parts = WbUrl.FIRST_PATH.split(url, 1)

        sep = url[len(parts[0])] if len(parts) > 1 else None

        scheme_dom = unquote_plus(parts[0])

        if six.PY2 and isinstance(scheme_dom, six.binary_type):
            if scheme_dom == parts[0]:
                return url

            scheme_dom = scheme_dom.decode('utf-8', 'ignore')

        scheme_dom = scheme_dom.rsplit('/', 1)
        domain = scheme_dom[-1]

        try:
            domain = to_native_str(domain.encode('idna'), 'utf-8')
        except UnicodeError:
            # the url is invalid and this is probably not a domain
            pass

        if len(scheme_dom) > 1:
            url = to_native_str(scheme_dom[0], 'utf-8') + '/' + domain
        else:
            url = domain

        if len(parts) > 1:
            url += sep

            rest = parts[1]
            try:
                rest.encode('ascii')
            except UnicodeEncodeError:
                rest = quote(to_native_str(rest, 'utf-8'))

            url += rest

        return url
예제 #9
0
    def handle_exception(self, env, exc, print_trace):
        error_view = None

        if hasattr(self.wb_router, 'error_view'):
            error_view = self.wb_router.error_view

        if hasattr(exc, 'status'):
            status = exc.status()
        else:
            status = '500 Internal Server Error'

        if hasattr(exc, 'url'):
            err_url = exc.url
        else:
            err_url = None

        if len(exc.args):
            err_msg = exc.args[0]

        if print_trace:
            import traceback
            err_details = traceback.format_exc()
            print(err_details)
        else:
            logging.info(err_msg)
            err_details = None

        if error_view:
            if err_url and isinstance(err_url, str):
                err_url = to_native_str(err_url, 'utf-8')
            if err_msg and isinstance(err_msg, str):
                err_msg = to_native_str(err_msg, 'utf-8')

            return error_view.render_response(exc_type=type(exc).__name__,
                                              err_msg=err_msg,
                                              err_details=err_details,
                                              status=status,
                                              env=env,
                                              err_url=err_url)
        else:
            msg = status + ' Error: '
            if err_msg:
                msg += err_msg

            #msg = msg.encode('utf-8', 'ignore')
            return WbResponse.text_response(msg,
                                           status=status)
예제 #10
0
    def __call__(self, filename, cdx=None):
        with open(self.pathindex_file, 'rb') as reader:
            result = iter_exact(reader, filename.encode('utf-8'), b'\t')

            for pathline in result:
                paths = pathline.split(b'\t')[1:]
                for path in paths:
                    yield to_native_str(path, 'utf-8')
예제 #11
0
    def __call__(self, filename, cdx=None):
        with open(self.pathindex_file, 'rb') as reader:
            result = iter_exact(reader, filename.encode('utf-8'), b'\t')

            for pathline in result:
                paths = pathline.split(b'\t')[1:]
                for path in paths:
                    yield to_native_str(path, 'utf-8')
예제 #12
0
    def __str__(self):
        if self.cdxline:
            return to_native_str(self.cdxline, 'utf-8')

        if not self._from_json:
            return ' '.join(str(val) for val in six.itervalues(self))
        else:
            return json_encode(self)
예제 #13
0
    def _extract_html_charset(buff, status_headers):
        charset = None
        m = RewriteContent.CHARSET_REGEX.search(buff)
        if m:
            charset = m.group(1)
            charset = to_native_str(charset)
        #    content_type = 'text/html; charset=' + charset
        #    status_headers.replace_header('content-type', content_type)

        return charset
예제 #14
0
    def _extract_html_charset(buff, status_headers):
        charset = None
        m = RewriteContent.CHARSET_REGEX.search(buff)
        if m:
            charset = m.group(1)
            charset = to_native_str(charset)
        #    content_type = 'text/html; charset=' + charset
        #    status_headers.replace_header('content-type', content_type)

        return charset
예제 #15
0
    def read_basic_auth_coll(value):
        parts = value.split(' ')
        if parts[0].lower() != 'basic':
            return ''

        if len(parts) != 2:
            return ''

        user_pass = base64.b64decode(parts[1].encode('utf-8'))
        return to_native_str(user_pass.split(b':')[0])
예제 #16
0
    def read_basic_auth_coll(value):
        parts = value.split(" ")
        if parts[0].lower() != "basic":
            return ""

        if len(parts) != 2:
            return ""

        user_pass = base64.b64decode(parts[1].encode("utf-8"))
        return to_native_str(user_pass.split(b":")[0])
예제 #17
0
    def __call__(self, query):
        matched_rule = None

        urlkey = to_native_str(query.key, 'utf-8')
        url = query.url
        filter_ = query.filters
        output = query.output

        for rule in self.rules.iter_matching(urlkey):
            m = rule.regex.search(urlkey)
            if not m:
                continue

            matched_rule = rule

            groups = m.groups()
            for g in groups:
                for f in matched_rule.filter:
                    filter_.append(f.format(g))

            break

        if not matched_rule:
            return None

        repl = '?'
        if matched_rule.replace:
            repl = matched_rule.replace

        inx = url.find(repl)
        if inx > 0:
            url = url[:inx + len(repl)]

        if matched_rule.match_type == 'domain':
            host = urlsplit(url).netloc
            # remove the subdomain
            url = host.split('.', 1)[1]

        params = query.params
        params.update({
            'url': url,
            'matchType': matched_rule.match_type,
            'filter': filter_
        })

        if 'reverse' in params:
            del params['reverse']

        if 'closest' in params:
            del params['closest']

        if 'end_key' in params:
            del params['end_key']

        return params
예제 #18
0
    def __call__(self, query):
        matched_rule = None

        urlkey = to_native_str(query.key, 'utf-8')
        url = query.url
        filter_ = query.filters
        output = query.output

        for rule in self.rules.iter_matching(urlkey):
            m = rule.regex.search(urlkey)
            if not m:
                continue

            matched_rule = rule

            groups = m.groups()
            for g in groups:
                for f in matched_rule.filter:
                    filter_.append(f.format(g))

            break

        if not matched_rule:
            return None

        repl = '?'
        if matched_rule.replace:
            repl = matched_rule.replace

        inx = url.find(repl)
        if inx > 0:
            url = url[:inx + len(repl)]

        if matched_rule.match_type == 'domain':
            host = urlsplit(url).netloc
            # remove the subdomain
            url = host.split('.', 1)[1]

        params = query.params
        params.update({'url': url,
                       'matchType': matched_rule.match_type,
                       'filter': filter_})

        if 'reverse' in params:
            del params['reverse']

        if 'closest' in params:
            del params['closest']

        if 'end_key' in params:
            del params['end_key']

        return params
예제 #19
0
    def parse(self, stream, headerline=None):
        total_read = 0

        def readline():
            return to_native_str(stream.readline())

        # if headerline passed in, use that
        if headerline is None:
            headerline = readline()
        else:
            headerline = to_native_str(headerline)

        header_len = len(headerline)

        if header_len == 0:
            raise EOFError()

        headerline = headerline.rstrip()

        headernames = self.headernames

        # if arc header, consume next two lines
        if headerline.startswith('filedesc://'):
            version = readline()  # skip version
            spec = readline()  # skip header spec, use preset one
            total_read += len(version)
            total_read += len(spec)

        parts = headerline.split(' ')

        if len(parts) != len(headernames):
            msg = 'Wrong # of headers, expected arc headers {0}, Found {1}'
            msg = msg.format(headernames, parts)
            raise StatusAndHeadersParserException(msg, parts)

        headers = []

        for name, value in zip(headernames, parts):
            headers.append((name, value))

        return StatusAndHeaders(statusline='',
                                headers=headers,
                                protocol='ARC/1.0',
                                total_len=total_read)
예제 #20
0
    def parse(self, stream, headerline=None):
        total_read = 0

        def readline():
            return to_native_str(stream.readline())

        # if headerline passed in, use that
        if headerline is None:
            headerline = readline()
        else:
            headerline = to_native_str(headerline)

        header_len = len(headerline)

        if header_len == 0:
            raise EOFError()

        headerline = headerline.rstrip()

        headernames = self.headernames

        # if arc header, consume next two lines
        if headerline.startswith('filedesc://'):
            version = readline()  # skip version
            spec = readline()  # skip header spec, use preset one
            total_read += len(version)
            total_read += len(spec)

        parts = headerline.split(' ')

        if len(parts) != len(headernames):
            msg = 'Wrong # of headers, expected arc headers {0}, Found {1}'
            msg = msg.format(headernames, parts)
            raise StatusAndHeadersParserException(msg, parts)

        headers = []

        for name, value in zip(headernames, parts):
            headers.append((name, value))

        return StatusAndHeaders(statusline='',
                                headers=headers,
                                protocol='ARC/1.0',
                                total_len=total_read)
예제 #21
0
    def __init__(self, idxline):
        OrderedDict.__init__(self)

        idxline = idxline.rstrip()
        fields = idxline.split(b'\t')

        if len(fields) < self.NUM_REQ_FIELDS:
            msg = 'invalid idx format: {0} fields found, {1} required'
            raise CDXException(msg.format(len(fields), self.NUM_REQ_FIELDS))

        for header, field in zip(self.FORMAT, fields):
            self[header] = to_native_str(field, 'utf-8')

        self['offset'] = int(self['offset'])
        self['length'] = int(self['length'])
        lineno = self.get('lineno')
        if lineno:
            self['lineno'] = int(lineno)

        self.idxline = idxline
예제 #22
0
파일: proxy.py 프로젝트: chdorner/pywb
    def handle_connect(self, env):
        sock = self.get_request_socket(env)
        if not sock:
            return WbResponse.text_response('HTTPS Proxy Not Supported',
                                            '405 HTTPS Proxy Not Supported')

        sock.send(b'HTTP/1.0 200 Connection Established\r\n')
        sock.send(b'Proxy-Connection: close\r\n')
        sock.send(b'Server: pywb proxy\r\n')
        sock.send(b'\r\n')

        hostname, port = env['REL_REQUEST_URI'].split(':')

        if not self.use_wildcard:
            certfile = self.ca.cert_for_host(hostname)
        else:
            certfile = self.ca.get_wildcard_cert(hostname)

        try:
            ssl_sock = ssl.wrap_socket(sock,
                                       server_side=True,
                                       certfile=certfile,
                                       #ciphers="ALL",
                                       suppress_ragged_eofs=False,
                                       ssl_version=ssl.PROTOCOL_SSLv23
                                       )
            env['pywb.proxy_ssl_sock'] = ssl_sock

            buffreader = BufferedReader(ssl_sock, block_size=self.BLOCK_SIZE)

            statusline = to_native_str(buffreader.readline().rstrip())

        except Exception as se:
            raise BadRequestException(se.message)

        statusparts = statusline.split(' ')

        if len(statusparts) < 3:
            raise BadRequestException('Invalid Proxy Request: ' + statusline)

        env['REQUEST_METHOD'] = statusparts[0]
        env['REL_REQUEST_URI'] = ('https://' +
                                  env['REL_REQUEST_URI'].replace(':443', '') +
                                  statusparts[1])

        env['SERVER_PROTOCOL'] = statusparts[2].strip()

        env['pywb.proxy_scheme'] = 'https'

        env['pywb.proxy_host'] = hostname
        env['pywb.proxy_port'] = port
        env['pywb.proxy_req_uri'] = statusparts[1]

        queryparts = env['REL_REQUEST_URI'].split('?', 1)
        env['PATH_INFO'] = queryparts[0]
        env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
        env['pywb.proxy_query'] = env['QUERY_STRING']

        while True:
            line = to_native_str(buffreader.readline())
            if line:
                line = line.rstrip()

            if not line:
                break

            parts = line.split(':', 1)
            if len(parts) < 2:
                continue

            name = parts[0].strip()
            value = parts[1].strip()

            name = name.replace('-', '_').upper()

            if name not in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
                name = 'HTTP_' + name

            env[name] = value

        env['wsgi.input'] = buffreader
예제 #23
0
 def __getitem__(self, item):
     return to_native_str(self.redis.hget(self.key, item), 'utf-8')
예제 #24
0
def get_rewritten(*args, **kwargs):
    status_headers, buff = LiveRewriter().get_rewritten(remote_only=False,
                                                        *args,
                                                        **kwargs)
    return status_headers, to_native_str(buff)
예제 #25
0
 def b64encode(self, string):
     return to_native_str(base64.b64encode(string.encode('utf-8')))
예제 #26
0
def get_rewritten(*args, **kwargs):
    status_headers, buff = LiveRewriter().get_rewritten(remote_only=False, *args, **kwargs)
    return status_headers, to_native_str(buff)
예제 #27
0
 def __call__(self, filename, cdx=None):
     redis_val = self.redis.hget(self.key_prefix + filename, 'path')
     return [to_native_str(redis_val, 'utf-8')] if redis_val else []
예제 #28
0
 def b64encode(self, string):
     return to_native_str(base64.b64encode(string.encode('utf-8')))
예제 #29
0
파일: pywb.py 프로젝트: galgeek/brozzler
def _fuzzy_query_call(self, query):
    # imports added here for brozzler
    from pywb.utils.loaders import to_native_str
    from six.moves.urllib.parse import urlsplit, urlunsplit

    matched_rule = None

    urlkey = to_native_str(query.key, 'utf-8')
    url = query.url
    filter_ = query.filters
    output = query.output

    for rule in self.rules.iter_matching(urlkey):
        m = rule.regex.search(urlkey)
        if not m:
            continue

        matched_rule = rule

        groups = m.groups()
        for g in groups:
            for f in matched_rule.filter:
                filter_.append(f.format(g))

        break

    if not matched_rule:
        return None

    repl = '?'
    if matched_rule.replace:
        repl = matched_rule.replace

    inx = url.find(repl)
    if inx > 0:
        url = url[:inx + len(repl)]

    # begin brozzler changes
    if matched_rule.match_type == 'domain':
        orig_split_url = urlsplit(url)
        # remove the subdomain, path, query and fragment
        host = orig_split_url.netloc.split('.', 1)[1]
        new_split_url = (orig_split_url.scheme, host, '', '', '')
        url = urlunsplit(new_split_url)
    # end brozzler changes

    params = query.params
    params.update({'url': url,
                   'matchType': matched_rule.match_type,
                   'filter': filter_})

    if 'reverse' in params:
        del params['reverse']

    if 'closest' in params:
        del params['closest']

    if 'end_key' in params:
        del params['end_key']

    return params
예제 #30
0
 def __str__(self):
     return to_native_str(self.idxline, 'utf-8')
예제 #31
0
    def parse(self, stream, full_statusline=None):
        """
        parse stream for status line and headers
        return a StatusAndHeaders object

        support continuation headers starting with space or tab
        """

        def readline():
            return to_native_str(stream.readline())

        # status line w newlines intact
        if full_statusline is None:
            full_statusline = readline()
        else:
            full_statusline = to_native_str(full_statusline)

        statusline, total_read = _strip_count(full_statusline, 0)

        headers = []

        # at end of stream
        if total_read == 0:
            raise EOFError()
        elif not statusline:
            return StatusAndHeaders(statusline=statusline,
                                    headers=headers,
                                    protocol='',
                                    total_len=total_read)

        # validate only if verify is set
        if self.verify:
            protocol_status = self.split_prefix(statusline, self.statuslist)

            if not protocol_status:
                msg = 'Expected Status Line starting with {0} - Found: {1}'
                msg = msg.format(self.statuslist, statusline)
                raise StatusAndHeadersParserException(msg, full_statusline)
        else:
            protocol_status = statusline.split(' ', 1)

        line, total_read = _strip_count(readline(), total_read)
        while line:
            result = line.split(':', 1)
            if len(result) == 2:
                name = result[0].rstrip(' \t')
                value = result[1].lstrip()
            else:
                name = result[0]
                value = None

            next_line, total_read = _strip_count(readline(),
                                                 total_read)

            # append continuation lines, if any
            while next_line and next_line.startswith((' ', '\t')):
                if value is not None:
                    value += next_line
                next_line, total_read = _strip_count(readline(),
                                                     total_read)

            if value is not None:
                header = (name, value)
                headers.append(header)

            line = next_line

        if len(protocol_status) > 1:
            statusline = protocol_status[1].strip()
        else:
            statusline = ''

        return StatusAndHeaders(statusline=statusline,
                                headers=headers,
                                protocol=protocol_status[0],
                                total_len=total_read)
예제 #32
0
    def handle_connect(self, env):
        sock = self.get_request_socket(env)
        if not sock:
            return WbResponse.text_response('HTTPS Proxy Not Supported',
                                            '405 HTTPS Proxy Not Supported')

        sock.send(b'HTTP/1.0 200 Connection Established\r\n')
        sock.send(b'Proxy-Connection: close\r\n')
        sock.send(b'Server: pywb proxy\r\n')
        sock.send(b'\r\n')

        hostname, port = env['REL_REQUEST_URI'].split(':')

        if not self.use_wildcard:
            certfile = self.ca.cert_for_host(hostname)
        else:
            certfile = self.ca.get_wildcard_cert(hostname)

        try:
            ssl_sock = ssl.wrap_socket(
                sock,
                server_side=True,
                certfile=certfile,
                #ciphers="ALL",
                suppress_ragged_eofs=False,
                ssl_version=ssl.PROTOCOL_SSLv23)
            env['pywb.proxy_ssl_sock'] = ssl_sock

            buffreader = BufferedReader(ssl_sock, block_size=self.BLOCK_SIZE)

            statusline = to_native_str(buffreader.readline().rstrip())

        except Exception as se:
            raise BadRequestException(se.message)

        statusparts = statusline.split(' ')

        if len(statusparts) < 3:
            raise BadRequestException('Invalid Proxy Request: ' + statusline)

        env['REQUEST_METHOD'] = statusparts[0]
        env['REL_REQUEST_URI'] = ('https://' +
                                  env['REL_REQUEST_URI'].replace(':443', '') +
                                  statusparts[1])

        env['SERVER_PROTOCOL'] = statusparts[2].strip()

        env['pywb.proxy_scheme'] = 'https'

        env['pywb.proxy_host'] = hostname
        env['pywb.proxy_port'] = port
        env['pywb.proxy_req_uri'] = statusparts[1]

        queryparts = env['REL_REQUEST_URI'].split('?', 1)
        env['PATH_INFO'] = queryparts[0]
        env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
        env['pywb.proxy_query'] = env['QUERY_STRING']

        while True:
            line = to_native_str(buffreader.readline())
            if line:
                line = line.rstrip()

            if not line:
                break

            parts = line.split(':', 1)
            if len(parts) < 2:
                continue

            name = parts[0].strip()
            value = parts[1].strip()

            name = name.replace('-', '_').upper()

            if name not in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
                name = 'HTTP_' + name

            env[name] = value

        env['wsgi.input'] = buffreader
예제 #33
0
 def readline():
     return to_native_str(stream.readline())
예제 #34
0
 def __str__(self):
     return self.type_ + ':' + to_native_str(base64.b32encode(self.digester.digest()))
예제 #35
0
 def __call__(self, filename, cdx=None):
     redis_val = self.redis.hget(self.key_prefix + filename, 'path')
     return [to_native_str(redis_val, 'utf-8')] if redis_val else []
예제 #36
0
파일: pywb.py 프로젝트: FOrapin/brozzler
def _fuzzy_query_call(self, query):
    # imports added here for brozzler
    from pywb.utils.loaders import to_native_str
    from six.moves.urllib.parse import urlsplit, urlunsplit

    matched_rule = None

    urlkey = to_native_str(query.key, 'utf-8')
    url = query.url
    filter_ = query.filters
    output = query.output

    for rule in self.rules.iter_matching(urlkey):
        m = rule.regex.search(urlkey)
        if not m:
            continue

        matched_rule = rule

        groups = m.groups()
        for g in groups:
            for f in matched_rule.filter:
                filter_.append(f.format(g))

        break

    if not matched_rule:
        return None

    repl = '?'
    if matched_rule.replace:
        repl = matched_rule.replace

    inx = url.find(repl)
    if inx > 0:
        url = url[:inx + len(repl)]

    # begin brozzler changes
    if matched_rule.match_type == 'domain':
        orig_split_url = urlsplit(url)
        # remove the subdomain, path, query and fragment
        host = orig_split_url.netloc.split('.', 1)[1]
        new_split_url = (orig_split_url.scheme, host, '', '', '')
        url = urlunsplit(new_split_url)
    # end brozzler changes

    params = query.params
    params.update({
        'url': url,
        'matchType': matched_rule.match_type,
        'filter': filter_
    })

    if 'reverse' in params:
        del params['reverse']

    if 'closest' in params:
        del params['closest']

    if 'end_key' in params:
        del params['end_key']

    return params
예제 #37
0
파일: cache.py 프로젝트: chdorner/pywb
 def __getitem__(self, item):
     return to_native_str(self.redis.hget(self.key, item), 'utf-8')
예제 #38
0
 def readline():
     return to_native_str(stream.readline())
예제 #39
0
    def parse(self, stream, full_statusline=None):
        """
        parse stream for status line and headers
        return a StatusAndHeaders object

        support continuation headers starting with space or tab
        """
        def readline():
            return to_native_str(stream.readline())

        # status line w newlines intact
        if full_statusline is None:
            full_statusline = readline()
        else:
            full_statusline = to_native_str(full_statusline)

        statusline, total_read = _strip_count(full_statusline, 0)

        headers = []

        # at end of stream
        if total_read == 0:
            raise EOFError()
        elif not statusline:
            return StatusAndHeaders(statusline=statusline,
                                    headers=headers,
                                    protocol='',
                                    total_len=total_read)

        # validate only if verify is set
        if self.verify:
            protocol_status = self.split_prefix(statusline, self.statuslist)

            if not protocol_status:
                msg = 'Expected Status Line starting with {0} - Found: {1}'
                msg = msg.format(self.statuslist, statusline)
                raise StatusAndHeadersParserException(msg, full_statusline)
        else:
            protocol_status = statusline.split(' ', 1)

        line, total_read = _strip_count(readline(), total_read)
        while line:
            result = line.split(':', 1)
            if len(result) == 2:
                name = result[0].rstrip(' \t')
                value = result[1].lstrip()
            else:
                name = result[0]
                value = None

            next_line, total_read = _strip_count(readline(), total_read)

            # append continuation lines, if any
            while next_line and next_line.startswith((' ', '\t')):
                if value is not None:
                    value += next_line
                next_line, total_read = _strip_count(readline(), total_read)

            if value is not None:
                header = (name, value)
                headers.append(header)

            line = next_line

        if len(protocol_status) > 1:
            statusline = protocol_status[1].strip()
        else:
            statusline = ''

        return StatusAndHeaders(statusline=statusline,
                                headers=headers,
                                protocol=protocol_status[0],
                                total_len=total_read)