Esempio n. 1
0
class BloomStringSet:
    """
    BloomSet implements set membership in a fixed memory footprint,
    but can reproduce the keys by streaming them to a temporary file.
    As it's based on a bloom filter, there is a risk of false positives,
    which would cause some keys to be missing when reading them back;
    however the likelihood of such is controllable through the parameters.
    The temporary file is only created on disk after it reaches 5MB,
    otherwise the keys stay in memory.

    cardinality: the estimated maximum number of unique elements.
                 As one goes beyond this number, risk of collision
                 increases, but slowly.

    error_rate:  the false positive rate you are comfortable with when
                 the cardinality number is reached.

    Keys are stored one per line, so a key that itself contains a newline
    is read back as several keys.
    """

    def __init__(self, cardinality=10**6, error_rate=10**-9):
        self.bloom = BloomFilter(cardinality, error_rate)
        # 'w+' rather than 'w': the keys are read back during iteration, and
        # once the spool rolls over to a real file a write-only mode would
        # make that file unreadable.
        self.file = SpooledTemporaryFile(max_size=(2**20) * 5, mode='w+')
        # Set to True by the first iteration; no key may be added afterwards.
        self.closed = False

    def add(self, key):
        """
        Record *key* unless the filter reports it as already present.

        Returns True when the key was stored and False when it was skipped.
        Raises RuntimeError once iteration has started and TypeError for
        non-string keys (both are Exception subclasses).
        """
        if self.closed:
            raise RuntimeError("Cannot add new element after attempting to read")

        if not isinstance(key, str):
            raise TypeError("Can only use string keys for now")

        if key in self.bloom:
            return False

        self.bloom.add(key)
        self.file.write(key + "\n")
        return True

    def __contains__(self, key):
        """Return whether the filter reports *key* as present."""
        return key in self.bloom

    def __iter__(self):
        """Freeze the set and restart reading the stored keys from the top."""
        self.closed = True
        self.file.seek(0)
        return self

    def __next__(self):
        """Return the next stored key, raising StopIteration at the end."""
        line = self.file.readline()
        if line == '':
            # The file is left open so that StopIteration keeps being raised
            # on further calls and the keys can be iterated again.
            raise StopIteration
        # Remove only the separator added by add(); the key's own
        # leading/trailing whitespace is preserved.
        return line.rstrip("\n")

    def __del__(self):
        # __init__ may have failed before self.file was assigned.
        spool = getattr(self, "file", None)
        if spool is not None:
            spool.close()
Esempio n. 2
0
class InputStream(object):
    """
    Buffer for an FCGI_STDIN or FCGI_DATA stream.

    Chunks passed to feed() are appended to a spooled temporary file
    created with max_mem as its size threshold. An empty chunk is the EOF
    mark; every reading method blocks until that mark has been fed.
    """

    def __init__(self, max_mem=1024):
        self._file = SpooledTemporaryFile(max_mem)
        self._eof_received = Event()

    def __del__(self):
        self._file.close()

    def _completed_file(self):
        # Block until the EOF mark has been fed, then hand back the buffer.
        self._eof_received.wait()
        return self._file

    def feed(self, data):
        """
        Append a chunk, or mark the end of the stream with an empty chunk.

        Text chunks are encoded as ISO-8859-1 before being written.
        Raises IOError when called after the EOF mark.
        """
        if self._eof_received.is_set():
            raise IOError('Feeding file beyond EOF mark')
        if data:
            if isinstance(data, six.text_type):
                data = data.encode("ISO-8859-1")
            self._file.write(data)
            return
        # EOF mark: rewind so readers start from the first byte.
        self._file.seek(0)
        self._eof_received.set()

    def __iter__(self):
        return iter(self._completed_file())

    def read(self, size=-1):
        return self._completed_file().read(size)

    def readline(self, size=-1):
        return self._completed_file().readline(size)

    def readlines(self, sizehint=0):
        return self._completed_file().readlines(sizehint)

    @property
    def eof_received(self):
        """True once the EOF mark has been fed."""
        return self._eof_received.is_set()
Esempio n. 3
0
class InputStream(object):
    """
    Holds the data of an FCGI_STDIN or FCGI_DATA stream.

    Received chunks go into a spooled temporary file whose size threshold
    is max_mem. Feeding an empty chunk marks EOF; the reading methods wait
    for that mark before touching the file.
    """

    def __init__(self, max_mem=1024):
        self._file = SpooledTemporaryFile(max_mem)
        self._eof_received = Event()

    def _when_complete(self, method_name, *args):
        # Wait for the EOF mark, then delegate to the named file method.
        self._eof_received.wait()
        return getattr(self._file, method_name)(*args)

    def feed(self, data):
        """
        Store one chunk; an empty chunk is the EOF mark.

        Raises IOError if the EOF mark was already fed.
        """
        if self._eof_received.is_set():
            raise IOError('Feeding file beyond EOF mark')
        if data:
            self._file.write(data)
            return
        # EOF mark: rewind the buffer before releasing the readers.
        self._file.seek(0)
        self._eof_received.set()

    def __iter__(self):
        self._eof_received.wait()
        buffered = self._file
        return iter(buffered)

    def read(self, size=-1):
        return self._when_complete('read', size)

    def readline(self, size=-1):
        return self._when_complete('readline', size)

    def readlines(self, sizehint=0):
        return self._when_complete('readlines', sizehint)

    @property
    def eof_received(self):
        """Whether the EOF mark has been fed yet."""
        return self._eof_received.is_set()
Esempio n. 4
0
class Request(object):
    """
    An HTTP request assembled from raw data and then parsed.

    Raw data is appended with ``_write`` into a spooled temporary file;
    ``_parse`` reads that buffer back to fill in the request line fields,
    headers, cookies and the GET / POST / file data.
    """

    def __init__(self, ip='', sock=None):
        self.ip = ip
        # '-' placeholders stay in place until _parse() has run.
        self.method = '-'
        self.path = '-'
        self.url = '-'
        self.protocol = '-'
        self.body = ''

        self._sock = sock
        self._input = SpooledTemporaryFile(max_size=1024 * 1024 * 2)
        self._output = Queue.Queue()
        self._bytes = 0             # total length of the data seen by _write
        self._content_length = 0    # value of the Content-Length header, if any
        self._header_data = ''      # leading part of the request (headers)

        self.headers = ImmutableMultiDict()
        self.cookies = []
        self.files = []
        self.get = ImmutableMultiDict()
        self.post = ImmutableMultiDict()

    def _parse(self):
        """
        Populate the public request fields from the buffered raw request.

        Raises ValueError when the request line does not consist of exactly
        three space-separated parts.
        """
        if self._input:
            self._input.seek(0)
            # Drop the line terminator so it does not end up in self.protocol
            request_line = self._input.readline().rstrip('\r\n')
            self.method, self.path, self.protocol = request_line.split(' ')
            self.url, _, querystring = self.path.partition('?')

            # Parse headers and cookies
            self.headers, self.cookies = self._parse_headers_and_cookies()

            # Parse POST and FILES
            parser = MultiPartParser(self)
            self.post, self.files = parser._parse_post_and_files()

            # Parse GET
            self.get = self._parse_request_data(querystring)

    def _has_more_data(self):
        """Return True while fewer body bytes than Content-Length arrived."""
        return self._message_bytes < self._content_length

    @property
    def _message_bytes(self):
        """Amount of data received after the end of the header section."""
        return self._bytes - self._get_message_start()

    def _get_message_start(self):
        """
        Return the offset just past the blank line ending the headers,
        or 0 while that blank line has not been seen.
        """
        end_of_headers = self._header_data.find('\r\n\r\n')
        if end_of_headers < 0:
            return 0
        return end_of_headers + 4

    def _write(self, data):
        """Buffer a chunk of raw request data and track Content-Length."""
        self._bytes += len(data)
        self._input.write(data)

        # Only accumulate up to 16kb of possible header data
        if len(self._header_data) < 2 ** 14:
            self._header_data += data
            # Search the header section only, so body text that looks like
            # a Content-Length header is not mistaken for one.
            head, _, _ = self._header_data.partition('\r\n\r\n')
            match = re.search(r'content-length: ([0-9]+)', head.lower())
            if match:
                # TODO: does not apply for HEAD requests
                self._content_length = int(match.group(1))

    def _parse_headers_and_cookies(self):
        """
        Read the header lines from the buffered request.

        Returns a ``(headers, cookies)`` tuple: an ImmutableMultiDict of
        header fields and a list of Cookie objects. X-Real-IP and
        X-Forwarded-For update ``self.ip`` instead of being stored as
        headers. Lines without a ``': '`` separator are skipped.
        """
        self._input.seek(0)
        headers = []
        cookies = []
        # Skip first line
        self._input.readline()
        while True:
            header = self._input.readline().strip()
            if not header:
                # Newline, which means end of HTTP headers.
                break
            field, separator, value = header.partition(': ')
            if not separator:
                continue
            fl = field.lower()
            if fl in ('x-real-ip', 'x-forwarded-for'):
                # NOTE(review): these headers are taken at face value;
                # confirm they are only ever set by a trusted proxy.
                self.ip = value.split(',')[0].strip()
            elif fl == 'cookie':
                for cpair in value.split(';'):
                    # Split on the first '=' only: cookie values may
                    # themselves contain '=' (e.g. base64 padding).
                    cname, has_value, cvalue = cpair.strip().partition('=')
                    if has_value:
                        cookies.append(Cookie(name=cname, value=cvalue))
            else:
                headers.append((field, value))
        return (ImmutableMultiDict(headers), cookies)

    def _parse_request_data(self, data):
        """
        Parse ``key=value`` pairs joined by ``&`` into an ImmutableMultiDict.

        Values are decoded with ``urllib.unquote_plus``; keys are kept as
        received. Pairs without ``=`` are ignored.
        """
        values = []
        for pair in data.split('&'):
            # Split on the first '=' only so values containing '=' survive.
            k, has_value, v = pair.partition('=')
            if has_value:
                values.append((k, urllib.unquote_plus(v)))
        return ImmutableMultiDict(values)

    @property
    def _execution_time(self):
        """Seconds elapsed since ``_start_time``, or 0 when it is not set."""
        try:
            return time.time() - self._start_time
        except (AttributeError, TypeError):
            # _start_time is not assigned in __init__; it may be absent or None.
            return 0