class BloomStringSet:
    """
    Set of string keys with a fixed memory footprint that can replay its keys.

    Membership is tracked with a bloom filter, while every accepted key is
    also streamed to a spooled temporary file so the keys can be read back
    later by iterating over the set. Because a bloom filter can report false
    positives, a small number of keys may be wrongly treated as duplicates
    and therefore be missing from the replay; the likelihood is controlled
    by the constructor parameters.

    The spooled file stays in memory until it reaches 5MB, after which it is
    moved to a temporary file on disk.

    Keys are stored one per line, so a key containing a newline character
    will not be reproduced as a single key when iterating.

    The set is write-then-read: once iteration has started no further keys
    can be added, and the backing file is closed when iteration finishes, so
    the keys can only be replayed once.

    cardinality: the estimated maximum number of unique elements. As one goes
        beyond this number, the risk of collision increases, but slowly.
    error_rate: the false positive rate you are comfortable with when the
        cardinality number is reached.
    """

    def __init__(self, cardinality=10**6, error_rate=10**-9):
        self.bloom = BloomFilter(cardinality, error_rate)
        # 'w+' rather than 'w': the keys are read back from this file, and
        # once the spool rolls over to disk the real file is opened with
        # exactly this mode, so a write-only mode would make it unreadable.
        self.file = SpooledTemporaryFile(max_size=(2**20) * 5, mode='w+')
        self.closed = False

    def add(self, key):
        """
        Add *key* to the set.

        Returns True if the key was added, False if the bloom filter reports
        it as already present.

        Raises RuntimeError if iteration has already started and TypeError
        if *key* is not a string (both are Exception subclasses).
        """
        if self.closed:
            raise RuntimeError("Cannot add new element after attempting to read")
        if not isinstance(key, str):
            raise TypeError("Can only use string keys for now")
        if key in self.bloom:
            return False
        self.bloom.add(key)
        self.file.write(key + "\n")
        return True

    def __contains__(self, key):
        """Return whether the bloom filter reports *key* as present."""
        return key in self.bloom

    def __iter__(self):
        """Freeze the set and start replaying the stored keys."""
        self.closed = True
        self.file.seek(0)
        return self

    def __next__(self):
        """Return the next stored key, closing the file once exhausted."""
        # Keep raising StopIteration after exhaustion instead of failing on
        # a read from the already closed file.
        if self.file.closed:
            raise StopIteration
        line = self.file.readline()
        if line == '':
            self.file.close()
            raise StopIteration
        # Remove only the record separator so that keys with leading or
        # trailing whitespace are returned unchanged.
        return line.rstrip("\n")

    def __del__(self):
        # __init__ may have failed before the file was created.
        spool = getattr(self, 'file', None)
        if spool is not None:
            spool.close()
class InputStream(object):
    """
    Buffer for an FCGI_STDIN or FCGI_DATA stream.

    Incoming chunks are appended to a spooled temporary file, which keeps
    the data in memory until more than max_mem bytes have been received.
    All reading operations block until the end-of-stream mark was fed.
    """

    def __init__(self, max_mem=1024):
        self._file = SpooledTemporaryFile(max_mem)
        self._eof_received = Event()

    def __del__(self):
        self._file.close()

    def _completed_file(self):
        # Block until EOF has been fed, then hand out the rewound file.
        self._eof_received.wait()
        return self._file

    def feed(self, data):
        """
        Append a chunk of data; an empty chunk marks the end of the stream.

        Text chunks are encoded as ISO-8859-1 before being stored.
        Raises IOError when called after the end-of-stream mark.
        """
        eof_flag = self._eof_received
        if eof_flag.is_set():
            raise IOError('Feeding file beyond EOF mark')

        if data:
            chunk = data
            if isinstance(chunk, six.text_type):
                chunk = chunk.encode("ISO-8859-1")
            self._file.write(chunk)
            return

        # Empty chunk: rewind so readers start from the beginning.
        self._file.seek(0)
        eof_flag.set()

    def __iter__(self):
        return iter(self._completed_file())

    def read(self, size=-1):
        return self._completed_file().read(size)

    def readline(self, size=-1):
        return self._completed_file().readline(size)

    def readlines(self, sizehint=0):
        return self._completed_file().readlines(sizehint)

    @property
    def eof_received(self):
        """True once the end-of-stream mark has been fed."""
        return self._eof_received.is_set()
class InputStream(object):
    """
    Buffer for an FCGI_STDIN or FCGI_DATA stream.

    Received chunks are written to a spooled temporary file, which stays in
    memory until more than max_mem bytes have been stored. Readers block
    until the end-of-stream mark (an empty chunk) has been fed.
    """

    def __init__(self, max_mem=1024):
        self._file = SpooledTemporaryFile(max_mem)
        self._eof_received = Event()

    def _wait_for_eof(self):
        # Every reader goes through here: wait for EOF, then use the file.
        self._eof_received.wait()
        return self._file

    def feed(self, data):
        """
        Store a chunk of data; an empty chunk marks the end of the stream.

        Raises IOError when called after the end-of-stream mark.
        """
        if self._eof_received.is_set():
            raise IOError('Feeding file beyond EOF mark')

        if data:
            self._file.write(data)
            return

        # EOF mark: rewind so that reading starts at the first byte.
        self._file.seek(0)
        self._eof_received.set()

    def __iter__(self):
        stream = self._wait_for_eof()
        return iter(stream)

    def read(self, size=-1):
        stream = self._wait_for_eof()
        return stream.read(size)

    def readline(self, size=-1):
        stream = self._wait_for_eof()
        return stream.readline(size)

    def readlines(self, sizehint=0):
        stream = self._wait_for_eof()
        return stream.readlines(sizehint)

    @property
    def eof_received(self):
        """True once the end-of-stream mark has been fed."""
        return self._eof_received.is_set()
class Request(object):
    """
    An HTTP request that is accumulated from raw data and parsed afterwards.

    Raw data is appended with _write(), which also keeps track of the
    announced Content-Length so that _has_more_data() can tell whether the
    message body is complete. _parse() then fills in the public attributes
    (method, path, url, protocol, headers, cookies, get, post, files).
    """

    # Matches the Content-Length header in the accumulated header data.
    # Whitespace after the colon is optional in HTTP, so zero or more
    # spaces/tabs are accepted.
    _CONTENT_LENGTH_RE = re.compile(r'content-length:[ \t]*([0-9]+)',
                                    re.IGNORECASE)

    # Upper bound (16kb) on the amount of data kept for header inspection.
    _MAX_HEADER_DATA = 2 ** 14

    def __init__(self, ip='', sock=None):
        self.ip = ip
        # '-' placeholders are kept until _parse() has run.
        self.method = '-'
        self.path = '-'
        self.url = '-'
        self.protocol = '-'
        self.body = ''
        self._sock = sock
        # Raw request data; kept in memory up to 2MB, then spooled to disk.
        self._input = SpooledTemporaryFile(max_size=1024 * 1024 * 2)
        self._output = Queue.Queue()
        # Total number of raw bytes written so far.
        self._bytes = 0
        self._content_length = 0
        # Leading part of the raw data, used to locate the headers.
        self._header_data = ''
        self.headers = ImmutableMultiDict()
        self.cookies = []
        self.files = []
        self.get = ImmutableMultiDict()
        self.post = ImmutableMultiDict()

    def _parse(self):
        """
        Parse the accumulated raw data into the public request attributes.

        Raises ValueError if the request line does not consist of exactly
        three space-separated parts.
        """
        if self._input:
            self._input.seek(0)
            # Drop the line terminator so it does not end up in the protocol.
            request_line = self._input.readline().rstrip('\r\n')
            self.method, self.path, self.protocol = request_line.split(' ')
            self.url, separator, querystring = self.path.partition('?')

            # Parse headers and cookies
            self.headers, self.cookies = self._parse_headers_and_cookies()

            # Parse POST and FILES
            parser = MultiPartParser(self)
            self.post, self.files = parser._parse_post_and_files()

            # Parse GET
            self.get = self._parse_request_data(querystring)

    def _has_more_data(self):
        """Return True while fewer body bytes than Content-Length arrived."""
        return self._message_bytes < self._content_length

    @property
    def _message_bytes(self):
        """Number of bytes received after the end of the headers."""
        return self._bytes - self._get_message_start()

    def _get_message_start(self):
        """
        Return the offset of the message body within the raw data.

        Returns 0 while the blank line ending the headers was not seen yet.
        """
        end_of_headers = self._header_data.find('\r\n\r\n')
        if end_of_headers == -1:
            return 0
        return end_of_headers + 4

    def _write(self, data):
        """Append raw data and update the announced Content-Length."""
        self._bytes += len(data)
        self._input.write(data)

        # Only accumulate up to 16kb of possible header data. The header
        # buffer is scanned only when it actually grew; scanning unchanged
        # data again could not produce a different result.
        if len(self._header_data) < self._MAX_HEADER_DATA:
            self._header_data += data
            match = self._CONTENT_LENGTH_RE.search(self._header_data)
            if match:
                # TODO: does not apply for HEAD requests
                self._content_length = int(match.group(1))

    def _parse_headers_and_cookies(self):
        """
        Read the header lines from the raw data.

        Returns a tuple (headers, cookies). The X-Real-IP and
        X-Forwarded-For headers update self.ip, and Cookie headers are
        split into Cookie objects; neither is included in the returned
        headers. Lines without a ': ' separator are ignored.
        """
        self._input.seek(0)
        headers = []
        cookies = []

        # Skip first line
        self._input.readline()

        while True:
            header = self._input.readline().strip()
            if not header:
                # Newline, which means end of HTTP headers.
                break

            field, separator, value = header.partition(': ')
            if not separator:
                continue

            name = field.lower()
            if name in ('x-real-ip', 'x-forwarded-for'):
                # Only the first address of a comma-separated list is used.
                self.ip = value.split(',')[0].strip()
            elif name == 'cookie':
                for cookie_pair in value.split(';'):
                    # Split on the first '=' only: cookie values may
                    # themselves contain '=' (e.g. base64 padding).
                    cname, equals, cvalue = cookie_pair.strip().partition('=')
                    if not equals:
                        continue
                    cookies.append(Cookie(name=cname, value=cvalue))
            else:
                headers.append((field, value))

        return (ImmutableMultiDict(headers), cookies)

    def _parse_request_data(self, data):
        """
        Parse an '&'-separated string of key=value pairs.

        Values are URL-decoded; pairs without '=' are ignored.
        """
        values = []
        for pair in data.split('&'):
            # Split on the first '=' only so that values containing '='
            # are kept instead of being dropped.
            key, equals, value = pair.partition('=')
            if not equals:
                continue
            values.append((key, urllib.unquote_plus(value)))
        return ImmutableMultiDict(values)

    @property
    def _execution_time(self):
        """Seconds elapsed since self._start_time, or 0 if it is not set."""
        try:
            return time.time() - self._start_time
        except (AttributeError, TypeError):
            # _start_time is not assigned in this class; it may be missing
            # or not yet hold a number.
            return 0