def test_dump():
    """Round-trip the reference dataset through dump/load_svmlight_file.

    Every combination of sparse/dense input, zero/one-based indexing and
    float32/float64 dtype is dumped to an in-memory buffer, the two leading
    comment lines are checked, and the data is loaded back and compared.
    """
    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()

    for X in (Xs, Xd):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                f = BytesIO()
                # we need to pass a comment to get the version info in;
                # LibSVM doesn't grok comments so they're not put in by
                # default anymore.
                dump_svmlight_file(X.astype(dtype), y, f, comment="test",
                                   zero_based=zero_based)
                f.seek(0)

                # BytesIO.readline() returns bytes; decode before doing a
                # text containment check so the test also works on Python 3.
                comment = f.readline().decode("utf-8")
                assert_in("scikit-learn %s" % sklearn.__version__, comment)

                comment = f.readline().decode("utf-8")
                assert_in(["one", "zero"][zero_based] + "-based", comment)

                X2, y2 = load_svmlight_file(f, dtype=dtype,
                                            zero_based=zero_based)
                assert_equal(X2.dtype, dtype)

                # allow a rounding error at the last decimal place
                decimal = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(Xd.astype(dtype), X2.toarray(),
                                          decimal)
                assert_array_equal(y, y2)
def test_dump():
    """Round-trip the reference dataset through dump/load_svmlight_file.

    Covers sparse and dense input, zero/one-based indexing and both float
    dtypes; checks the two leading header lines and the reloaded data.
    """
    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()

    for X in (Xs, Xd):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64]:
                f = BytesIO()
                dump_svmlight_file(X.astype(dtype), y, f,
                                   zero_based=zero_based)
                f.seek(0)

                # BytesIO.readline() returns bytes; decode before doing a
                # text containment check so the test also works on Python 3.
                comment = f.readline().decode("utf-8")
                assert_in("scikit-learn %s" % sklearn.__version__, comment)

                comment = f.readline().decode("utf-8")
                assert_in(["one", "zero"][zero_based] + "-based", comment)

                X2, y2 = load_svmlight_file(f, dtype=dtype,
                                            zero_based=zero_based)
                assert_equal(X2.dtype, dtype)

                # allow a rounding error at the last decimal place
                decimal = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(Xd.astype(dtype), X2.toarray(),
                                          decimal)
                assert_array_equal(y, y2)
def test_dump_concise():
    """Check that dumped values use the shortest exact textual form."""
    one, two, three = 1, 2.1, 3.01
    exact = 1.000000000000001
    # loses the last decimal place
    almost = 1.0000000000000001

    y = [one, two, three, exact, almost]
    X = [
        [one, two, three, exact, almost],
        [1e9, 2e18, 3e27, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
    ]

    f = BytesIO()
    dump_svmlight_file(X, y, f)
    f.seek(0)

    # make sure it's using the most concise format possible
    expected_lines = (
        b("1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n"),
        b("2.1 0:1000000000 1:2e+18 2:3e+27\n"),
        b("3.01 \n"),
        b("1.000000000000001 \n"),
        b("1 \n"),
    )
    for expected in expected_lines:
        assert_equal(f.readline(), expected)

    # make sure it's correct too :)
    f.seek(0)
    X2, y2 = load_svmlight_file(f)
    assert_array_almost_equal(X, X2.toarray())
    assert_array_equal(y, y2)
def test_read_inventory_v1():
    """Parse the v1 inventory fixture and check two known entries."""
    stream = BytesIO(inventory_v1)
    stream.readline()  # discard the first line before handing over
    invdata = read_inventory_v1(stream, '/util', posixpath.join)

    expectations = (
        ('py:module', 'module',
         ('foo', '1.0', '/util/foo.html#module-module', '-')),
        ('py:class', 'module.cls',
         ('foo', '1.0', '/util/foo.html#module.cls', '-')),
    )
    for domain, name, expected in expectations:
        assert invdata[domain][name] == expected
def test_first_byte_timestamp_updated_on_readline(self):
    """The first-byte timestamp is set by the first readline and then kept."""
    reader = tcp.Reader(BytesIO(b"foobar\nfoobar\nfoobar"))

    reader.readline()
    assert reader.first_byte_timestamp
    expected = reader.first_byte_timestamp

    # A second read must not move the timestamp.
    reader.readline()
    assert reader.first_byte_timestamp == expected
def test_dump():
    """Round-trip X and y (sparse, dense and sliced) through dump/load."""
    X_sparse, y_dense = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()
    y_sparse = sp.csr_matrix(y_dense)

    # slicing a csr_matrix can unsort its .indices, so test that we sort
    # those correctly
    X_sliced = X_sparse[np.arange(X_sparse.shape[0])]
    y_sliced = y_sparse[np.arange(y_sparse.shape[0])]

    def next_text_line(stream):
        # Header lines are read as bytes and compared as text.
        return str(stream.readline(), "utf-8")

    for X in (X_sparse, X_dense, X_sliced):
        for y in (y_sparse, y_dense, y_sliced):
            for zero_based in (True, False):
                for dtype in [np.float32, np.float64, np.int32]:
                    buf = BytesIO()

                    if sp.issparse(y) and y.shape[0] == 1:
                        # make sure y's shape is: (n_samples, n_labels)
                        # when it is sparse
                        y = y.T

                    # we need to pass a comment to get the version info in;
                    # LibSVM doesn't grok comments so they're not put in by
                    # default anymore.
                    dump_svmlight_file(X.astype(dtype), y, buf,
                                       comment="test",
                                       zero_based=zero_based)
                    buf.seek(0)

                    assert_in("scikit-learn %s" % sklearn.__version__,
                              next_text_line(buf))
                    assert_in(["one", "zero"][zero_based] + "-based",
                              next_text_line(buf))

                    X2, y2 = load_svmlight_file(buf, dtype=dtype,
                                                zero_based=zero_based)
                    assert_equal(X2.dtype, dtype)
                    assert_array_equal(X2.sorted_indices().indices,
                                       X2.indices)

                    X2_dense = X2.toarray()

                    # allow a rounding error at the last decimal place
                    decimal = 4 if dtype == np.float32 else 15
                    assert_array_almost_equal(
                        X_dense.astype(dtype), X2_dense, decimal)
                    assert_array_almost_equal(
                        y_dense.astype(dtype), y2, decimal)
def test_dump():
    """Round-trip sparse, dense and sliced X through dump/load_svmlight_file."""
    Xs, y = load_svmlight_file(datafile)
    Xd = Xs.toarray()

    # slicing a csr_matrix can unsort its .indices, so test that we sort
    # those correctly
    Xsliced = Xs[np.arange(Xs.shape[0])]

    def next_header_line(stream):
        line = stream.readline()
        try:
            return str(line, "utf-8")
        except TypeError:
            # fails in Python 2.x; keep the raw line there
            return line

    for X in (Xs, Xd, Xsliced):
        for zero_based in (True, False):
            for dtype in [np.float32, np.float64, np.int32]:
                buf = BytesIO()
                # we need to pass a comment to get the version info in;
                # LibSVM doesn't grok comments so they're not put in by
                # default anymore.
                dump_svmlight_file(X.astype(dtype), y, buf, comment="test",
                                   zero_based=zero_based)
                buf.seek(0)

                assert_in("scikit-learn %s" % sklearn.__version__,
                          next_header_line(buf))
                assert_in(["one", "zero"][zero_based] + "-based",
                          next_header_line(buf))

                X2, y2 = load_svmlight_file(buf, dtype=dtype,
                                            zero_based=zero_based)
                assert_equal(X2.dtype, dtype)
                assert_array_equal(X2.sorted_indices().indices, X2.indices)

                # allow a rounding error at the last decimal place
                decimal = 4 if dtype == np.float32 else 15
                assert_array_almost_equal(Xd.astype(dtype), X2.toarray(),
                                          decimal)
                assert_array_equal(y, y2)
def test_dump_multilabel():
    """Dense and sparse multilabel targets must dump to the same lines."""
    X = [[1, 0, 3, 0, 5],
         [0, 0, 0, 0, 0],
         [0, 5, 0, 1, 0]]
    y_dense = [[0, 1, 0], [1, 0, 1], [1, 1, 0]]
    y_sparse = sp.csr_matrix(y_dense)

    # make sure it dumps multilabel correctly
    expected_lines = [
        b("1 0:1 2:3 4:5\n"),
        b("0,2 \n"),
        b("0,1 1:5 3:1\n"),
    ]

    for y in [y_dense, y_sparse]:
        f = BytesIO()
        dump_svmlight_file(X, y, f, multilabel=True)
        f.seek(0)
        for expected in expected_lines:
            assert_equal(f.readline(), expected)
class DummyFile(object):
    """Minimal file-like wrapper around an in-memory bytes buffer.

    Exposes ``read``, ``readline``, ``fileno`` and iteration, all backed by
    a ``BytesIO`` built from ``data``.
    """

    def __init__(self, data):
        self.buffer = BytesIO(data)

    def read(self, amt=None):
        """Return up to ``amt`` bytes (everything left when ``amt`` is None)."""
        return self.buffer.read(amt)

    def fileno(self):
        """Return -1: there is no real file descriptor behind this object."""
        return -1

    def readline(self):
        """Return the next line from the buffer, including its newline."""
        # The result used to be dropped, so callers always received None.
        return self.buffer.readline()

    def __iter__(self):
        return self.buffer.__iter__()
def _pull_target(dtuf_main, target, expected_dgsts, expected_sizes, get_info, capfd):
    """Run ``pull-target`` and verify the blobs written to captured stdout.

    When ``get_info`` is true, ``DTUF_BLOB_INFO`` is set in the environment
    and each blob is expected to be preceded by a ``<digest> <size>`` line.
    Otherwise the output must be the raw blobs back to back. In both modes
    every blob's SHA-256 must match ``expected_dgsts`` and stderr must be
    empty.
    """
    environ = {'DTUF_BLOB_INFO': '1'}
    environ.update(dtuf_main)
    assert dtuf.main.doit(['pull-target', pytest.repo, target],
                          environ if get_info else dtuf_main) == 0

    # Clear the capture file's encoding while reading the output.
    # pylint: disable=protected-access
    encoding = capfd._capture.out.tmpfile.encoding
    capfd._capture.out.tmpfile.encoding = None
    try:
        out, err = capfd.readouterr()
        if get_info:
            outs = BytesIO(out)
            for i, size in enumerate(expected_sizes):
                info_line = (expected_dgsts[i].encode('utf-8') + b' ' +
                             str(size).encode('utf-8') + b'\n')
                assert outs.readline() == info_line
                sha256 = hashlib.sha256()
                sha256.update(outs.read(size))
                assert sha256.hexdigest() == expected_dgsts[i]
            assert len(outs.read()) == 0
        else:
            pos = 0
            for i, size in enumerate(expected_sizes):
                sha256 = hashlib.sha256()
                sha256.update(out[pos:pos + size])
                pos += size
                assert sha256.hexdigest() == expected_dgsts[i]
            assert pos == len(out)
        assert err == ""
    finally:
        # Restore the encoding even when one of the checks above fails.
        capfd._capture.out.tmpfile.encoding = encoding
def from_bytes(cls, bytestr):
    """Drive a ``ResponseReader`` to completion from an in-memory byte string.

    The reader's state says what it needs next (a line, a fixed amount of
    data, or a peek); each request is served from a ``BytesIO`` over
    ``bytestr`` until the reader reports ``M.Complete``.

    Returns the reader's ``raw_response``. Raises ``RuntimeError`` when the
    reader asks for a state this loop does not know.
    """
    # TODO: generify
    bio = BytesIO(bytestr)
    reader = ResponseReader()
    state = reader.state
    while state is not M.Complete:
        if state.type == M.NeedLine.type:
            line = bio.readline()  # TODO: limit?
            next_state = M.HaveLine(value=line)
        elif state.type == M.NeedData.type:
            data = bio.read(state.amount)
            # TODO: can this block or return None if empty etc?
            next_state = M.HaveData(value=data)
        elif state.type == M.NeedPeek.type:
            # io.BytesIO has no peek(); emulate it by reading and then
            # restoring the stream position.
            mark = bio.tell()
            peeked = bio.read(state.amount)
            bio.seek(mark)
            # TODO: again, what happens on end of stream (peeked is empty)
            # NOTE(review): the peeked bytes are passed as ``amount`` here,
            # unchanged from the original - confirm against M.HavePeek.
            next_state = M.HavePeek(amount=peeked)
        else:
            raise RuntimeError('Unknown state %r' % (state,))
        state = reader.send(next_state)

    return reader.raw_response
class ResponseProxy():
    """Buffered copy of a response object.

    The whole body is read once (decoded through ``decode_data`` when the
    response declares gzip or deflate encoding), the source response is
    closed, and reads are then served from an in-memory buffer.
    """

    def __init__(self, req):
        self._info = req.info()
        if self._info.get('Content-Encoding') in ('gzip', 'deflate'):
            payload = decode_data(req)
            # The body is stored decoded, so drop the encoding header.
            del self._info['Content-Encoding']
        else:
            payload = req.read()
        self._data = BytesIO(payload)
        req.close()
        self.url = req.geturl()
        self.code = req.code
        self.msg = req.msg

    def read(self, *args):
        return self._data.read(*args)

    def readline(self):
        return self._data.readline()

    def info(self):
        return self._info

    def geturl(self):
        return self.url

    def close(self):
        self._data.close()

    def seek(self, idx):
        self._data.seek(idx)
class DummySocket(object):
    """In-memory socket stand-in: sent data is queued, reads come from a
    ``BytesIO`` buffer."""

    def __init__(self):
        self.can_read = False
        self._buffer = BytesIO()
        self.queue = []

    @property
    def buffer(self):
        # View over the full contents, regardless of the read position.
        contents = self._buffer.getvalue()
        return memoryview(contents)

    def advance_buffer(self, amt):
        # Consume ``amt`` bytes and discard them.
        self._buffer.read(amt)

    def send(self, data):
        if isinstance(data, bytes):
            self.queue.append(data)
            return
        raise TypeError()

    def recv(self, l):
        received = self._buffer.read(l)
        return memoryview(received)

    def close(self):
        pass

    def readline(self):
        line = self._buffer.readline()
        return memoryview(line)

    def fill(self):
        pass
def test_readlog(self):
    """Logging records what is read, resets on restart, and errors once stopped."""
    reader = tcp.Reader(BytesIO(b"foobar\nfoobar"))
    assert not reader.is_logging()

    reader.start_log()
    assert reader.is_logging()
    reader.readline()
    assert reader.get_log() == b"foobar\n"
    reader.read(1)
    assert reader.get_log() == b"foobar\nf"

    # Starting again clears what was logged so far.
    reader.start_log()
    assert reader.get_log() == b""
    reader.read(1)
    assert reader.get_log() == b"o"

    reader.stop_log()
    tutils.raises(ValueError, reader.get_log)
def test_seek_cur(self):
    """Relative seeks on CompressedFile must track a BytesIO reference."""
    compression_types = [CompressionTypes.BZIP2,
                         CompressionTypes.DEFLATE,
                         CompressionTypes.GZIP]
    for compression_type in compression_types:
        file_name = self._create_compressed_file(compression_type,
                                                 self.content)
        with open(file_name, 'rb') as f:
            compressed_fd = CompressedFile(f, compression_type,
                                           read_size=self.read_block_size)
            reference_fd = BytesIO(self.content)

            # Test out of bound, inbound seeking in both directions
            # Note: BytesIO's seek() reports out of bound positions (if we
            # seek beyond the file), therefore we need to cap it to
            # max_position (to make it consistent with the old StringIO
            # behavior)
            offsets = (-1, 0, 1,
                       len(self.content) // 2,
                       len(self.content) // 2,
                       -1 * len(self.content) // 2)
            for seek_position in offsets:
                compressed_fd.seek(seek_position, os.SEEK_CUR)
                reference_fd.seek(seek_position, os.SEEK_CUR)

                uncompressed_line = compressed_fd.readline()
                expected_line = reference_fd.readline()
                self.assertEqual(uncompressed_line, expected_line)

                reference_position = reference_fd.tell()
                uncompressed_position = compressed_fd.tell()
                reference_position = min(reference_position,
                                         len(self.content))
                # Move the reference back inside the content so the next
                # relative seek starts from the capped position.
                reference_fd.seek(reference_position, os.SEEK_SET)
                self.assertEqual(uncompressed_position, reference_position)
def test_seek_set(self):
    """Absolute seeks on CompressedFile must track a BytesIO reference."""
    compression_types = [CompressionTypes.BZIP2,
                         CompressionTypes.DEFLATE,
                         CompressionTypes.GZIP]
    for compression_type in compression_types:
        file_name = self._create_compressed_file(compression_type,
                                                 self.content)
        with open(file_name, 'rb') as f:
            compressed_fd = CompressedFile(f, compression_type,
                                           read_size=self.read_block_size)
            reference_fd = BytesIO(self.content)

            # Note: BytesIO's tell() reports out of bound positions (if we
            # seek beyond the file), therefore we need to cap it to
            # max_position
            # _CompressedFile.tell() always stays within the bounds of the
            # uncompressed content.
            # Negative seek position argument is not supported for BytesIO
            # with whence set to SEEK_SET.
            targets = (0, 1,
                       len(self.content) - 1,
                       len(self.content),
                       len(self.content) + 1)
            for seek_position in targets:
                compressed_fd.seek(seek_position, os.SEEK_SET)
                reference_fd.seek(seek_position, os.SEEK_SET)

                uncompressed_line = compressed_fd.readline()
                reference_line = reference_fd.readline()
                self.assertEqual(uncompressed_line, reference_line)

                uncompressed_position = compressed_fd.tell()
                reference_position = min(reference_fd.tell(),
                                         len(self.content))
                self.assertEqual(uncompressed_position, reference_position)
def actual_process(self, data):
    """Parse the uploaded CSV and return a JSON string.

    ``data['columns']`` lists the expected column names and ``data['csv']``
    holds the raw file bytes. On success the rows (after the header) are
    returned as JSON; otherwise a JSON object with a negative ``status``
    and a ``message`` list describes the problem:

    * -2: the file could not be decoded
    * -3: a row had the wrong number of columns
    * -4: no rows followed the header
    * -5: the file was empty
    """
    cols = data['columns']
    stream = BytesIO(data['csv'])
    # The file is Bytes, encoded.
    encoding = self.guess_encoding(data['csv'])
    # TODO: Delivery?
    try:
        dialect = Sniffer().sniff(stream.readline(), [',', '\t'])
    except err:
        dialect = excel
    stream.seek(0)
    reader = UnicodeDictReader(stream, cols, encoding=encoding,
                               dialect=dialect)

    def failure(status, *message):
        return to_json({'status': status, 'message': list(message)})

    profiles = []
    retval = None
    try:
        next(reader)  # Skip the first row (the header)
    except UnicodeDecodeError as e:
        t = guess_content_type(body=data['csv'])[0]
        msg = 'The file is different from what is required. (It '\
              'appears to be a {0} file.) Please check that you '\
              'selected the correct CSV file.'
        retval = failure(-2, msg.format(t.split('/')[0]), str(e), t)
    except StopIteration:
        msg = 'The file appears to be empty. Please check that you '\
              'generated the CSV file correctly.'
        retval = failure(-5, msg, 'no-rows')
    else:
        for rowCount, row in enumerate(reader, 1):
            if len(row) != len(cols):
                # *Technically* the number of columns in CSV rows can be
                # arbitrary. However, a strict interpretation is enforced
                # for sanity's sake.
                msg = 'Row {0} had {1} columns, rather than {2}. ' \
                      'Please check the file.'
                retval = failure(-3,
                                 msg.format(rowCount, len(row), len(cols)))
                # One bad row discards everything collected so far.
                profiles = []
                break
            profiles.append(row)

    if profiles and (not retval):
        retval = to_json(profiles)
    elif (not profiles) and (not retval):
        msg = 'No rows were found in the CSV file. '\
              'Please check that you selected the correct CSV file.'
        retval = failure(-4, msg, 'no-rows')

    assert retval, 'No retval'
    return retval
def test_wrap(self):
    """Reader wraps a stream: readline works and other attributes pass through."""
    raw = BytesIO(b"foobar\nfoobar")
    raw.flush()
    wrapped = tcp.Reader(raw)

    assert wrapped.readline() == b"foobar\n"
    assert wrapped.readline() == b"foobar"

    # Test __getattr__
    assert wrapped.isatty
def _decode(raw): """As per PEP 263, decode raw data according to the encoding specified in the first couple lines of the data, or in ASCII. Non-ASCII data without an encoding specified will cause UnicodeDecodeError to be raised. """ assert type(raw) is bytes # sanity check decl_re = re.compile(br'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)') def get_declaration(line): match = decl_re.match(line) if match: return match.group(1) return None encoding = None fulltext = b'' sio = BytesIO(raw) for line in (sio.readline(), sio.readline()): potential = get_declaration(line) if potential is not None: if encoding is None: # If both lines match, use the first. This matches Python's # observed behavior. encoding = potential munged = b'# encoding set to ' + encoding + b'\n' else: # But always munge any encoding line. We can't simply remove # the line, because we want to preserve the line numbering. # However, later on when we ask Python to exec a unicode # object, we'll get a SyntaxError if we have a well-formed # `coding: # ` line in it. munged = b'# encoding NOT set to ' + potential + b'\n' line = line.split(b'#')[0] + munged fulltext += line fulltext += sio.read() sio.close() encoding = encoding.decode('ascii') if encoding else 'ascii' return fulltext.decode(encoding)
def readline(self, size=None):
    """Return the next line, reading more data into the buffer as needed.

    Data is pulled through ``self._read_limited`` until the buffer holds a
    newline, holds at least ``size`` bytes, or the source returns nothing.
    Whatever follows the returned line stays in ``self.buffer``.
    """
    def wants_more():
        if b"\n" in self.buffer:
            return False
        return size is None or len(self.buffer) < size

    while wants_more():
        if size:
            # size is set here, so len(self.buffer) < size
            piece = self._read_limited(size - len(self.buffer))
        else:
            piece = self._read_limited()
        if not piece:
            break
        self.buffer += piece

    stream = BytesIO(self.buffer)
    line = stream.readline(size) if size else stream.readline()
    # Keep the unread tail for the next call.
    self.buffer = stream.read()
    return line
def test_load_physics(self):
    """Load a ``$PhysicalNames`` section and check the parsed body lines."""
    from io import BytesIO

    # One line per record, as the header/body assertions below require.
    payload = b'$PhysicalNames\n1\n2 1 "lower"\n$EndPhysicalNames'
    stream = BytesIO(payload)

    # Check header.
    self.assertEqual(stream.readline(), b'$PhysicalNames\n')

    # Check physics body.
    loaded = gmsh.Gmsh._load_physics(stream)
    self.assertEqual(list(loaded.keys()), ['physics'])
    physics = loaded['physics']
    self.assertEqual(len(physics), 2)
    self.assertEqual(physics[0], b'1')
    self.assertEqual(physics[1], b'2 1 "lower"')

    # Check trailing.
    self.assertEqual(stream.readline(), b'')
class Request(BaseHTTPRequestHandler):
    """Parse raw HTTP request data without a socket.

    After construction the parsed request is available through the usual
    handler attributes; ``body`` holds the text after the first blank line
    and ``method`` mirrors ``command``.
    """

    def __init__(self, data):
        text = decode_utf8(data)
        # Everything after the first blank line is the body.
        _, self.body = text.split('\r\n\r\n', 1)

        self.rfile = BytesIO(encode_utf8(data))
        self.raw_requestline = self.rfile.readline()
        self.error_code = None
        self.error_message = None
        self.parse_request()
        self.method = self.command
def test_read_inventory_v2():
    """Parse the v2 inventory with default and tiny buffers; results must agree."""
    def load(**options):
        stream = BytesIO(inventory_v2)
        stream.readline()  # discard the first line before handing over
        return read_inventory_v2(stream, "/util", posixpath.join, **options)

    invdata1 = load()
    # try again with a small buffer size to test the chunking algorithm
    invdata2 = load(bufsize=5)

    assert invdata1 == invdata2
    assert len(invdata1["py:module"]) == 2

    full_entries = (
        ("module1", ("foo", "2.0", "/util/foo.html#module-module1",
                     "Long Module desc")),
        ("module2", ("foo", "2.0", "/util/foo.html#module-module2", "-")),
    )
    for name, expected in full_entries:
        assert invdata1["py:module"][name] == expected

    uri_entries = (
        ("py:function", "module1.func", "/util/sub/foo.html#module1.func"),
        ("c:function", "CFunc", "/util/cfunc.html#CFunc"),
        ("std:term", "a term", "/util/glossary.html#term-a-term"),
    )
    for domain, name, uri in uri_entries:
        assert invdata1[domain][name][2] == uri
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse a raw HTTP request from bytes, without a socket.

    After construction the parsed request is available through the usual
    handler attributes (``command``, ``path``, ``headers``, ...). Parse
    errors are recorded in ``error_code`` / ``error_message`` instead of
    being written to a client.
    """

    def __init__(self, request):
        self.rfile = BytesIO(request)
        self.raw_requestline = self.rfile.readline()
        self.error_code = self.error_message = None
        self.parse_request()

    def send_error(self, code, message=None, explain=None):
        """Record the error instead of sending a response.

        ``explain`` is accepted (and ignored) because ``parse_request``
        passes it for over-long header lines; without it that call raised
        TypeError.
        """
        self.error_code = code
        self.error_message = message
def test_read_inventory_v2():
    """Parse the v2 inventory with default and tiny buffers; results must agree."""
    def load(**options):
        stream = BytesIO(inventory_v2)
        stream.readline()  # discard the first line before handing over
        return read_inventory_v2(stream, '/util', posixpath.join, **options)

    invdata1 = load()
    # try again with a small buffer size to test the chunking algorithm
    invdata2 = load(bufsize=5)

    assert invdata1 == invdata2
    assert len(invdata1['py:module']) == 2

    full_entries = (
        ('module1', ('foo', '2.0', '/util/foo.html#module-module1',
                     'Long Module desc')),
        ('module2', ('foo', '2.0', '/util/foo.html#module-module2', '-')),
    )
    for name, expected in full_entries:
        assert invdata1['py:module'][name] == expected

    uri_entries = (
        ('py:function', 'module1.func', '/util/sub/foo.html#module1.func'),
        ('c:function', 'CFunc', '/util/cfunc.html#CFunc'),
    )
    for domain, name, uri in uri_entries:
        assert invdata1[domain][name][2] == uri
class Request(BaseHTTPRequestHandler):
    """Parse a raw HTTP request from bytes, without a socket.

    Parse errors are recorded in ``error_code`` / ``error_message`` instead
    of being written to a client.
    """

    # noinspection PyMissingConstructor
    def __init__(self, request_text):
        self.rfile = BytesIO(request_text)
        self.raw_requestline = self.rfile.readline()
        self.error_code = self.error_message = None
        self.parse_request()

    def send_error(self, code, message=None, explain=None):
        """Record the error instead of sending a response.

        ``explain`` is accepted (and ignored) because ``parse_request``
        passes it for over-long header lines; without it that call raised
        TypeError.
        """
        self.error_code = code
        self.error_message = message
def parse_csv_dataset(data, handle_units):
    """Parse CSV bytes into a dict mapping column name to column data.

    The first line is handed to ``parse_csv_header`` to obtain the column
    names and a units mapping; the rest is read with ``np.genfromtxt``.
    Each column is passed through ``handle_units(values, unit)`` where
    ``unit`` is the column's entry in the units mapping, or None.
    """
    fobj = BytesIO(data)
    names, units = parse_csv_header(fobj.readline().decode('utf-8'))
    arrs = np.genfromtxt(
        fobj, dtype=None, names=names, delimiter=',', unpack=True,
        converters={'date': lambda s: parse_iso_date(s.decode('utf-8'))})

    d = dict()
    for f in arrs.dtype.fields:
        dat = arrs[f]
        # ``np.object`` was removed from NumPy; the builtin ``object``
        # compares equal to the same dtype on every NumPy version.
        if dat.dtype == object:
            dat = dat.tolist()
        d[f] = handle_units(dat, units.get(f, None))
    return d
def test_load_periodic(self):
    """Load a ``$Periodic`` section and check the single parsed entry."""
    from io import BytesIO

    # a triangle. One line per record, consistent with the header check
    # and the expected values below.
    payload = b'$Periodic\n1\n0 1 3\n1\n1 3\n$EndPeriodic'
    stream = BytesIO(payload)

    # Check header.
    self.assertEqual(stream.readline(), b'$Periodic\n')

    # Check periodic body.
    loaded = gmsh.Gmsh._load_periodic(stream)
    self.assertEqual(list(loaded.keys()), ['periodics'])
    periodics = loaded['periodics']
    self.assertEqual(len(periodics), 1)
    entry = periodics[0]

    # Compared through str() because one of the values is an array.
    actual = [(key, entry[key]) for key in sorted(entry.keys())]
    expected = [('mtag', 3), ('ndim', 0),
                ('nodes', np.array([[1, 3]], dtype='int32')),
                ('stag', 1)]
    self.assertEqual(str(actual), str(expected))

    # Check trailing.
    self.assertEqual(stream.readline(), b'')
def readline(self, size=None):
    """Return the next line, reading more data into the buffer as needed.

    Data is pulled through ``self._read_limited`` until the buffer holds a
    newline, holds at least ``size`` bytes, or the source returns nothing.
    """
    def wants_more():
        # Keep reading until a b'\n' has been seen.
        if b'\n' in self.buffer:
            return False
        return size is None or len(self.buffer) < size

    while wants_more():
        if size:
            # size is set here, so len(self.buffer) < size
            piece = self._read_limited(size - len(self.buffer))
        else:
            piece = self._read_limited()
        if not piece:
            break
        self.buffer += piece

    stream = BytesIO(self.buffer)
    line = stream.readline(size) if size else stream.readline()
    # Put the remaining data back into the buffer.
    self.buffer = stream.read()
    return line
def import_catalog(self, content, hash):
    """Import a tab-separated price catalog from raw bytes.

    The first line must contain a ``day.month.year`` date, which becomes
    the catalog date. The next two lines are skipped and every remaining
    line updates or creates one ``Item`` and links it to the catalog.

    Raises ``CommandError`` when the first line has no recognisable date.
    """
    data = BytesIO(content)
    first_row = data.readline()
    # Raw literal: ``\d`` and ``\.`` are invalid escapes in a plain bytes
    # literal (a SyntaxWarning on recent Pythons). The pattern bytes are
    # unchanged.
    # NOTE(review): the first separator is an unescaped ``.`` and so
    # matches any character - confirm whether that is intended.
    date = re.match(br'[^\d]+(\d{1,2}).(\d{1,2})\.(\d{4}).*', first_row)
    if not date:
        raise CommandError('Incorrect first row')
    day, month, year = date.groups()
    set_date = datetime(int(year), int(month), int(day))

    data.readline()  # Empty line
    data.readline()  # Header line

    catalog, _ = PriceCatalog.objects.get_or_create(
        date=set_date,
        hash=hash,
    )

    for line in data:
        rd = line.decode('utf-8').split('\t')
        defaults = {
            'name': rd[1],
            'brand': rd[2],
        }
        # Columns 3-5 are optional numeric fields; empty cells are skipped.
        for i, field in enumerate(['bottle_size', 'price', 'price_per_liter']):
            if rd[3 + i]:
                defaults[field] = float_(rd[3 + i])
        defaults['abv'] = float_(rd[20])

        if defaults.get('price_per_liter') and defaults.get('abv'):
            defaults['price_per_alcohol'] = (
                100 * defaults['price_per_liter'] / defaults['abv'])

        item, _ = Item.objects.update_or_create(
            number=int(rd[0]),
            defaults=defaults
        )
        catalog_item, _ = PriceCatalogItem.objects.get_or_create(
            price_catalog=catalog,
            item=item,
        )
        print(item)
def test_version_2_0(): f = BytesIO() # requires more than 2 byte for header dt = [(("%d" % i) * 100, float) for i in range(500)] d = np.ones(1000, dtype=dt) format.write_array(f, d, version=(2, 0)) with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', UserWarning) format.write_array(f, d) assert_(w[0].category is UserWarning) # check alignment of data portion f.seek(0) header = f.readline() assert_(len(header) % format.ARRAY_ALIGN == 0) f.seek(0) n = format.read_array(f) assert_array_equal(d, n) # 1.0 requested but data cannot be saved this way assert_raises(ValueError, format.write_array, f, d, (1, 0))
class TwoWayStringIO(object):
    """File-like object with independent read and write sides.

    Reads are served from ``readable_data``; writes are collected in a
    separate buffer and published as ``written_data`` on ``close()``.
    """

    def __init__(self, readable_data):
        self.closed = False
        self.written_data = None
        self.readable_data = readable_data
        self.write_buffer = BytesIO()
        self.read_buffer = BytesIO(readable_data)

    def read(self, *args, **kwargs):
        reader = self.read_buffer
        return reader.read(*args, **kwargs)

    def readline(self, *args, **kwargs):
        reader = self.read_buffer
        return reader.readline(*args, **kwargs)

    def write(self, *args, **kwargs):
        writer = self.write_buffer
        return writer.write(*args, **kwargs)

    def close(self):
        # Snapshot what was written before the read side is closed.
        self.closed = True
        self.written_data = self.write_buffer.getvalue()
        self.read_buffer.close()

    def flush(self):
        self.write_buffer.flush()
class FastIO(IOBase):
    """Byte buffer in front of a raw file descriptor.

    Reads pull chunks from the descriptor with ``os.read`` and append them
    to an in-memory buffer; writes collect in the same buffer until
    ``flush()`` hands them to ``os.write``.
    """

    newlines = 0

    def __init__(self, file):
        import os
        self.os = os
        self._fd = file.fileno()
        self.buffer = BytesIO()
        mode = file.mode
        self.writable = "x" in mode or "r" not in mode
        self.write = self.buffer.write if self.writable else None

    def _pull(self):
        # Read one chunk and append it to the end of the buffer without
        # moving the current read position. Returns the chunk.
        chunk = self.os.read(
            self._fd, max(self.os.fstat(self._fd).st_size, BUFSIZE))
        here = self.buffer.tell()
        self.buffer.seek(0, 2)
        self.buffer.write(chunk)
        self.buffer.seek(here)
        return chunk

    def read(self):
        # Drain the descriptor, then return everything not yet consumed.
        while self._pull():
            pass
        self.newlines = 0
        return self.buffer.read()

    def readline(self):
        while self.newlines == 0:
            chunk = self._pull()
            # An empty chunk counts as one pending line so the loop ends.
            self.newlines = chunk.count(b"\n") + (not chunk)
        self.newlines -= 1
        return self.buffer.readline()

    def flush(self):
        if self.writable:
            self.os.write(self._fd, self.buffer.getvalue())
            self.buffer.truncate(0)
            self.buffer.seek(0)
class TestUnSeekable:
    """File-like object that logs every read and refuses to seek.

    ``log`` collects ``(method_name, argument)`` tuples for each ``read``,
    ``readline`` and ``readlines`` call.
    """

    def __init__(self, text):
        payload = text if isinstance(text, bytes) else text.encode('utf-8')
        self._file = BytesIO(payload)
        self.log = []

    def _note(self, method, argument):
        self.log.append((method, argument))

    def tell(self):
        return self._file.tell()

    def seek(self, offset, whence=0):
        # Seeking is what this stand-in forbids.
        assert False

    def read(self, size=-1):
        self._note("read", size)
        return self._file.read(size)

    def readline(self, size=-1):
        self._note("readline", size)
        return self._file.readline(size)

    def readlines(self, sizehint=-1):
        self._note("readlines", sizehint)
        return self._file.readlines(sizehint)
def extract_url(self, data, host, port, default_port):
    """Derive the request URL from the first line of ``data``.

    Only the first call does anything. A ``CONNECT`` request just records
    the tunnel target; otherwise ``self.url`` is set, either to the
    absolute URL from the request line or to one built from the host,
    port and path.
    """
    if self.first_line_read:
        return
    self.first_line_read = True

    request_line = to_native_str(BytesIO(data).readline(), 'latin-1')
    pieces = request_line.split(' ', 2)
    verb, path = pieces[0], pieces[1]

    if verb == "CONNECT":
        # The target has the form host[:port].
        target_host, colon, target_port = path.partition(":")
        self.connect_host = target_host
        self.connect_port = int(target_port) if colon else default_port
        self.warc_headers['WARC-Proxy-Host'] = \
            "https://{0}:{1}".format(host, port)
        return

    # A tunnel recorded by an earlier CONNECT overrides the given target.
    if self.connect_host:
        host = self.connect_host
    if self.connect_port:
        port = self.connect_port

    if path.startswith(('http:', 'https:')):
        # Absolute URL in the request line (proxy-style request).
        self.warc_headers['WARC-Proxy-Host'] = \
            "http://{0}:{1}".format(host, port)
        self.url = path
        return

    scheme = 'https' if default_port == 443 else 'http'
    url = scheme + '://' + host
    if port != default_port:
        url += ':' + str(port)
    self.url = url + path
class DummySocket(object):
    """In-memory socket stand-in that tracks how much has been consumed.

    Sent data is queued; reads come from a ``BytesIO`` and ``buffer``
    exposes only the part that has not been read yet.
    """

    def __init__(self):
        self.can_read = False
        self._read_counter = 0
        self._buffer = BytesIO()
        self.queue = []

    def _consumed(self, data):
        # Account for bytes handed out, then wrap them for the caller.
        self._read_counter += len(data)
        return memoryview(data)

    @property
    def buffer(self):
        unread = self._buffer.getvalue()[self._read_counter:]
        return memoryview(unread)

    def advance_buffer(self, amt):
        self._read_counter += amt
        self._buffer.read(amt)

    def send(self, data):
        if isinstance(data, bytes):
            self.queue.append(data)
            return
        raise TypeError()

    def recv(self, l):
        return self._consumed(self._buffer.read(l))

    def close(self):
        pass

    def readline(self):
        return self._consumed(self._buffer.readline())

    def fill(self):
        pass
def from_bytes(cls, bytestr):
    """Drive a ``RequestReader`` to completion from an in-memory byte string.

    The reader's state says what it needs next (a line, a fixed amount of
    data, or a peek); each request is served from a ``BytesIO`` over
    ``bytestr`` until the reader reports ``M.Complete``.

    Returns the reader's ``raw_request``. Raises ``RuntimeError`` when the
    reader asks for a state this loop does not know.
    """
    bio = BytesIO(bytestr)
    reader = RequestReader()
    state = reader.state
    while state is not M.Complete:
        if state.type == M.NeedLine.type:
            line = bio.readline()  # TODO: limit?
            next_state = M.HaveLine(value=line)
        elif state.type == M.NeedData.type:
            data = bio.read(state.amount)
            # TODO: can this block or return None if empty etc?
            next_state = M.HaveData(value=data)
        elif state.type == M.NeedPeek.type:
            # io.BytesIO has no peek(); emulate it by reading and then
            # restoring the stream position.
            mark = bio.tell()
            peeked = bio.read(state.amount)
            bio.seek(mark)
            # TODO: again, what happens on end of stream (peeked is empty)
            # NOTE(review): the peeked bytes are passed as ``amount`` here,
            # unchanged from the original - confirm against M.HavePeek.
            next_state = M.HavePeek(amount=peeked)
        else:
            raise RuntimeError('Unknown state %r' % (state,))
        state = reader.send(next_state)

    return reader.raw_request
def get(self, path, host=None, headers: dict = None):
    """Send an HTTP/1.0 GET over ``self._stream`` and return the result.

    Args:
        path: request target placed on the request line.
        host: value for the ``Host`` header; falls back to ``self._host``.
        headers: extra request headers. The dict is not modified.

    Returns:
        ``(status, body)`` where ``status`` is an int and ``body`` is the
        response body as bytes, decompressed when the response declares
        ``deflate`` or ``gzip`` content encoding.
    """
    # Work on a copy so adding Host never leaks into the caller's dict.
    headers = dict(headers) if headers else {}
    host = host or self._host
    if host:
        headers['Host'] = host

    # Request line, header lines, then the single blank line that ends the
    # header block (previously a request without headers got two).
    lines = [f'GET {path} HTTP/1.0']
    lines.extend(f'{key}: {val}' for (key, val) in headers.items())
    http_query = '\r\n'.join(lines) + '\r\n\r\n'
    self._stream.send(http_query.encode())

    raw_response = recv_all(self._stream)
    header, body = raw_response.split(b'\r\n\r\n', 1)
    f = BytesIO(header)
    status_line = f.readline().split(b' ')
    _protocol, status = status_line[:2]
    response_headers = parse_headers(f)

    # .get() tolerates a missing header for both mappings and messages.
    content_encoding = response_headers.get('Content-Encoding')
    if content_encoding == 'deflate':
        body = zlib.decompress(body)
    elif content_encoding == 'gzip':
        body = gzip.decompress(body)
    return int(status), body
def _build_HTTPMessage(self, raw_headers):
    """Parse raw response headers and return the parsed message's ``get``.

    The first line (the status line) is skipped before the remainder is
    handed to ``parse_headers``.
    """
    stream = BytesIO(raw_headers)
    # Get rid of the status line
    stream.readline()
    return parse_headers(stream).get
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse a raw HTTP request and replay it through a ``requests`` session.

    The raw request is kept in ``rfile``; the parsed request line, headers,
    cookies (``cookies``), body (``postBody``) and target ``URL`` are
    rebuilt from it every time the raw value changes.
    """

    def __init__(self, reqFile=None, **kwargs):
        # The original character classes ``[c|C]`` also matched a literal
        # ``|``; only the two letter cases are intended.
        self.regexContentLengthPattern = re.compile(
            r"\s*[cC]ontent-[lL]ength:\s*([0-9]+)\s*")
        self.session = kwargs.get('session', None)
        if self.session is None:
            self.session = requests.Session()
        self.instanciateConnection(**kwargs)
        if reqFile is not None:
            with open(reqFile, 'rb') as requestFile:
                self.updateRequestFromRawValue(requestFile.read())

    # instantiate the request from raw bytes
    def updateRequestFromRawValue(self, raw):
        """Replace the raw request with ``raw`` and re-parse everything."""
        self.rfile = BytesIO(raw)
        self.raw_requestline = self.rfile.readline()
        self.error_code = self.error_message = None
        # from BaseHTTPRequestHandler
        self.parse_request()
        # parse_request does not read the request body
        self.parseRequestBody()
        # move cookies from the headers to the ``cookies`` attribute
        self.parseCookiesFromHeaders()
        # response filled later
        self.response = None
        self.addURLToRequest()

    def instanciateConnection(self, **kwargs):
        self.connection = Connection.Connection(**kwargs)

    def printFields(self):
        """Print the parsed request for debugging."""
        print("Error Code: ", self.error_code)  # None (check this first)
        print("Command: ", self.command)  # "GET"
        print("Path: ", self.path)  # "/who/ken/trust.html"
        print("Nb of Headers: ", len(self.headers))  # 3
        print("Header keys: ", self.headers.keys())
        print("URL: ", self.URL)
        print(self.cookies.output(attrs=[], header="Cookie:"))
        if self.command == "POST":
            print("Post body.:", self.postBody)
        print("rfile:", self.rfile.getvalue())

    # create the URL from host, path and protocol
    def addURLToRequest(self):
        url = self.headers['host'] + self.path
        if self.connection.isTLS:
            url = "https://" + url
        else:
            url = "http://" + url
        self.URL = url

    def parseRequestBody(self):
        """Read ``Content-Length`` bytes of body, or set ``postBody`` to None."""
        # Header lookup on the parsed message is case-insensitive, so any
        # spelling of the header name is found.
        raw_length = self.headers.get('content-length')
        if raw_length is None:
            self.postBody = None
        else:
            # Content-Length is decimal; base 0 rejected leading zeros.
            self.postBody = self.rfile.read(int(raw_length))

    def _findContentLengthMatch(self):
        # Scan the raw request after the request line and return the regex
        # match for the first Content-Length line, or None.
        self.rfile.seek(0)
        # the first line contains the command
        self.rfile.readline()
        while True:
            line = str(self.rfile.readline(), 'utf-8')
            if not line:
                return None
            match = self.regexContentLengthPattern.search(line)
            if match:
                return match

    def updateContentLengthInRawReq(self, newLength):
        """Rewrite the Content-Length line of a POST request and re-parse."""
        if self.command != "POST":
            return
        match = self._findContentLengthMatch()
        if match is None:
            return
        raw = str(self.rfile.getvalue(), 'utf-8').replace(
            match.string, "content-length: " + str(newLength) + " \r\n")
        self.updateRequestFromRawValue(bytes(raw, 'utf-8'))

    def getContentLengthInRawReq(self):
        """Return the Content-Length written in the raw POST request.

        Returns None for other commands or when no such line exists.
        """
        if self.command != "POST":
            return None
        match = self._findContentLengthMatch()
        if match is None:
            return None
        return int(match.group(1))

    def parseCookiesFromHeaders(self):
        """Move every ``Cookie`` header into ``self.cookies``."""
        self.cookies = http.cookies.SimpleCookie()
        # The header value is loaded as-is: with a ``Cookie: `` prefix the
        # parser sees a key without a value and rejects the whole string.
        for value in self.headers.get_all('cookie', []):
            self.cookies.load(value)
        # remove the cookie from the (normal) headers
        del self.headers['cookie']

    def send_error(self, code, message=None, explain=None):
        """Record a parse error instead of sending a response.

        ``explain`` is accepted because ``parse_request`` passes it for
        over-long header lines.
        """
        self.error_code = code
        self.error_message = message

    # to be optimized later (the whole request is put in memory, replaced
    # and re-written, not so efficient)
    def replaceString(self, strToBeDel, strToBePst):
        """Replace ``strToBeDel`` everywhere in the raw request.

        When the request declares a Content-Length it is adjusted by the
        byte-size change of the replacements made in the body.
        """
        text = str(self.rfile.getvalue(), 'utf-8')
        if strToBeDel not in text:
            return

        # Only replacements after the blank line change the body size.
        parts = re.split(r"\r?\n\r?\n", text, maxsplit=1)
        body = parts[1] if len(parts) > 1 else ""
        occurrences = body.count(strToBeDel)

        replaced = text.replace(strToBeDel, strToBePst)
        self.updateRequestFromRawValue(bytes(replaced, 'utf-8'))

        previousContLeng = self.getContentLengthInRawReq()
        if previousContLeng is None:
            # Not a POST, or no Content-Length line: nothing to adjust.
            return
        # Only utf-8 is supported, so measure the change in encoded bytes.
        delta = occurrences * (len(strToBePst.encode('utf-8')) -
                               len(strToBeDel.encode('utf-8')))
        self.updateContentLengthInRawReq(previousContLeng + delta)

    def send(self):
        """Send the request through the session and record the response.

        Sets ``response`` and ``elapsed`` (seconds). Raises ``ValueError``
        for a command other than GET, POST, HEAD, PUT or DELETE.
        """
        # cookies must be formatted for the Requests lib
        cookies = {key: morsel.value for key, morsel in self.cookies.items()}
        common = dict(headers=self.headers,
                      proxies=self.connection.proxies,
                      cookies=cookies,
                      verify=self.connection.verifyTLSCert)

        if self.command == "GET":
            self.response = self.session.get(self.URL, **common)
        elif self.command in ("POST", "HEAD", "PUT", "DELETE"):
            sender = getattr(self.session, self.command.lower())
            self.response = sender(self.URL, data=self.postBody, **common)
        else:
            # Previously this fell through and failed on ``None.elapsed``.
            raise ValueError(
                "Unsupported HTTP command: %r" % (self.command,))
        self.elapsed = self.response.elapsed.total_seconds()
class HTTPRequest(BaseHTTPRequestHandler):
    """In-memory HTTP request parser built on ``BaseHTTPRequestHandler``.

    Follows the approach from http://stackoverflow.com/a/5955949: ``rfile``
    is pointed at a ``BytesIO`` so ``parse_request`` can run without a socket.
    After parsing, ``rfile`` is positioned just past the headers.
    """

    scheme = 'http'

    # noinspection PyMissingConstructor
    def __init__(self, request_bytes, scheme):
        """Parse ``request_bytes``; raise ``Exception`` on a parse error."""
        assert isinstance(request_bytes, bytes)

        self.scheme = scheme
        self.error_code = None
        self.error_message = None
        self.rfile = BytesIO(request_bytes)
        self.raw_requestline = self.rfile.readline()
        self.parse_request()
        if self.error_message:
            raise Exception(self.error_message)

        # Swap the parsed header object for a plain dict with capitalised
        # keys, to smooth over Py2/Py3 differences.
        parsed_headers = getattr(self, 'headers', {})
        self.headers = capitalize_keys(dict(parsed_headers))

    def send_error(self, code, message=None, explain=None):
        """Record the parse error instead of writing a response."""
        self.error_code = code
        self.error_message = message

    def extract_fields(self, field=None, available_fields=None):
        """Split the unread part of ``rfile`` into field lines and the rest.

        A field line looks like ``:<field> <key>: <value>``.  With ``field``
        given, only that field name is recognised; otherwise any name in
        ``available_fields`` (default ``AVAILABLE_FIELDS``) is.

        Returns ``(fields, remaining)``: a list of stripped
        ``(field, key, value)`` tuples and a ``BytesIO`` holding every other
        line.  The read position of ``rfile`` is restored afterwards.
        Raises ``ValueError`` if ``field`` is not an available field.
        """
        if available_fields is None:
            available_fields = AVAILABLE_FIELDS

        if field is not None and field not in available_fields:
            raise ValueError(
                "Unexpected field '{}'. Expected one of {}.".format(
                    field, ', '.join(available_fields)))

        wanted = '|'.join(available_fields) if field is None else field
        field_pattern = r':({}) (.+): (.+)'.format(wanted)

        start = self.rfile.tell()
        matched = []
        leftover = []
        for raw_line in self.rfile.readlines():
            text = raw_line.decode('utf-8')
            hit = re.match(field_pattern, text)
            if hit is None:
                leftover.append(text)
                continue
            matched.append(tuple(part.strip() for part in hit.groups()))
        self.rfile.seek(start)

        # Lines keep their own line endings and are joined with '\n' on top.
        body = '\n'.join(leftover).encode('utf-8').strip()
        return (matched, BytesIO(body))

    def auth(self):
        """Return ``(method, credentials)`` from the Authorization header.

        ``(None, None)`` when the header is absent or not exactly two
        whitespace-separated parts.  ``Basic`` credentials are
        base64-decoded to text; any other method yields the token as bytes.
        """
        try:
            parts = self.headers.get('Authorization').split()
            method, token = parts
        except (AttributeError, KeyError, ValueError):
            return None, None

        token_bytes = token if isinstance(token, bytes) else token.encode('utf-8')
        if method != 'Basic':
            return method, token_bytes
        return method, base64.b64decode(token_bytes).decode('utf-8')

    def url(self):
        """Build the request URL, appending any ``:query`` field parameters."""
        host = self.headers.get('Host', 'nohost')
        base_url = '{}://{}{}'.format(self.scheme, host, self.path)

        query_fields, _ = self.extract_fields('query')
        pairs = [(entry[1], entry[2]) for entry in query_fields]
        if not pairs:
            return base_url
        return add_url_params(base_url, pairs)

    def data(self):
        """Return the request payload, or ``None`` when there is none.

        Payloads are JSON-decoded (and passed through ``ordered``) when the
        Content-Type is JSON according to ``is_json``; otherwise the raw
        bytes are returned.
        """
        _, remaining = self.extract_fields(None)
        payload = remaining.read()
        if not payload:
            return None

        if not is_json(self.headers.get('Content-Type', '')):
            return payload

        assert isinstance(payload, bytes)
        return ordered(json.loads(payload.decode('utf-8')))
class HttpRequest(object):
    """A basic HTTP request.

    Holds the parsed request state (GET/POST/COOKIES/META/FILES) and offers
    a file-like read interface over ``self._stream``, which a subclass is
    expected to provide.
    """

    # The encoding used in GET/POST dicts. None means use default setting.
    _encoding = None
    # Class-level default; replaced per instance via the ``upload_handlers``
    # property rather than mutated in place.
    _upload_handlers = []

    def __init__(self):
        # WARNING: The `WSGIRequest` subclass doesn't call `super`.
        # Any variable assignment made here should also happen in
        # `WSGIRequest.__init__()`.

        self.GET = QueryDict(mutable=True)
        self.POST = QueryDict(mutable=True)
        self.COOKIES = {}
        self.META = {}
        self.FILES = MultiValueDict()

        self.path = ''
        self.path_info = ''
        self.method = None
        self.resolver_match = None
        self._post_parse_error = False
        self.content_type = None
        self.content_params = None

    def __repr__(self):
        # Fall back to the bare class name when there is no method or path.
        if self.method is None or not self.get_full_path():
            return force_str('<%s>' % self.__class__.__name__)
        return force_str(
            '<%s: %s %r>' % (self.__class__.__name__, self.method, force_str(self.get_full_path()))
        )

    def _get_raw_host(self):
        """
        Return the HTTP host using the environment or request headers. Skip
        allowed hosts protection, so may return an insecure host.
        """
        # We try three options, in order of decreasing preference.
        if settings.USE_X_FORWARDED_HOST and (
                'HTTP_X_FORWARDED_HOST' in self.META):
            host = self.META['HTTP_X_FORWARDED_HOST']
        elif 'HTTP_HOST' in self.META:
            host = self.META['HTTP_HOST']
        else:
            # Reconstruct the host using the algorithm from PEP 333.
            host = self.META['SERVER_NAME']
            server_port = self.get_port()
            # Only append the port when it is not the scheme's default.
            if server_port != ('443' if self.is_secure() else '80'):
                host = '%s:%s' % (host, server_port)
        return host

    def get_host(self):
        """Return the HTTP host using the environment or request headers.

        Raises ``DisallowedHost`` when ``settings.DEBUG`` is false and the
        host's domain does not validate against ``settings.ALLOWED_HOSTS``.
        """
        host = self._get_raw_host()

        # There is no hostname validation when DEBUG=True
        if settings.DEBUG:
            return host

        domain, port = split_domain_port(host)
        if domain and validate_host(domain, settings.ALLOWED_HOSTS):
            return host
        else:
            msg = "Invalid HTTP_HOST header: %r." % host
            if domain:
                msg += " You may need to add %r to ALLOWED_HOSTS." % domain
            else:
                msg += " The domain name provided is not valid according to RFC 1034/1035."
            raise DisallowedHost(msg)

    def get_port(self):
        """Return the port number for the request as a string."""
        if settings.USE_X_FORWARDED_PORT and 'HTTP_X_FORWARDED_PORT' in self.META:
            port = self.META['HTTP_X_FORWARDED_PORT']
        else:
            port = self.META['SERVER_PORT']
        return str(port)

    def get_full_path(self, force_append_slash=False):
        """Return the escaped path plus ``?<query string>`` when one is set.

        With ``force_append_slash`` a trailing ``/`` is added to the path if
        it does not already end with one.
        """
        # RFC 3986 requires query string arguments to be in the ASCII range.
        # Rather than crash if this doesn't happen, we encode defensively.
        return '%s%s%s' % (
            escape_uri_path(self.path),
            '/' if force_append_slash and not self.path.endswith('/') else '',
            ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else ''
        )

    def get_signed_cookie(self, key, default=RAISE_ERROR, salt='', max_age=None):
        """
        Attempts to return a signed cookie. If the signature fails or the
        cookie has expired, raises an exception... unless you provide the
        default argument in which case that value will be returned instead.
        """
        try:
            cookie_value = self.COOKIES[key]
        except KeyError:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        try:
            # The signer salt is the cookie name concatenated with ``salt``.
            value = signing.get_cookie_signer(salt=key + salt).unsign(
                cookie_value, max_age=max_age)
        except signing.BadSignature:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        return value

    def get_raw_uri(self):
        """
        Return an absolute URI from variables available in this request. Skip
        allowed hosts protection, so may return insecure URI.
        """
        return '{scheme}://{host}{path}'.format(
            scheme=self.scheme,
            host=self._get_raw_host(),
            path=self.get_full_path(),
        )

    def build_absolute_uri(self, location=None):
        """
        Builds an absolute URI from the location and the variables available in
        this request. If no ``location`` is specified, the absolute URI is
        built on ``request.get_full_path()``. If the location is already
        absolute, it is simply converted to an RFC 3987 compliant URI and
        returned; if it is relative or scheme-relative (i.e.,
        ``//example.com/``), it is urljoined to a base URL constructed from
        the request variables.
        """
        if location is None:
            # Make it an absolute url (but schemeless and domainless) for the
            # edge case that the path starts with '//'.
            location = '//%s' % self.get_full_path()
        bits = urlsplit(location)
        if not (bits.scheme and bits.netloc):
            current_uri = '{scheme}://{host}{path}'.format(scheme=self.scheme,
                                                           host=self.get_host(),
                                                           path=self.path)
            # Join the constructed URL with the provided location, which will
            # allow the provided ``location`` to apply query strings to the
            # base path as well as override the host, if it begins with //
            location = urljoin(current_uri, location)
        return iri_to_uri(location)

    def _get_scheme(self):
        """
        Hook for subclasses like WSGIRequest to implement. Returns 'http' by
        default.
        """
        return 'http'

    @property
    def scheme(self):
        """Return 'https' when the configured proxy header matches, else
        whatever ``_get_scheme()`` reports.

        Raises ``ImproperlyConfigured`` if ``SECURE_PROXY_SSL_HEADER`` cannot
        be unpacked into exactly two values.
        """
        if settings.SECURE_PROXY_SSL_HEADER:
            try:
                header, value = settings.SECURE_PROXY_SSL_HEADER
            except ValueError:
                raise ImproperlyConfigured(
                    'The SECURE_PROXY_SSL_HEADER setting must be a tuple containing two values.'
                )
            if self.META.get(header) == value:
                return 'https'
        return self._get_scheme()

    def is_secure(self):
        """Return True when the request scheme is 'https'."""
        return self.scheme == 'https'

    def is_ajax(self):
        """Return True when X-Requested-With is exactly 'XMLHttpRequest'."""
        return self.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'

    @property
    def encoding(self):
        """The encoding used for GET/POST accesses (None means default)."""
        return self._encoding

    @encoding.setter
    def encoding(self, val):
        """
        Sets the encoding used for GET/POST accesses. If the GET or POST
        dictionary has already been created, it is removed and recreated on the
        next access (so that it is decoded correctly).
        """
        self._encoding = val
        if hasattr(self, '_get'):
            del self._get
        if hasattr(self, '_post'):
            del self._post

    def _initialize_handlers(self):
        """Load one upload handler per entry in FILE_UPLOAD_HANDLERS."""
        self._upload_handlers = [uploadhandler.load_handler(handler, self)
                                 for handler in settings.FILE_UPLOAD_HANDLERS]

    @property
    def upload_handlers(self):
        """The upload handlers, lazily initialised from settings."""
        if not self._upload_handlers:
            # If there are no upload handlers defined, initialize them from settings.
            self._initialize_handlers()
        return self._upload_handlers

    @upload_handlers.setter
    def upload_handlers(self, upload_handlers):
        # ``_files`` only exists once POST/FILES have been loaded.
        if hasattr(self, '_files'):
            raise AttributeError("You cannot set the upload handlers after the upload has been processed.")
        self._upload_handlers = upload_handlers

    def parse_file_upload(self, META, post_data):
        """Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
        # Freeze the handler list so it cannot be altered once parsing starts.
        self.upload_handlers = ImmutableList(
            self.upload_handlers,
            warning="You cannot alter upload handlers after the upload has been processed."
        )
        parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
        return parser.parse()

    @property
    def body(self):
        """The raw request body, read once and cached in ``self._body``.

        Raises ``RawPostDataException`` if the stream was already read
        through ``read()``/``readline()``, and ``UnreadablePostError`` if
        reading fails with ``IOError``.
        """
        if not hasattr(self, '_body'):
            if self._read_started:
                raise RawPostDataException("You cannot access body after reading from request's data stream")
            try:
                self._body = self.read()
            except IOError as e:
                six.reraise(UnreadablePostError, UnreadablePostError(*e.args), sys.exc_info()[2])
            # Re-point the stream at the cached bytes so later reads still work.
            self._stream = BytesIO(self._body)
        return self._body

    def _mark_post_parse_error(self):
        """Set empty POST/FILES and flag that parsing the POST data failed."""
        self._post = QueryDict('')
        self._files = MultiValueDict()
        self._post_parse_error = True

    def _load_post_and_files(self):
        """Populate self._post and self._files if the content-type is a form type."""
        if self.method != 'POST':
            self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()
            return
        # The stream was consumed directly and no cached body exists, so the
        # POST data can no longer be parsed.
        if self._read_started and not hasattr(self, '_body'):
            self._mark_post_parse_error()
            return

        if self.content_type == 'multipart/form-data':
            if hasattr(self, '_body'):
                # Use already read data
                data = BytesIO(self._body)
            else:
                data = self
            try:
                self._post, self._files = self.parse_file_upload(self.META, data)
            except MultiPartParserError:
                # An error occurred while parsing POST data. Since when
                # formatting the error the request handler might access
                # self.POST, set self._post and self._file to prevent
                # attempts to parse POST data again.
                # Mark that an error occurred. This allows self.__repr__ to
                # be explicit about it instead of simply representing an
                # empty POST
                self._mark_post_parse_error()
                raise
        elif self.content_type == 'application/x-www-form-urlencoded':
            self._post, self._files = QueryDict(self.body, encoding=self._encoding), MultiValueDict()
        else:
            self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()

    def close(self):
        """Close every uploaded file held in ``self._files``, if loaded."""
        if hasattr(self, '_files'):
            for f in chain.from_iterable(l[1] for l in self._files.lists()):
                f.close()

    # File-like and iterator interface.
    #
    # Expects self._stream to be set to an appropriate source of bytes by
    # a corresponding request subclass (e.g. WSGIRequest).
    # Also when request data has already been read by request.POST or
    # request.body, self._stream points to a BytesIO instance
    # containing that data.

    def read(self, *args, **kwargs):
        """Read from the stream; ``IOError`` becomes ``UnreadablePostError``."""
        self._read_started = True
        try:
            return self._stream.read(*args, **kwargs)
        except IOError as e:
            six.reraise(UnreadablePostError, UnreadablePostError(*e.args), sys.exc_info()[2])

    def readline(self, *args, **kwargs):
        """Read one line; ``IOError`` becomes ``UnreadablePostError``."""
        self._read_started = True
        try:
            return self._stream.readline(*args, **kwargs)
        except IOError as e:
            six.reraise(UnreadablePostError, UnreadablePostError(*e.args), sys.exc_info()[2])

    def xreadlines(self):
        """Yield lines from the stream until an empty read."""
        while True:
            buf = self.readline()
            if not buf:
                break
            yield buf

    __iter__ = xreadlines

    def readlines(self):
        """Return all remaining lines as a list."""
        return list(iter(self))
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse a raw HTTP request from bytes, without a socket.

    After construction the usual handler attributes (``command``, ``path``,
    ``request_version``, ``headers``) are available.  If the request is
    malformed, ``error_code`` and ``error_message`` describe the problem;
    otherwise both are ``None``.
    """

    def __init__(self, request_text):
        """Parse ``request_text`` (bytes holding the raw request)."""
        self.rfile = BytesIO(request_text)
        self.raw_requestline = self.rfile.readline()
        self.error_code = self.error_message = None
        self.parse_request()

    def send_error(self, code, message=None, explain=None):
        """Record a parse error instead of sending an HTTP error response.

        ``parse_request`` reports malformed input through ``send_error``.
        The inherited implementation writes to the client connection, which
        does not exist here, so the error is stored on the instance instead.
        """
        self.error_code = code
        self.error_message = message
def nntp_server(self): """ A fake nntp server that generates responses like a real one It lets us test the protocol by simulating different responses. """ # Set io_wait flag self._io_wait.set() # Send Welcome Message if not self.sent_welcome: welcome_str = "200 l2g.caronc.dummy NNRP Service Ready" if self._can_post: welcome_str += " (posting ok)" if self._has_yenc: welcome_str += " (yEnc enabled)" try: self.socket.send(welcome_str + NNTP_EOD) except: # connection lost # print('DEBUG: SOCKET ERROR DURING SEND (EXITING)....') return self.sent_welcome = True data = BytesIO() d_len = data.tell() while self._active.is_set() and self.socket.connected: # print('DEBUG: SERVER LOOP') # ptr manipulation d_ptr = data.tell() if d_ptr > 32768: # Truncate data = BytesIO(data.read()) d_ptr = 0 data.seek(d_ptr) try: # print('DEBUG: SERVER BLOCKING FOR DATA') pending = self.socket.can_read(0.8) if pending is None: # No more data continue if not pending: # nothing pending; back to io_wait continue while self.socket.can_read(): # print('DEBUG: SERVER BLOCKING FOR DATA....') _data = self.socket.read() if not _data: # print('DEBUG: SERVER NO DATA (EXITING)....') # Reset our settings to prepare for another connection self.reset() return # print('DEBUG: SERVER READ DATA: %s' % _data.rstrip()) # Buffer response data.write(_data) d_len = data.tell() except (socket.error, SocketException): # Socket Issue # print('DEBUG: SOCKET ERROR (EXITING)....') # print('DEBUG: ERROR %s' % str(e)) # Reset our sent_welcome flag self.sent_welcome = False return # Seek End for size if d_ptr == d_len: continue data.seek(d_ptr) # Acquire our line line = data.readline() # Build our response response = self.put(line) # Return it on the socket try: self.socket.send(response + NNTP_EOD) except: # connection lost # print('DEBUG: SOCKET ERROR DURING SEND (EXITING)....') return
def raster(
        self,
        inputs,
        bands=None,
        scales=None,
        data_type=None,
        output_format='GTiff',
        srs=None,
        dimensions=None,
        resolution=None,
        bounds=None,
        bounds_srs=None,
        cutline=None,
        place=None,
        align_pixels=False,
        resampler=None,
        dltile=None,
        save=False,
        outfile_basename=None,
        **pass_through_params
):
    """Retrieve a translated and warped mosaic as an image file for a list
    of :class:`Metadata <descarteslabs.services.Metadata>` identifiers.

    :param inputs: List of :class:`Metadata` identifiers.
    :param bands: List of requested bands. If the last item in the list is
        an alpha band (with data range `[0, 1]`) it affects rastering of all
        other bands: when rastering multiple images, they are combined
        image-by-image only where each respective image's alpha band is `1`
        (pixels where the alpha band is not `1` are "transparent" in the
        overlap between images). If a pixel is fully masked considering all
        combined alpha bands it will be `0` in all non-alpha bands.
    :param scales: List of tuples specifying the scaling to be applied to
        each band. A 4-tuple ``(src_min, src_max, out_min, out_max)`` scales
        source values in ``src_min``..``src_max`` to the output range
        ``out_min``..``out_max``. A 2-tuple ``(src_min, src_max)`` is also
        allowed, in which case the output range defaults to ``(0, 255)``
        (a useful default for the common output type ``Byte``). Use ``None``
        for a band that should not be scaled. This tuple format and
        behaviour is identical to GDAL's scales during translation.
        Example: ``[(0, 10000, 0, 127), None, (0, 10000)]`` - the first band
        has 0-10000 scaled to 0-127, the second band is not scaled, the
        third band has 0-10000 scaled to 0-255.
    :param str output_format: Output format (one of ``GTiff``, ``PNG``,
        ``JPEG``).
    :param str data_type: Output data type (one of ``Byte``, ``UInt16``,
        ``Int16``, ``UInt32``, ``Int32``, ``Float32``, ``Float64``).
    :param str srs: Output spatial reference system definition understood
        by GDAL.
    :param float resolution: Desired resolution in output SRS units.
        Incompatible with `dimensions`.
    :param tuple dimensions: Desired output (width, height) in pixels.
        Incompatible with `resolution`.
    :param str cutline: A GeoJSON feature or geometry to be used as a
        cutline.
    :param str place: A slug identifier to be used as a cutline. When given,
        the place's low-resolution geometry replaces ``cutline``.
    :param tuple bounds: ``(min_x, min_y, max_x, max_y)`` in target SRS.
    :param str bounds_srs: Override the coordinate system in which bounds
        are expressed.
    :param bool align_pixels: Align pixels to the target coordinate system.
    :param str resampler: Resampling algorithm to be used during warping
        (``near``, ``bilinear``, ``cubic``, ``cubicspline``, ``lanczos``,
        ``average``, ``mode``, ``max``, ``min``, ``med``, ``q1``, ``q3``).
    :param str dltile: A dltile key used to specify the resolution, bounds,
        and srs. A dict is also accepted, in which case its
        ``['properties']['key']`` entry is used.
    :param bool save: Write resulting files to disk. Default: False
    :param str outfile_basename: If 'save' is True, override default
        filename using this string as a base.
    :param pass_through_params: Extra request parameters merged into the
        request; they override the named parameters above except ``dltile``.

    :return: A dictionary with two keys, ``files`` and ``metadata``. The
        value for ``files`` is a dictionary mapping file names to binary
        data for files (at the moment there will always be only a single
        file with the appropriate file extension based on the
        ``output_format`` requested). The value for ``metadata`` is a
        dictionary containing details about the raster operation that
        happened. These details can be useful for debugging but shouldn't
        otherwise be relied on (there are no guarantees that certain keys
        will be present).
    """
    cutline = as_json_string(cutline)

    # A place slug takes precedence over an explicit cutline.
    if place:
        place_service = Places()
        place_service.auth = self.auth
        place_shape = place_service.shape(place, geom='low')
        cutline = json.dumps(place_shape['geometry'])

    params = dict(
        keys=inputs,
        bands=bands,
        scales=scales,
        ot=data_type,
        of=output_format,
        srs=srs,
        resolution=resolution,
        shape=cutline,
        outputBounds=bounds,
        outputBoundsSRS=bounds_srs,
        outsize=dimensions,
        targetAlignedPixels=align_pixels,
        resampleAlg=resampler,
    )
    params.update(pass_through_params)

    if dltile is not None:
        params['dltile'] = dltile['properties']['key'] if isinstance(dltile, dict) else dltile

    response = self.session.post('/raster', json=params)
    stream = BytesIO(response.content)

    def next_json_line():
        # Each header in the response body is one JSON document per line.
        return json.loads(stream.readline().decode('utf-8').strip())

    # First line: overall response, with ``files`` holding the file count.
    json_resp = next_json_line()
    file_count = json_resp['files']

    files = {}
    json_resp['files'] = files
    for _ in range(file_count):
        # Per file: a JSON header line followed by ``length`` raw bytes.
        header = next_json_line()
        original_name = header['name']
        payload = stream.read(header['length'])

        if outfile_basename:
            # Keep everything after the first dot of the original base name.
            suffix = os.path.basename(original_name).partition(".")[2]
            target_name = "{}.{}".format(outfile_basename, suffix)
        else:
            target_name = original_name

        files[target_name] = payload

    if save:
        for path, contents in six.iteritems(files):
            with open(path, "wb") as out:
                out.write(contents)

    return DotDict(json_resp)
class HttpRequest(object):
    """A basic HTTP request.

    Holds the parsed request state (GET/POST/COOKIES/META/FILES) and offers
    a file-like read interface over ``self._stream``, which a subclass is
    expected to provide.
    """

    # The encoding used in GET/POST dicts. None means use default setting.
    _encoding = None
    # Class-level default; replaced per instance via ``upload_handlers``.
    _upload_handlers = []

    def __init__(self):
        self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {}
        self.path = ''
        self.path_info = ''
        self.method = None
        self._post_parse_error = False

    def __repr__(self):
        return build_request_repr(self)

    def get_host(self):
        """Returns the HTTP host using the environment or request headers."""
        # We try three options, in order of decreasing preference.
        if settings.USE_X_FORWARDED_HOST and (
            'HTTP_X_FORWARDED_HOST' in self.META):
            host = self.META['HTTP_X_FORWARDED_HOST']
        elif 'HTTP_HOST' in self.META:
            host = self.META['HTTP_HOST']
        else:
            # Reconstruct the host using the algorithm from PEP 333.
            host = self.META['SERVER_NAME']
            server_port = str(self.META['SERVER_PORT'])
            # Only append the port when it is not the scheme's default.
            if server_port != (self.is_secure() and '443' or '80'):
                host = '%s:%s' % (host, server_port)
        return host

    def get_full_path(self):
        """Return the path plus ``?<query string>`` when one is set."""
        # RFC 3986 requires query string arguments to be in the ASCII range.
        # Rather than crash if this doesn't happen, we encode defensively.
        return '%s%s' % (self.path, self.META.get('QUERY_STRING', '') and ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) or '')

    def get_signed_cookie(self, key, default=RAISE_ERROR, salt='', max_age=None):
        """
        Attempts to return a signed cookie. If the signature fails or the
        cookie has expired, raises an exception... unless you provide the
        default argument in which case that value will be returned instead.
        """
        try:
            cookie_value = self.COOKIES[key].encode('utf-8')
        except KeyError:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        try:
            # The signer salt is the cookie name concatenated with ``salt``.
            value = signing.get_cookie_signer(salt=key + salt).unsign(
                cookie_value, max_age=max_age)
        except signing.BadSignature:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        return value

    def build_absolute_uri(self, location=None):
        """
        Builds an absolute URI from the location and the variables available in
        this request. If no location is specified, the absolute URI is built on
        ``request.get_full_path()``.
        """
        if not location:
            location = self.get_full_path()
        if not absolute_http_url_re.match(location):
            current_uri = '%s://%s%s' % (self.is_secure() and 'https' or 'http',
                                         self.get_host(), self.path)
            location = urljoin(current_uri, location)
        return iri_to_uri(location)

    def _is_secure(self):
        """Default secure-check hook: true when env var HTTPS is "on"."""
        return os.environ.get("HTTPS") == "on"

    def is_secure(self):
        """Return True when the request is considered secure.

        Raises ``ImproperlyConfigured`` if ``SECURE_PROXY_SSL_HEADER`` cannot
        be unpacked into exactly two values.
        """
        # First, check the SECURE_PROXY_SSL_HEADER setting.
        if settings.SECURE_PROXY_SSL_HEADER:
            try:
                header, value = settings.SECURE_PROXY_SSL_HEADER
            except ValueError:
                raise ImproperlyConfigured('The SECURE_PROXY_SSL_HEADER setting must be a tuple containing two values.')
            if self.META.get(header, None) == value:
                return True

        # Failing that, fall back to _is_secure(), which is a hook for
        # subclasses to implement.
        return self._is_secure()

    def is_ajax(self):
        """Return True when X-Requested-With is exactly 'XMLHttpRequest'."""
        return self.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'

    def _set_encoding(self, val):
        """
        Sets the encoding used for GET/POST accesses. If the GET or POST
        dictionary has already been created, it is removed and recreated on the
        next access (so that it is decoded correctly).
        """
        self._encoding = val
        if hasattr(self, '_get'):
            del self._get
        if hasattr(self, '_post'):
            del self._post

    def _get_encoding(self):
        return self._encoding

    encoding = property(_get_encoding, _set_encoding)

    def _initialize_handlers(self):
        """Load one upload handler per entry in FILE_UPLOAD_HANDLERS."""
        self._upload_handlers = [uploadhandler.load_handler(handler, self)
                                 for handler in settings.FILE_UPLOAD_HANDLERS]

    def _set_upload_handlers(self, upload_handlers):
        # ``_files`` only exists once POST/FILES have been loaded.
        if hasattr(self, '_files'):
            raise AttributeError("You cannot set the upload handlers after the upload has been processed.")
        self._upload_handlers = upload_handlers

    def _get_upload_handlers(self):
        if not self._upload_handlers:
            # If there are no upload handlers defined, initialize them from settings.
            self._initialize_handlers()
        return self._upload_handlers

    upload_handlers = property(_get_upload_handlers, _set_upload_handlers)

    def parse_file_upload(self, META, post_data):
        """Returns a tuple of (POST QueryDict, FILES MultiValueDict)."""
        # Freeze the handler list so it cannot be altered once parsing starts.
        self.upload_handlers = ImmutableList(
            self.upload_handlers,
            warning = "You cannot alter upload handlers after the upload has been processed."
        )
        parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
        return parser.parse()

    @property
    def body(self):
        """The raw request body, read once and cached in ``self._body``.

        Raises ``Exception`` if the stream was already read through
        ``read()``/``readline()``, and ``UnreadablePostError`` if reading
        fails with ``IOError``.
        """
        if not hasattr(self, '_body'):
            if self._read_started:
                raise Exception("You cannot access body after reading from request's data stream")
            try:
                self._body = self.read()
            except IOError as e:
                # NOTE(review): three-argument raise is Python 2-only syntax;
                # this class will not compile on Python 3 as written.
                raise UnreadablePostError, e, sys.exc_traceback
            # Re-point the stream at the cached bytes so later reads still work.
            self._stream = BytesIO(self._body)
        return self._body

    @property
    def raw_post_data(self):
        """Deprecated alias for ``body``; emits a ``DeprecationWarning``."""
        warnings.warn('HttpRequest.raw_post_data has been deprecated. Use HttpRequest.body instead.', DeprecationWarning)
        return self.body

    def _mark_post_parse_error(self):
        """Set empty POST/FILES and flag that parsing the POST data failed."""
        self._post = QueryDict('')
        self._files = MultiValueDict()
        self._post_parse_error = True

    def _load_post_and_files(self):
        # Populates self._post and self._files
        if self.method != 'POST':
            self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict()
            return
        # The stream was consumed directly and no cached body exists, so the
        # POST data can no longer be parsed.
        if self._read_started and not hasattr(self, '_body'):
            self._mark_post_parse_error()
            return

        if self.META.get('CONTENT_TYPE', '').startswith('multipart'):
            if hasattr(self, '_body'):
                # Use already read data
                data = BytesIO(self._body)
            else:
                data = self
            try:
                self._post, self._files = self.parse_file_upload(self.META, data)
            except:
                # An error occurred while parsing POST data. Since when
                # formatting the error the request handler might access
                # self.POST, set self._post and self._file to prevent
                # attempts to parse POST data again.
                # Mark that an error occurred. This allows self.__repr__ to
                # be explicit about it instead of simply representing an
                # empty POST
                # (The bare except is safe here: the error is re-raised.)
                self._mark_post_parse_error()
                raise
        else:
            self._post, self._files = QueryDict(self.body, encoding=self._encoding), MultiValueDict()

    ## File-like and iterator interface.
    ##
    ## Expects self._stream to be set to an appropriate source of bytes by
    ## a corresponding request subclass (e.g. WSGIRequest).
    ## Also when request data has already been read by request.POST or
    ## request.body, self._stream points to a BytesIO instance
    ## containing that data.

    def read(self, *args, **kwargs):
        """Read from the underlying stream and mark reading as started."""
        self._read_started = True
        return self._stream.read(*args, **kwargs)

    def readline(self, *args, **kwargs):
        """Read one line from the stream and mark reading as started."""
        self._read_started = True
        return self._stream.readline(*args, **kwargs)

    def xreadlines(self):
        """Yield lines from the stream until an empty read."""
        while True:
            buf = self.readline()
            if not buf:
                break
            yield buf

    __iter__ = xreadlines

    def readlines(self):
        """Return all remaining lines as a list."""
        return list(iter(self))
class ReturnCodeToSubunit(object):
    """Expose a process's stdout as a stream that also reports its exit code.

    The object acts as a read-only stream (``read``, ``readline``,
    ``readlines``) over the process stdout.  Once stdout is exhausted the
    process is waited on; a non-zero return code is turned into a synthetic
    failing subunit test appended to the output, so subunit stream consumers
    can see the error.

    If the process closes its stdout and then does not terminate, reading
    from this stream will hang.

    This class will be deleted at some point, allowing parsing to read from
    the actual fd and benefit from select for aggregating non-subunit output.
    """

    def __init__(self, process):
        """Wrap a process as a readable stream.

        :param process: A subprocess.Popen object that is generating subunit.
        """
        self.proc = process
        self.source = self.proc.stdout
        self.lastoutput = LINEFEED
        self.done = False

    def _append_return_code_as_test(self):
        """Switch to an in-memory source holding the return-code report.

        Runs at most once; later calls are no-ops.
        """
        if self.done is True:
            return

        self.source = BytesIO()
        returncode = self.proc.wait()
        if returncode != 0:
            if self.lastoutput != LINEFEED:
                # Subunit V1 is line orientated, it has to start on a fresh
                # line. V2 needs to start on any fresh utf8 character border
                # - which is not guaranteed in an arbitrary stream endpoint,
                # so injecting a \n gives us such a guarantee.
                self.source.write(_b('\n'))

            if not v2_avail:
                self.source.write(
                    _b('test: process-returncode\n'
                       'failure: process-returncode [\n'
                       ' returncode %d\n'
                       ']\n' % returncode))
            else:
                report = subunit.StreamResultToBytes(self.source)
                report.status(test_id='process-returncode',
                              test_status='fail',
                              file_name='traceback',
                              mime_type='text/plain;charset=utf8',
                              file_bytes=('returncode %d'
                                          % returncode).encode('utf8'))
        self.source.seek(0)
        self.done = True

    def read(self, count=-1):
        """Read up to ``count`` bytes, falling through to the report."""
        if count == 0:
            return _b('')

        chunk = self.source.read(count)
        if not chunk:
            # Current source is exhausted: switch to the return-code report.
            self._append_return_code_as_test()
            return self.source.read(count)

        self.lastoutput = chunk[-1]
        return chunk

    def readline(self):
        """Read one line, falling through to the report at end of output."""
        line = self.source.readline()
        if not line:
            self._append_return_code_as_test()
            return self.source.readline()

        self.lastoutput = line[-1]
        return line

    def readlines(self):
        """Read all remaining lines, including any return-code report."""
        lines = self.source.readlines()
        if lines:
            self.lastoutput = lines[-1][-1]
        self._append_return_code_as_test()
        lines.extend(self.source.readlines())
        return lines
class VCRHTTPResponse(HTTPResponse):
    """
    Stub response class that gets returned instead of a HTTPResponse

    The body of the recorded response is served from an in-memory
    ``BytesIO``; the file-like methods below delegate to that buffer.
    """

    def __init__(self, recorded_response):
        """Build the stub from a recorded response mapping.

        ``recorded_response`` is read for ``["status"]["message"]``,
        ``["status"]["code"]``, ``["body"]["string"]`` and ``["headers"]``.
        Any Transfer-Encoding header is removed from the headers mapping
        in place.
        """
        self.fp = None
        self.recorded_response = recorded_response
        self.reason = recorded_response["status"]["message"]
        self.status = self.code = recorded_response["status"]["code"]
        self.version = None
        self._content = BytesIO(self.recorded_response["body"]["string"])
        self._closed = False

        headers = self.recorded_response["headers"]
        # Since we are loading a response that has already been serialized, our
        # response is no longer chunked.  That means we don't want any
        # libraries trying to process a chunked response.  By removing the
        # transfer-encoding: chunked header, this should cause the downstream
        # libraries to process this as a non-chunked response.
        te_key = [h for h in headers.keys() if h.upper() == "TRANSFER-ENCODING"]
        if te_key:
            del headers[te_key[0]]
        self.headers = self.msg = parse_headers(headers)

        self.length = compat.get_header(self.msg, "content-length") or None

    @property
    def closed(self):
        # In Python 3 the value of self.closed cannot be changed directly, so
        # self._closed is toggled instead and this property shadows the real
        # self.closed from the superclass.
        return self._closed

    def read(self, *args, **kwargs):
        return self._content.read(*args, **kwargs)

    def readall(self):
        """Return all remaining body bytes.

        ``io.BytesIO`` has no ``readall`` method (that belongs to
        ``io.RawIOBase``), so an argument-less ``read()`` is used to read
        to the end of the buffer.
        """
        return self._content.read()

    def readinto(self, *args, **kwargs):
        return self._content.readinto(*args, **kwargs)

    def readline(self, *args, **kwargs):
        return self._content.readline(*args, **kwargs)

    def readlines(self, *args, **kwargs):
        return self._content.readlines(*args, **kwargs)

    def seekable(self):
        return self._content.seekable()

    def tell(self):
        return self._content.tell()

    def isatty(self):
        return self._content.isatty()

    def seek(self, *args, **kwargs):
        return self._content.seek(*args, **kwargs)

    def close(self):
        """Mark the response as closed; the body buffer is left untouched."""
        self._closed = True
        return True

    def getcode(self):
        return self.status

    def isclosed(self):
        return self.closed

    def info(self):
        """Return the recorded headers, parsed afresh on every call."""
        return parse_headers(self.recorded_response["headers"])

    def getheaders(self):
        """Return the recorded headers as a list of header items."""
        message = parse_headers(self.recorded_response["headers"])
        return list(compat.get_header_items(message))

    def getheader(self, header, default=None):
        """Return all values of ``header`` joined by ", ", or ``default``.

        The header name is matched case-insensitively.
        """
        values = [v for (k, v) in self.getheaders() if k.lower() == header.lower()]

        if values:
            return ", ".join(values)
        else:
            return default

    def readable(self):
        return self._content.readable()
fStringIO.write('Hello')
fStringIO.write(' ')
fStringIO.write('World!')
# getvalue() returns the str accumulated by the writes above.
print(fStringIO.getvalue())

# To read from a StringIO, initialise it with a str and then read it the
# same way a file is read:
fStringIO1 = StringIO('Hello\nHi\nBybey')
s = fStringIO1.readline()
while s != '':
    print(s.strip())
    s = fStringIO1.readline()

# BytesIO
print('BytesIO========================================================')
# StringIO only handles str; binary data needs BytesIO instead.
# BytesIO reads and writes bytes in memory. Create one and write some bytes:
from io import BytesIO
fByteIO = BytesIO()
# Note that what gets written is UTF-8 encoded bytes, not a str.
fByteIO.write('哈喽'.encode('utf-8'))
print(fByteIO.getvalue())

# As with StringIO, a BytesIO can be initialised from bytes and then read
# like a file:
fByteIO1 = BytesIO(b'\xe5\x93\x88\xe5\x96\xbd')
print(fByteIO1.readline())

# Summary
# StringIO and BytesIO operate on str and bytes in memory, exposing the same
# interface as reading and writing files.
def parseResponse(self, rawheader, rawbody=None, type="curl"):
    """Parse a raw HTTP response into protocol, status code, headers and content.

    ``rawheader`` is the textual header section (possibly containing several
    status lines, e.g. ``100 Continue`` or redirect/proxy responses; only the
    headers of the last one are kept).  ``rawbody`` is the body as bytes, or
    ``None`` when no body is available.  When ``type`` is ``"curl"`` the
    ``Transfer-Encoding`` header is dropped before decoding, so the chunked
    decoder below only runs for other sources.

    The body is de-chunked and gzip/deflate-decompressed as indicated by the
    headers, then decoded with the charset found in the headers (falling
    back to UTF-8, undecodable bytes replaced).
    """
    self.__content = ""
    self._headers = []

    tp = TextParser()
    tp.setSource("string", rawheader)

    tp.readUntil(r"(HTTP/[0-9.]+) ([0-9]+)")
    while True:
        # Skip over any interim "100" responses to reach the real status line.
        while True:
            try:
                self.protocol = tp[0][0]
            except Exception:
                self.protocol = "unknown"

            try:
                self.code = tp[0][1]
            except Exception:
                self.code = "0"

            if self.code != "100":
                break
            else:
                tp.readUntil(r"(HTTP/[0-9.]+) ([0-9]+)")

        self.code = int(self.code)

        while True:
            tp.readLine()
            if tp.search("^([^:]+): ?(.*)$"):
                self.addHeader(tp[0][0], tp[0][1])
            else:
                break

        # curl sometimes sends two headers when using follow, 302 and the final header
        # also when using proxies
        tp.readLine()
        if not tp.search(r"(HTTP/[0-9.]+) ([0-9]+)"):
            break
        else:
            self._headers = []

    # ignore CRLFs until request line
    while tp.lastline == "" and tp.readLine():
        pass

    # TODO: this should be added to rawbody not directly to __content
    if tp.lastFull_line:
        self.addContent(tp.lastFull_line)

    while tp.skip(1):
        self.addContent(tp.lastFull_line)

    if type == "curl":
        self.delHeader("Transfer-Encoding")

    if self.header_equal("Transfer-Encoding", "chunked"):
        content = BytesIO(rawbody)
        # Chunks are bytes, so they must be collected as bytes (joining into
        # a str raises TypeError on Python 3).
        chunks = []
        # An empty size line (missing or truncated body) is treated as the
        # terminating zero-length chunk instead of crashing in int().
        nchunk = int(content.readline().strip() or b"0", 16)
        while nchunk:
            chunks.append(content.read(nchunk))
            content.readline()  # consume the line break that follows chunk data
            nchunk = int(content.readline().strip() or b"0", 16)

        rawbody = b"".join(chunks)

    if self.header_equal("Content-Encoding", "gzip"):
        compressedstream = BytesIO(rawbody)
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        rawbody = gzipper.read()
        self.delHeader("Content-Encoding")
    elif self.header_equal("Content-Encoding", "deflate"):
        deflated_data = None
        try:
            # First assume a zlib-wrapped stream ...
            deflater = zlib.decompressobj()
            deflated_data = deflater.decompress(rawbody)
            deflated_data += deflater.flush()
        except zlib.error:
            try:
                # ... then fall back to a raw deflate stream without header.
                deflater = zlib.decompressobj(-zlib.MAX_WBITS)
                deflated_data = deflater.decompress(rawbody)
                deflated_data += deflater.flush()
            except zlib.error:
                # Keep the body as bytes so the .decode() below still works.
                deflated_data = b""
        rawbody = deflated_data
        self.delHeader("Content-Encoding")

    if rawbody is not None:
        # Try to get charset encoding from headers
        content_encoding = get_encoding_from_headers(dict(self.getHeaders()))

        # fallback to default encoding
        if content_encoding is None:
            content_encoding = "utf-8"

        self.__content = python2_3_convert_from_unicode(
            rawbody.decode(content_encoding, errors="replace"))
def test_limit(self):
    """readline(limit) must stop after ``limit`` bytes even in mid-line."""
    reader = tcp.Reader(BytesIO(b"foobar\nfoobar"))
    first_read = reader.readline(3)
    assert first_read == b"foo"
def test_dump():
    """Dump every X/y/zero_based/dtype combination and check it reloads intact."""
    X_sparse, y_dense = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()
    y_sparse = sp.csr_matrix(y_dense)

    # slicing a csr_matrix can unsort its .indices, so test that we sort
    # those correctly
    X_sliced = X_sparse[np.arange(X_sparse.shape[0])]
    y_sliced = y_sparse[np.arange(y_sparse.shape[0])]

    version_tag = "scikit-learn %s" % sklearn.__version__

    for X in (X_sparse, X_dense, X_sliced):
        for y in (y_sparse, y_dense, y_sliced):
            for zero_based in (True, False):
                for dtype in [np.float32, np.float64, np.int32, np.int64]:
                    if sp.issparse(y) and y.shape[0] == 1:
                        # make sure y's shape is: (n_samples, n_labels)
                        # when it is sparse
                        y = y.T

                    # Note: with dtype=np.int32 we are performing unsafe casts,
                    # where X.astype(dtype) overflows. The result is
                    # then platform dependent and X_dense.astype(dtype) may be
                    # different from X_sparse.astype(dtype).asarray().
                    X_input = X.astype(dtype)

                    # we need to pass a comment to get the version info in;
                    # LibSVM doesn't grok comments so they're not put in by
                    # default anymore.
                    f = BytesIO()
                    dump_svmlight_file(X_input, y, f, comment="test",
                                       zero_based=zero_based)
                    f.seek(0)

                    header = str(f.readline(), "utf-8")
                    assert version_tag in header

                    header = str(f.readline(), "utf-8")
                    assert ["one", "zero"][zero_based] + "-based" in header

                    X2, y2 = load_svmlight_file(f, dtype=dtype,
                                                zero_based=zero_based)
                    assert X2.dtype == dtype
                    assert_array_equal(X2.sorted_indices().indices, X2.indices)

                    X2_dense = X2.toarray()
                    X_input_dense = (X_input.toarray() if sp.issparse(X_input)
                                     else X_input)

                    # allow a rounding error at the last decimal place
                    decimals = 4 if dtype == np.float32 else 15
                    assert_array_almost_equal(X_input_dense, X2_dense, decimals)
                    assert_array_almost_equal(
                        y_dense.astype(dtype, copy=False), y2, decimals)
class InputFile(object):
    """Seekable, file-like view over a forward-only request body stream.

    Everything read from the underlying ``rfile`` is mirrored into a side
    buffer (in memory, or a temporary file for bodies larger than
    ``max_buffer_size``) so that callers can seek backwards and re-read.

    Invariant: the buffer's position never exceeds ``_file_position``, the
    number of bytes consumed from ``rfile`` so far.
    """

    max_buffer_size = 1024*1024

    def __init__(self, rfile, length):
        """File-like object used to provide a seekable view of request body data"""
        self._file = rfile
        self.length = length

        self._file_position = 0

        if length > self.max_buffer_size:
            self._buf = tempfile.TemporaryFile()
        else:
            self._buf = BytesIO()

    @property
    def _buf_position(self):
        """Current read position inside the side buffer."""
        rv = self._buf.tell()
        assert rv <= self._file_position
        return rv

    def read(self, bytes=-1):
        """Read up to ``bytes`` bytes (all remaining data when negative).

        Already-buffered data is served from the buffer first; anything
        beyond that is pulled from the underlying stream and appended to the
        buffer.  Never reads past ``length``.
        """
        assert self._buf_position <= self._file_position

        if bytes < 0:
            bytes = self.length - self._buf_position
        bytes_remaining = min(bytes, self.length - self._buf_position)

        if bytes_remaining == 0:
            return b""

        if self._buf_position != self._file_position:
            buf_bytes = min(bytes_remaining, self._file_position - self._buf_position)
            old_data = self._buf.read(buf_bytes)
            bytes_remaining -= buf_bytes
        else:
            old_data = b""

        assert bytes_remaining == 0 or self._buf_position == self._file_position, (
            "Before reading buffer position (%i) didn't match file position (%i)" %
            (self._buf_position, self._file_position))
        new_data = self._file.read(bytes_remaining)
        self._buf.write(new_data)
        # Advance by what was actually delivered: the stream may return fewer
        # bytes than requested, and the buffer only grew by len(new_data).
        self._file_position += len(new_data)
        assert bytes_remaining == 0 or self._buf_position == self._file_position, (
            "After reading buffer position (%i) didn't match file position (%i)" %
            (self._buf_position, self._file_position))

        return old_data + new_data

    def tell(self):
        """Return the current position of the seekable view."""
        return self._buf_position

    def seek(self, offset):
        """Move to absolute ``offset``; raise ValueError if out of range."""
        if offset > self.length or offset < 0:
            raise ValueError
        if offset <= self._file_position:
            self._buf.seek(offset)
        else:
            # Jump to the end of the buffered data first; otherwise the read
            # below would start from wherever the buffer cursor happens to
            # be and stop short of ``offset``.
            self._buf.seek(self._file_position)
            self.read(offset - self._file_position)

    def readline(self, max_bytes=None):
        """Read one line (including its ``\\n``), at most ``max_bytes`` bytes."""
        if max_bytes is None:
            max_bytes = self.length - self._buf_position

        if self._buf_position < self._file_position:
            data = self._buf.readline(max_bytes)
            if data.endswith(b"\n") or len(data) == max_bytes:
                return data
        else:
            data = b""

        assert self._buf_position == self._file_position

        initial_position = self._file_position
        found = False
        buf = []
        max_bytes -= len(data)
        while not found:
            # Read ahead in tiny steps so very little is pulled past the newline.
            readahead = self.read(min(2, max_bytes))
            max_bytes -= len(readahead)
            newline_at = readahead.find(b"\n")
            if newline_at != -1:
                buf.append(readahead[:newline_at + 1])
                found = True
            else:
                buf.append(readahead)
            if not readahead or not max_bytes:
                break
        new_data = b"".join(buf)
        data += new_data
        # Step back over anything read beyond the end of the line.
        self.seek(initial_position + len(new_data))
        return data

    def readlines(self):
        """Return all remaining lines as a list."""
        rv = []
        while True:
            data = self.readline()
            if data:
                rv.append(data)
            else:
                break
        return rv

    def __next__(self):
        data = self.readline()
        if data:
            return data
        else:
            raise StopIteration

    next = __next__

    def __iter__(self):
        return self
class ParseRequest(BaseHTTPRequestHandler):
    """Parse a raw HTTP request from text and expand ``{{...}}`` body templates.

    The request line and headers are parsed by ``BaseHTTPRequestHandler``
    (``command``, ``path``, ``headers`` ...).  Parse failures do not send
    anything; they are recorded in ``error_code`` / ``error_message``.
    ``req_body`` holds whatever follows the last line break of the request.
    """

    def __init__(self, request_text):
        if isinstance(request_text, str):
            request_text = request_text.encode()
        assert isinstance(request_text, bytes)
        self.rfile = BytesIO(request_text)
        self.raw_requestline = self.rfile.readline()
        self.error_code = self.error_message = None
        self.parse_request()
        if b'\r\n' in request_text:
            self.req_body = request_text.split(b'\r\n')[-1]
        elif b'\n\r' in request_text:
            self.req_body = request_text.split(b'\n\r')[-1]
        else:
            self.req_body = b''

    def send_error(self, code, message=None, explain=None):
        """Record a parse error instead of writing a response.

        The signature mirrors the base class: ``parse_request`` may call this
        with only a code, or with an additional ``explain`` argument.
        """
        self.error_code = code
        self.error_message = message

    @staticmethod
    def _splice(old, new, opener, closer):
        """Replace the first ``opener ... closer`` span of ``old`` with ``new``."""
        if isinstance(old, bytes):
            opener, closer = opener.encode(), closer.encode()
        pre = old[:old.find(opener)]
        end = old[old.find(closer) + 2:]
        return pre + new + end

    def _replace_b(self, old, new):
        """Replace the first ``{{...}}`` placeholder in ``old`` with ``new``."""
        return self._splice(old, new, '{{', '}}')

    def _replace_c(self, old, new):
        """Replace the first ``[[...]]`` placeholder in ``old`` with ``new``."""
        return self._splice(old, new, '[[', ']]')

    def _get_c(self, old):
        """Yield, for each ``[[path]]`` naming an existing file, the repr of its stripped lines."""
        for i in re.findall(r'\[\[(.*)\]\]', old):
            if os.path.exists(i.strip()):
                with open(i.strip()) as fp:
                    w = [line.strip() for line in fp]
                yield str(w).strip()

    def _eval_option(self, one):
        """Turn one ``{{...}}`` option into a list of candidate words.

        The option is, in order of preference: a path to a word-list file,
        a Python expression evaluating to a list (``[[file]]`` references are
        inlined first), or a whitespace-separated list of words.
        """
        batch_words = []
        if os.path.exists(one.strip()):
            gprint("load file form : %s" % one)
            with open(one.strip()) as fp:
                batch_words = [line.strip() for line in fp]
        else:
            try:
                if '[[' in one and ']]' in one:
                    tone = one
                    for d in self._get_c(tone):
                        one = self._replace_c(one, d)
                gprint("try parse from python code:\n %s" % colored(one, 'blue'))
                # SECURITY NOTE(review): eval() executes arbitrary code taken
                # from the request body; only use with trusted templates.
                w = eval(one)
                if isinstance(w, list):
                    batch_words = w
            except Exception as e:
                # Deliberate best effort: anything that is not valid Python
                # is treated as a plain list of words.
                rprint(str(e))
                gprint("only as words")
                batch_words = one.split()
        return batch_words

    def _gen_map(self, data, now_da=None):
        """Yield every combination taking one item from each list in ``data``."""
        now_da = [] if now_da is None else now_da
        if isinstance(data, list) and len(data) > 1:
            for i in data[0]:
                yield from self._gen_map(data[1:], now_da + [i])
        else:
            for i in data[0]:
                yield now_da + [i]

    def eval_and_replace(self):
        """Yield ``(body, words)`` for every combination of placeholder values."""
        old = self.req_body.decode('utf8', 'ignore')
        gprint(old)
        options = re.findall(r'\{\{(.+?)\}\}', old)
        eval_res = [self._eval_option(op) for op in options]

        if eval_res:
            for w in self._gen_map(eval_res):
                body_old = old
                for i in w:
                    body_old = self._replace_b(body_old, i)
                yield body_old, w
def test_dump():
    """Dump every X/y/zero_based/dtype combination and check it reloads intact."""

    def as_text(raw):
        # Decode the header line where possible.
        try:
            return str(raw, "utf-8")
        except TypeError:
            # fails in Python 2.x
            return raw

    X_sparse, y_dense = load_svmlight_file(datafile)
    X_dense = X_sparse.toarray()
    y_sparse = sp.csr_matrix(y_dense)

    # slicing a csr_matrix can unsort its .indices, so test that we sort
    # those correctly
    X_sliced = X_sparse[np.arange(X_sparse.shape[0])]
    y_sliced = y_sparse[np.arange(y_sparse.shape[0])]

    for X in (X_sparse, X_dense, X_sliced):
        for y in (y_sparse, y_dense, y_sliced):
            for zero_based in (True, False):
                for dtype in [np.float32, np.float64, np.int32]:
                    if sp.issparse(y) and y.shape[0] == 1:
                        # make sure y's shape is: (n_samples, n_labels)
                        # when it is sparse
                        y = y.T

                    # we need to pass a comment to get the version info in;
                    # LibSVM doesn't grok comments so they're not put in by
                    # default anymore.
                    f = BytesIO()
                    dump_svmlight_file(X.astype(dtype), y, f, comment="test",
                                       zero_based=zero_based)
                    f.seek(0)

                    header = as_text(f.readline())
                    assert_in("scikit-learn %s" % sklearn.__version__, header)

                    header = as_text(f.readline())
                    assert_in(["one", "zero"][zero_based] + "-based", header)

                    X2, y2 = load_svmlight_file(f, dtype=dtype,
                                                zero_based=zero_based)
                    assert_equal(X2.dtype, dtype)
                    assert_array_equal(X2.sorted_indices().indices, X2.indices)

                    X2_dense = X2.toarray()

                    # allow a rounding error at the last decimal place
                    decimals = 4 if dtype == np.float32 else 15
                    assert_array_almost_equal(
                        X_dense.astype(dtype), X2_dense, decimals)
                    assert_array_almost_equal(
                        y_dense.astype(dtype), y2, decimals)
def parseResponse(self, rawResponse, type="curl"):
    """Parse a raw HTTP response into protocol, status code, headers and content.

    ``rawResponse`` may contain several status lines (``100 Continue`` or
    intermediate redirect responses); only the headers of the last one are
    kept.  When ``type`` is ``'curl'`` the ``Transfer-Encoding`` header is
    dropped first, so the chunked decoder only runs for other sources.
    Gzip-encoded content is decompressed and the ``Content-Encoding`` header
    removed.
    """
    self.__content = ""
    self._headers = []

    tp = TextParser()
    tp.setSource("string", rawResponse)

    # Raw strings: "\S" in a normal literal is an invalid escape sequence.
    tp.readUntil(r"(HTTP\S*) ([0-9]+)")
    while True:
        # Skip over any interim "100" responses to reach the real status line.
        while True:
            try:
                self.protocol = tp[0][0]
            except Exception:
                self.protocol = "unknown"

            try:
                self.code = tp[0][1]
            except Exception:
                self.code = "0"

            if self.code != "100":
                break
            else:
                tp.readUntil(r"(HTTP\S*) ([0-9]+)")

        self.code = int(self.code)

        while True:
            tp.readLine()
            if tp.search("^([^:]+): ?(.*)$"):
                self.addHeader(tp[0][0], tp[0][1])
            else:
                break

        # curl sometimes sends two headers when using follow, 302 and the final header
        tp.readLine()
        if not tp.search(r"(HTTP\S*) ([0-9]+)"):
            break
        else:
            self._headers = []

    while tp.skip(1):
        self.addContent(tp.lastFull_line)

    if type == 'curl':
        self.delHeader("Transfer-Encoding")

    if self.header_equal("Transfer-Encoding", "chunked"):
        # NOTE(review): this accumulates into a text-type ``str`` while reading
        # from a BytesIO; that only works where str is the byte type
        # (Python 2) -- confirm before running under Python 3.
        result = ""
        content = BytesIO(self.__content)
        hexa = content.readline()
        nchunk = int(hexa.strip(), 16)

        while nchunk:
            result += content.read(nchunk)
            content.readline()  # consume the line break that follows chunk data
            hexa = content.readline()
            nchunk = int(hexa.strip(), 16)

        self.__content = result

    if self.header_equal("Content-Encoding", "gzip"):
        compressedstream = BytesIO(self.__content)
        # The stream must be passed as ``fileobj``; the first positional
        # parameter of GzipFile is a filename.
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        body = gzipper.read()
        self.__content = body
        self.delHeader("Content-Encoding")
class BufferedReader(object):
    """
    A wrapping line reader which wraps an existing reader.
    Read operations operate on underlying buffer, which is filled to
    block_size (BUFF_SIZE by default).

    If an optional decompress type is specified,
    data is fed through the decompressor when read from the buffer.
    Supported decompression types are the keys of DECOMPRESSORS
    ('gzip', 'deflate', 'deflate_alt').
    If unspecified, default decompression is None

    If decompression is specified, and decompress fails on first try,
    data is assumed to not be compressed and no exception is thrown
    (for 'deflate', the 'deflate_alt' decompressor is tried first).

    If a failure occurs after data has been partially decompressed, the
    error is printed and the failing block is dropped (an empty buffer is
    produced for it); the exception is not re-raised by this class.
    """

    # Maps a decompression type name to a factory returning a decompressor
    # object (the factories are defined outside this class).
    DECOMPRESSORS = {
        'gzip': gzip_decompressor,
        'deflate': deflate_decompressor,
        'deflate_alt': deflate_decompressor_alt
    }

    def __init__(self,
                 stream,
                 block_size=BUFF_SIZE,
                 decomp_type=None,
                 starting_data=None):
        """Wrap ``stream``.

        :param stream: object providing ``read(n)`` (and ``close()``)
        :param block_size: number of raw bytes requested per buffer fill
        :param decomp_type: key of DECOMPRESSORS, or None for no decompression
        :param starting_data: raw data to process before reading from ``stream``
        """
        self.stream = stream
        self.block_size = block_size

        self._init_decomp(decomp_type)

        # BytesIO over the current (decompressed) block, or None
        self.buff = None
        self.starting_data = starting_data
        # total number of (decompressed) bytes buffered so far
        self.num_read = 0
        # length of the current block held in self.buff
        self.buff_size = 0

    def set_decomp(self, decomp_type):
        """Switch to a new decompression type (resets the per-member counter)."""
        self._init_decomp(decomp_type)

    def _init_decomp(self, decomp_type):
        """Create the decompressor for ``decomp_type`` and reset num_block_read.

        Raises Exception if ``decomp_type`` is not a key of DECOMPRESSORS.
        """
        self.num_block_read = 0
        if decomp_type:
            try:
                self.decomp_type = decomp_type
                self.decompressor = self.DECOMPRESSORS[decomp_type.lower()]()
            except KeyError:
                raise Exception('Decompression type not supported: ' +
                                decomp_type)
        else:
            self.decomp_type = None
            self.decompressor = None

    def _fillbuff(self, block_size=None):
        """Refill the buffer, but only if it has been fully consumed."""
        if not self.empty():
            return

        # can't read past next member
        if self.rem_length() > 0:
            return

        block_size = block_size or self.block_size

        # starting_data (if any) is consumed once, before touching the stream
        if self.starting_data:
            data = self.starting_data
            self.starting_data = None
        else:
            data = self.stream.read(block_size)

        self._process_read(data)

        # if raw data is not empty and decompressor set, but
        # decompressed buff is empty, keep reading --
        # decompressor likely needs more data to decompress
        while data and self.decompressor and not self.decompressor.unused_data and self.empty():
            data = self.stream.read(block_size)
            self._process_read(data)

    def _process_read(self, data):
        """Decompress ``data`` and install it as the current buffer."""
        # don't process if no raw data read
        if not data:
            self.buff = None
            return

        data = self._decompress(data)
        self.buff_size = len(data)
        self.num_read += self.buff_size
        self.num_block_read += self.buff_size
        self.buff = BytesIO(data)

    def _decompress(self, data):
        """Return ``data`` run through the decompressor, if one is set."""
        if self.decompressor and data:
            try:
                data = self.decompressor.decompress(data)
            except Exception as e:
                # if first read attempt, assume non-gzipped stream
                if self.num_block_read == 0:
                    if self.decomp_type == 'deflate':
                        # retry the same data with the alternate deflate decompressor
                        self._init_decomp('deflate_alt')
                        data = self._decompress(data)
                    else:
                        # give up on decompression; data is passed through as-is
                        self.decompressor = None
                # otherwise (partly decompressed), something is wrong
                else:
                    print(str(e))
                    return b''
        return data

    def read(self, length=None):
        """
        Fill bytes and read some number of bytes
        (up to length if specified)
        <= length bytes may be read if reached the end of input
        if at buffer boundary, will attempt to read again until
        specified length is read
        """
        all_buffs = []
        while length is None or length > 0:
            self._fillbuff()
            if self.empty():
                break

            buff = self.buff.read(length)
            all_buffs.append(buff)
            if length:
                length -= len(buff)

        return b''.join(all_buffs)

    def readline(self, length=None):
        """
        Fill buffer and read a full line from the buffer
        (up to specified length, if provided)
        If no newline found at end, try filling buffer again in case
        at buffer boundary.
        """
        if length == 0:
            return b''

        self._fillbuff()

        if self.empty():
            return b''

        linebuff = self.buff.readline(length)

        # we may be at a boundary
        while not linebuff.endswith(b'\n'):
            if length:
                length -= len(linebuff)
                if length <= 0:
                    break

            self._fillbuff()

            if self.empty():
                break

            linebuff += self.buff.readline(length)

        return linebuff

    def empty(self):
        """Return True if there is no buffer or it has been fully consumed."""
        return not self.buff or self.buff.tell() >= self.buff_size

    def read_next_member(self):
        """Prepare to read the next compressed member, if one follows.

        Returns False when there is no decompressor or it holds no unused
        data; otherwise re-initialises the decompressor, queues the unused
        data as starting data and returns True.
        """
        if not self.decompressor or not self.decompressor.unused_data:
            return False

        self.starting_data = self.decompressor.unused_data
        self._init_decomp(self.decomp_type)
        return True

    def rem_length(self):
        """Return unread bytes in the buffer plus the decompressor's unused data length."""
        rem = 0
        if self.buff:
            rem = self.buff_size - self.buff.tell()

        if self.decompressor and self.decompressor.unused_data:
            rem += len(self.decompressor.unused_data)
        return rem

    def close(self):
        """Close the wrapped stream (if still set) and drop the reference."""
        if self.stream:
            self.stream.close()
            self.stream = None

    @classmethod
    def get_supported_decompressors(cls):
        """Return the names of the supported decompression types."""
        return cls.DECOMPRESSORS.keys()
from io import BytesIO, StringIO


def _round_trip(buffer, payload):
    """Show the capability flags, write, rewind, read a line, dump and close."""
    print(buffer.readable(), buffer.writable(), buffer.seekable())
    buffer.write(payload)
    buffer.seek(0)
    print(buffer.readline())
    print(buffer.getvalue())
    buffer.close()


bio = BytesIO()
_round_trip(bio, b'magede\nPython')

sio = StringIO()
_round_trip(sio, 'magedu\nPython')

# Both are classes of the io module: they allocate an in-memory buffer in
# text or binary mode that can be used just like a file object.
# The buffer is released when close() is called.
# getvalue() returns the whole content, regardless of the file position.
# Benefit of StringIO: disk operations are generally much slower than memory
# operations, so when memory is sufficient the usual optimisation is to avoid
# hitting the disk, which can greatly improve a program's efficiency.

# File-like objects can be used in the same way as real file objects.
from sys import stdout

f = stdout
print(type(f))
f.write('magedu.com')  # printed to the console
class HttpRequest:
    """A basic HTTP request."""

    # The encoding used in GET/POST dicts. None means use default setting.
    _encoding = None
    # Class-level default; replaced per instance by _initialize_handlers() or
    # the upload_handlers setter.
    _upload_handlers = []

    def __init__(self):
        # WARNING: The `WSGIRequest` subclass doesn't call `super`.
        # Any variable assignment made here should also happen in
        # `WSGIRequest.__init__()`.

        self.GET = QueryDict(mutable=True)
        self.POST = QueryDict(mutable=True)
        self.COOKIES = {}
        self.META = {}
        self.FILES = MultiValueDict()

        self.path = ''
        self.path_info = ''
        self.method = None
        self.resolver_match = None
        self.content_type = None
        self.content_params = None

    def __repr__(self):
        # Without a method or a full path only the class name is shown.
        if self.method is None or not self.get_full_path():
            return '<%s>' % self.__class__.__name__
        return '<%s: %s %r>' % (self.__class__.__name__, self.method,
                                self.get_full_path())

    @cached_property
    def headers(self):
        """Return an HttpHeaders object built from self.META (cached)."""
        return HttpHeaders(self.META)

    def _get_raw_host(self):
        """
        Return the HTTP host using the environment or request headers. Skip
        allowed hosts protection, so may return an insecure host.
        """
        # We try three options, in order of decreasing preference.
        if settings.USE_X_FORWARDED_HOST and ('HTTP_X_FORWARDED_HOST' in
                                              self.META):
            host = self.META['HTTP_X_FORWARDED_HOST']
        elif 'HTTP_HOST' in self.META:
            host = self.META['HTTP_HOST']
        else:
            # Reconstruct the host using the algorithm from PEP 333.
            host = self.META['SERVER_NAME']
            server_port = self.get_port()
            # Only append the port when it is not the scheme's default one.
            if server_port != ('443' if self.is_secure() else '80'):
                host = '%s:%s' % (host, server_port)
        return host

    def get_host(self):
        """Return the HTTP host using the environment or request headers.

        Raise DisallowedHost if the host does not validate against
        settings.ALLOWED_HOSTS.
        """
        host = self._get_raw_host()

        # Allow variants of localhost if ALLOWED_HOSTS is empty and DEBUG=True.
        allowed_hosts = settings.ALLOWED_HOSTS
        if settings.DEBUG and not allowed_hosts:
            allowed_hosts = ['localhost', '127.0.0.1', '[::1]']

        domain, port = split_domain_port(host)
        if domain and validate_host(domain, allowed_hosts):
            return host
        else:
            msg = "Invalid HTTP_HOST header: %r." % host
            if domain:
                msg += " You may need to add %r to ALLOWED_HOSTS." % domain
            else:
                msg += " The domain name provided is not valid according to RFC 1034/1035."
            raise DisallowedHost(msg)

    def get_port(self):
        """Return the port number for the request as a string."""
        if settings.USE_X_FORWARDED_PORT and 'HTTP_X_FORWARDED_PORT' in self.META:
            port = self.META['HTTP_X_FORWARDED_PORT']
        else:
            port = self.META['SERVER_PORT']
        return str(port)

    def get_full_path(self, force_append_slash=False):
        """Return self.path plus the query string, if any."""
        return self._get_full_path(self.path, force_append_slash)

    def get_full_path_info(self, force_append_slash=False):
        """Return self.path_info plus the query string, if any."""
        return self._get_full_path(self.path_info, force_append_slash)

    def _get_full_path(self, path, force_append_slash):
        """Build ``<escaped path>[/][?<query string>]`` for the given path.

        A slash is appended only when ``force_append_slash`` is true and the
        path does not already end with one.
        """
        # RFC 3986 requires query string arguments to be in the ASCII range.
        # Rather than crash if this doesn't happen, we encode defensively.
        return '%s%s%s' % (escape_uri_path(path), '/' if force_append_slash
                           and not path.endswith('/') else '',
                           ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))
                            ) if self.META.get('QUERY_STRING', '') else '')

    def get_signed_cookie(self,
                          key,
                          default=RAISE_ERROR,
                          salt='',
                          max_age=None):
        """
        Attempt to return a signed cookie. If the signature fails or the
        cookie has expired, raise an exception, unless the `default` argument
        is provided,  in which case return that value.
        """
        try:
            cookie_value = self.COOKIES[key]
        except KeyError:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        try:
            # The signer's salt combines the cookie name with the given salt.
            value = signing.get_cookie_signer(salt=key + salt).unsign(
                cookie_value, max_age=max_age)
        except signing.BadSignature:
            if default is not RAISE_ERROR:
                return default
            else:
                raise
        return value

    def get_raw_uri(self):
        """
        Return an absolute URI from variables available in this request. Skip
        allowed hosts protection, so may return insecure URI.
        """
        return '{scheme}://{host}{path}'.format(
            scheme=self.scheme,
            host=self._get_raw_host(),
            path=self.get_full_path(),
        )

    def build_absolute_uri(self, location=None):
        """
        Build an absolute URI from the location and the variables available in
        this request. If no ``location`` is specified, build the absolute URI
        using request.get_full_path(). If the location is absolute, convert it
        to an RFC 3987 compliant URI and return it. If location is relative or
        is scheme-relative (i.e., ``//example.com/``), urljoin() it to a base
        URL constructed from the request variables.
        """
        if location is None:
            # Make it an absolute url (but schemeless and domainless) for the
            # edge case that the path starts with '//'.
            location = '//%s' % self.get_full_path()
        bits = urlsplit(location)
        if not (bits.scheme and bits.netloc):
            # Handle the simple, most common case. If the location is absolute
            # and a scheme or host (netloc) isn't provided, skip an expensive
            # urljoin() as long as no path segments are '.' or '..'.
            if (bits.path.startswith('/') and not bits.scheme
                    and not bits.netloc and '/./' not in bits.path
                    and '/../' not in bits.path):
                # If location starts with '//' but has no netloc, reuse the
                # schema and netloc from the current request. Strip the double
                # slashes and continue as if it wasn't specified.
                if location.startswith('//'):
                    location = location[2:]
                location = self._current_scheme_host + location
            else:
                # Join the constructed URL with the provided location, which
                # allows the provided location to apply query strings to the
                # base path.
                location = urljoin(self._current_scheme_host + self.path,
                                   location)
        return iri_to_uri(location)

    @cached_property
    def _current_scheme_host(self):
        """Return ``<scheme>://<host>`` for this request (cached)."""
        return '{}://{}'.format(self.scheme, self.get_host())

    def _get_scheme(self):
        """
        Hook for subclasses like WSGIRequest to implement. Return 'http' by
        default.
        """
        return 'http'

    @property
    def scheme(self):
        """Return 'https' or 'http'.

        When settings.SECURE_PROXY_SSL_HEADER is set and the named header is
        present in META, the scheme is decided by comparing the header with
        the configured secure value; otherwise _get_scheme() is used.
        """
        if settings.SECURE_PROXY_SSL_HEADER:
            try:
                header, secure_value = settings.SECURE_PROXY_SSL_HEADER
            except ValueError:
                raise ImproperlyConfigured(
                    'The SECURE_PROXY_SSL_HEADER setting must be a tuple containing two values.'
                )
            header_value = self.META.get(header)
            if header_value is not None:
                return 'https' if header_value == secure_value else 'http'
        return self._get_scheme()

    def is_secure(self):
        """Return True if the request scheme is 'https'."""
        return self.scheme == 'https'

    def is_ajax(self):
        """Return True if the X-Requested-With header equals 'XMLHttpRequest'."""
        return self.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'

    @property
    def encoding(self):
        return self._encoding

    @encoding.setter
    def encoding(self, val):
        """
        Set the encoding used for GET/POST accesses. If the GET or POST
        dictionary has already been created, remove and recreate it on the
        next access (so that it is decoded correctly).
        """
        self._encoding = val
        if hasattr(self, 'GET'):
            del self.GET
        if hasattr(self, '_post'):
            del self._post

    def _initialize_handlers(self):
        """Load one upload handler per entry of settings.FILE_UPLOAD_HANDLERS."""
        self._upload_handlers = [
            uploadhandler.load_handler(handler, self)
            for handler in settings.FILE_UPLOAD_HANDLERS
        ]

    @property
    def upload_handlers(self):
        if not self._upload_handlers:
            # If there are no upload handlers defined, initialize them from settings.
            self._initialize_handlers()
        return self._upload_handlers

    @upload_handlers.setter
    def upload_handlers(self, upload_handlers):
        # The presence of _files is used as the marker that the upload has
        # already been processed.
        if hasattr(self, '_files'):
            raise AttributeError(
                "You cannot set the upload handlers after the upload has been processed."
            )
        self._upload_handlers = upload_handlers

    def parse_file_upload(self, META, post_data):
        """Return a tuple of (POST QueryDict, FILES MultiValueDict)."""
        # Wrap the handlers in an ImmutableList carrying a warning message
        # for later modification attempts.
        self.upload_handlers = ImmutableList(
            self.upload_handlers,
            warning=
            "You cannot alter upload handlers after the upload has been processed."
        )
        parser = MultiPartParser(META, post_data, self.upload_handlers,
                                 self.encoding)
        return parser.parse()

    @property
    def body(self):
        """Return the raw request body, reading and caching it on first access.

        Raises RawPostDataException if the stream was already read from,
        RequestDataTooBig if CONTENT_LENGTH exceeds
        settings.DATA_UPLOAD_MAX_MEMORY_SIZE, and UnreadablePostError if
        reading fails with an IOError.
        """
        if not hasattr(self, '_body'):
            if self._read_started:
                raise RawPostDataException(
                    "You cannot access body after reading from request's data stream"
                )

            # Limit the maximum request data size that will be handled in-memory.
            if (settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None
                    and int(self.META.get('CONTENT_LENGTH')
                            or 0) > settings.DATA_UPLOAD_MAX_MEMORY_SIZE):
                raise RequestDataTooBig(
                    'Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE.'
                )

            try:
                self._body = self.read()
            except IOError as e:
                raise UnreadablePostError(*e.args) from e
            # Point the stream at the cached bytes so it can be read again.
            self._stream = BytesIO(self._body)
        return self._body

    def _mark_post_parse_error(self):
        """Set empty _post/_files so POST data is not parsed again."""
        self._post = QueryDict()
        self._files = MultiValueDict()

    def _load_post_and_files(self):
        """Populate self._post and self._files if the content-type is a form type"""
        if self.method != 'POST':
            self._post, self._files = QueryDict(
                encoding=self._encoding), MultiValueDict()
            return
        # The stream was consumed without caching the body: nothing to parse.
        if self._read_started and not hasattr(self, '_body'):
            self._mark_post_parse_error()
            return

        if self.content_type == 'multipart/form-data':
            if hasattr(self, '_body'):
                # Use already read data
                data = BytesIO(self._body)
            else:
                data = self
            try:
                self._post, self._files = self.parse_file_upload(
                    self.META, data)
            except MultiPartParserError:
                # An error occurred while parsing POST data. Since when
                # formatting the error the request handler might access
                # self.POST, set self._post and self._file to prevent
                # attempts to parse POST data again.
                self._mark_post_parse_error()
                raise
        elif self.content_type == 'application/x-www-form-urlencoded':
            self._post, self._files = QueryDict(
                self.body, encoding=self._encoding), MultiValueDict()
        else:
            self._post, self._files = QueryDict(
                encoding=self._encoding), MultiValueDict()

    def close(self):
        """Close every uploaded file held in self._files, if any."""
        if hasattr(self, '_files'):
            for f in chain.from_iterable(l[1] for l in self._files.lists()):
                f.close()

    # File-like and iterator interface.
    #
    # Expects self._stream to be set to an appropriate source of bytes by
    # a corresponding request subclass (e.g. WSGIRequest).
    # Also when request data has already been read by request.POST or
    # request.body, self._stream points to a BytesIO instance
    # containing that data.

    def read(self, *args, **kwargs):
        """Read from the request stream; IOError becomes UnreadablePostError."""
        self._read_started = True
        try:
            return self._stream.read(*args, **kwargs)
        except IOError as e:
            raise UnreadablePostError(*e.args) from e

    def readline(self, *args, **kwargs):
        """Read one line from the request stream; IOError becomes UnreadablePostError."""
        self._read_started = True
        try:
            return self._stream.readline(*args, **kwargs)
        except IOError as e:
            raise UnreadablePostError(*e.args) from e

    def __iter__(self):
        # Yield lines until readline() returns b''.
        return iter(self.readline, b'')

    def xreadlines(self):
        """Deprecated: iterate over the request instead."""
        warnings.warn(
            'HttpRequest.xreadlines() is deprecated in favor of iterating the '
            'request.',
            RemovedInDjango30Warning,
            stacklevel=2,
        )
        yield from self

    def readlines(self):
        """Return all remaining lines of the request stream as a list."""
        return list(self)
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

"""a test script of BytesIO"""

__author__ = 'Jason Yang'

from io import BytesIO

# Writing: build the buffer from several byte fragments.
f = BytesIO()
f.writelines([b'hello', b' ', b'world!'])

# getvalue(): b'hello world!'
print('getvalue():', f.getvalue())

# Reading: initialise a buffer with UTF-8 bytes and consume it line by line.
f = BytesIO('临江仙 宋·苏轼\n'
            '夜饮东坡醒复醉,\n归来仿佛三更。\n家童鼻息已雷鸣。\n敲门都不应,\n倚杖听江声。\n\n'
            '长恨此身非我有,\n何时忘却营营。\n夜阑风静縠纹平。\n小舟从此逝,\n江海寄余生。'.encode('utf-8'))

s = f.readline()
while s != b'':
    print(s.strip().decode('utf-8'))
    s = f.readline()